# 計算 Transformers 模型參數量與量化等級

## 安裝依賴套件

In [34]:
%pip install -q -U pip setuptools pip-autoremove
%pip install -q -U transformers
%pip install -q -U python-dotenv
%pip list

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Package            Version
------------------ -----------
asttokens          3.0.0
certifi            2025.1.31
charset-normalizer 3.4.1
comm               0.2.2
debugpy            1.8.12
decorator          5.1.1
exceptiongroup     1.2.2
executing          2.2.0
filelock           3.17.0
fsspec             2025.2.0
huggingface-hub    0.29.0
idna               3.10
ipykernel          6.29.5
ipython            8.32.0
jedi               0.19.2
jupyter_client     8.6.3
jupyter_core       5.7.2
matplotlib-inline  0.1.7
nest-asyncio       1.6.0
numpy              2.2.3
packaging          24.2
parso              0.8.4
pexpect            4.9.0
pip                25.0.1
pip-autoremove     0.10.0
platformdirs       4.3.6
prompt_toolkit     3.0.50
psutil             7.0.0
ptyprocess         0.7.0
pure_

## 根據 Transformers 模型各層加總計算模型參數量

In [35]:
from transformers import AutoConfig, PretrainedConfig

def load_model_config(model_id: str, **kwargs) -> PretrainedConfig:
    config = AutoConfig.from_pretrained(model_id, **kwargs)
    return config

In [36]:
from dataclasses import dataclass
from typing import Optional

@dataclass
class ModelLayerParams:
    
    vocab_size: Optional[int]
    
    hidden_size: Optional[int]
    
    intermediate_size: Optional[int] 
    
    num_attention_heads: Optional[int]
    
    head_dim: Optional[int]
    
    num_key_value_heads: Optional[int]
    
    num_hidden_layers: Optional[int]
    

In [59]:
BILLION = 10 ** 9

def calc_model_params(model_config: PretrainedConfig) -> float:
    
    def calc_embedding_layer_params(hidden_size: int, vocab_size: int):
        return hidden_size * vocab_size    
    
    def calc_multi_head_layer_params(
        hidden_size: int, 
        num_key_value_heads: int, 
        head_dim: int
    ) -> int:
        q_layer_params = hidden_size ** 2
        k_layer_params = hidden_size * num_key_value_heads * head_dim
        v_layer_params = k_layer_params
        o_layer_params = hidden_size ** 2
        
        return q_layer_params + k_layer_params + v_layer_params + o_layer_params    
    
    def calc_ffn_layer_params(hidden_size: int, intermediate_size: int) -> int:
        ffn_first_layer = hidden_size * 2 * intermediate_size
        ffn_second_layer = intermediate_size * hidden_size
        
        return ffn_first_layer + ffn_second_layer
    
    def calc_norm_layer_params(hidden_size: int) -> int:
        return 2 * hidden_size
    
    def calc_transformer_layer_params(
        num_hidden_layers: int, 
        multi_head_layer_params: int, 
        ffn_layer_params: int,
        norm_layer_params: int
    ) -> int:
        return num_hidden_layers * (multi_head_layer_params + ffn_layer_params + norm_layer_params)
    
    def validate_model_layer_params(params: ModelLayerParams) -> bool:
        return all([v is not None for v in params.__dict__.values()])
    
    total_model_params: float = 0
    
    vocab_size: Optional[int] = None
    hidden_size: Optional[int] = None
    intermediate_size: Optional[int] = None
    num_attention_heads: Optional[int] = None
    head_dim: Optional[int] = None
    num_key_value_heads: Optional[int] = None
    num_hidden_layers: Optional[int] = None

    if hasattr(model_config, 'vocab_size'):
        vocab_size = model_config.vocab_size
    elif hasattr(model_config, 'text_config'):
        vocab_size = model_config.text_config.vocab_size
    
    if hasattr(model_config, 'hidden_size'):
        hidden_size = model_config.hidden_size
    elif hasattr(model_config, 'text_config'):
        hidden_size = model_config.text_config.hidden_size
        
    if hasattr(model_config, 'intermediate_size'):
        intermediate_size = model_config.intermediate_size
    elif hasattr(model_config, 'text_config'):
        intermediate_size = model_config.text_config.intermediate_size
        
    if hasattr(model_config, 'num_attention_heads'):
        num_attention_heads = model_config.num_attention_heads
    elif hasattr(model_config, 'text_config'):
        num_attention_heads = model_config.text_config.num_attention_heads
        
    if hasattr(model_config, 'head_dim'):
        if model_config.head_dim is not None:
            head_dim = model_config.head_dim
    elif hasattr(model_config, 'text_config'):
        if hasattr(model_config.text_config, 'head_dim'):
            head_dim = model_config.text_config.head_dim
    else:
        head_dim = hidden_size // num_attention_heads
        
    if hasattr(model_config, 'num_key_value_heads'):
        num_key_value_heads = model_config.num_key_value_heads
    elif hasattr(model_config, 'text_config'):
        num_key_value_heads = model_config.text_config.num_key_value_heads
        
    if hasattr(model_config, 'num_hidden_layers'):
        num_hidden_layers = model_config.num_hidden_layers
    elif hasattr(model_config, 'text_config'):
        num_hidden_layers = model_config.text_config.num_hidden_layers
    
    model_layer_params = ModelLayerParams(
        vocab_size=vocab_size,
        hidden_size=hidden_size,
        intermediate_size=intermediate_size,
        num_attention_heads=num_attention_heads,
        head_dim=head_dim,
        num_key_value_heads=num_key_value_heads,
        num_hidden_layers=num_hidden_layers
    )
    print(model_layer_params)
    
    if not validate_model_layer_params(model_layer_params):
        return 0.0
    
    embedding_layer_params = calc_embedding_layer_params(hidden_size, vocab_size)
    multi_head_layer_params = calc_multi_head_layer_params(hidden_size, num_key_value_heads, head_dim)
    ffn_layer_params = calc_ffn_layer_params(hidden_size, intermediate_size)
    norm_layer_params = calc_norm_layer_params(hidden_size)
    transformer_layer_params = calc_transformer_layer_params(num_hidden_layers, multi_head_layer_params, ffn_layer_params, norm_layer_params)
    
    total_model_params += embedding_layer_params
    total_model_params += transformer_layer_params
    
    return round(total_model_params / BILLION, 2)
    
    

## 根據 Transformers 模型的 Config，計算量化等級

In [44]:
from typing import Union

from transformers import AwqConfig, BitsAndBytesConfig, GPTQConfig

def calc_model_quantization_level(model_config: PretrainedConfig) -> int:
    if hasattr(model_config, 'quantization_config') and model_config.quantization_config:
        quantization_config = model_config.quantization_config
        
        if isinstance(quantization_config, dict):
            if quantization_config.get("quant_method") == "bitsandbytes":
                if quantization_config.get("load_in_4bit") and bool(quantization_config["load_in_4bit"]): 
                    return 4
                if quantization_config.get("load_in_8bit") and bool(quantization_config["load_in_8bit"]): 
                    return 8
                
            if quantization_config.get("quant_method") == "fp8":
                return 8
                
            if quantization_config.get("bits"):
                return int(quantization_config["bits"])
            
        if isinstance(quantization_config, Union[AwqConfig, GPTQConfig]):
            return quantization_config.bits
        
        if isinstance(quantization_config, BitsAndBytesConfig):
            if quantization_config.load_in_4bit:
                return 4
            if quantization_config.load_in_8bit:
                return 8
    else:
        return 16

## Testing

In [39]:
from dataclasses import dataclass

@dataclass
class ModelParams:
    
    model_id : str
    
    model_config: PretrainedConfig
    
    params: float
    
    quantization_level: int

In [63]:
import os

from dotenv import load_dotenv

load_dotenv(override=True)

model_ids = [
    "meta-llama/Llama-3.1-8B-Instruct",
    "meta-llama/Llama-3.2-3B-Instruct",
    "meta-llama/Llama-3.2-90B-Vision-Instruct",
    "meta-llama/Llama-3.3-70B-Instruct",
    "google/gemma-2-9b-it",
    "unsloth/Qwen2.5-3B-unsloth-bnb-4bit",
    "deepseek-ai/DeepSeek-R1",
    "Qwen/Qwen2.5-3B-Instruct"
]

for model_id in model_ids:
    model_config = load_model_config(
        model_id, 
        token=os.getenv("HF_TOKEN", None),
        # trust_remote_code=True
    )
    # print(model_config)
    model_params = calc_model_params(model_config)
    model_quantization_level = calc_model_quantization_level(model_config)
    print(f"Model: {model_id}, Params: {model_params}B, Quantization Level: {model_quantization_level}bits")

ModelLayerParams(vocab_size=128256, hidden_size=4096, intermediate_size=14336, num_attention_heads=32, head_dim=128, num_key_value_heads=8, num_hidden_layers=32)
Model: meta-llama/Llama-3.1-8B-Instruct, Params: 7.5B, Quantization Level: 16bits
ModelLayerParams(vocab_size=128256, hidden_size=3072, intermediate_size=8192, num_attention_heads=24, head_dim=128, num_key_value_heads=8, num_hidden_layers=28)
Model: meta-llama/Llama-3.2-3B-Instruct, Params: 3.21B, Quantization Level: 16bits
ModelLayerParams(vocab_size=128256, hidden_size=8192, intermediate_size=28672, num_attention_heads=64, head_dim=None, num_key_value_heads=8, num_hidden_layers=100)
Model: meta-llama/Llama-3.2-90B-Vision-Instruct, Params: 0.0B, Quantization Level: 16bits
ModelLayerParams(vocab_size=128256, hidden_size=8192, intermediate_size=28672, num_attention_heads=64, head_dim=128, num_key_value_heads=8, num_hidden_layers=80)
Model: meta-llama/Llama-3.3-70B-Instruct, Params: 69.5B, Quantization Level: 16bits
ModelLayerPa