# ZADANIE: Model Card Tensor

- przygotuj dataframe w oparciu o specyfikacje "model cards" dla poszczególnych modeli

# DOCS

- [dokumentacja pliku HF:`config.json`](https://huggingface.co/docs/transformers/main_classes/configuration)
- model cards:
  1. [Bielik-7B-v0.1](https://huggingface.co/speakleash/Bielik-7B-v0.1)
  2. [Llama-3.1-8B](https://huggingface.co/meta-llama/Llama-3.1-8B)
  3. [Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1)
  4. dla ambitnych 🔥 (inna struktura)
    - [DeepSeek-R1](https://huggingface.co/deepseek-ai/DeepSeek-R1)
    - [Qwen2.5-7B](https://huggingface.co/Qwen/Qwen2.5-7B)

In [None]:
!pip install pandas



In [None]:
from pathlib import Path
import json
import sys
import os
import pandas as pd

# Collect every "*-config.json" file below the current working directory.
base = Path(os.getcwd())
pattern = "*-config.json"
matches = sorted(base.rglob(pattern))
files = [p.name for p in matches]

failing = []      # human-readable messages for files that could not be loaded
model_cards = []  # one {'filename', 'json'} record per successfully parsed file

for p in matches:
    try:
        with p.open('r', encoding='utf-8') as f:
            data = json.load(f)
        model_cards.append({
            'filename': p.name,
            'json': data
        })
    except json.JSONDecodeError:
        # Must come before ValueError: JSONDecodeError is a ValueError subclass.
        failing.append(f"Niepoprawny format JSON (Pusty/Błędny) w pliku: {p.name}")
    except ValueError as e:
        failing.append(f"Błąd danych: {e} Plik: {p.name}")
    except Exception as e:
        failing.append(f"Inny nieznany błąd przy wczytywaniu {p.name}: {e}")

if failing:
    print(failing)
elif not model_cards:
    # Nothing matched the pattern: do not claim success for an empty result.
    print(f"No files matching {pattern!r} found under {base}")
else:
    print('All model cards loaded successfully')

# Passing explicit columns keeps 'filename' and 'json' present even when no
# file was loaded, so the column lookups below cannot raise KeyError.
df = pd.DataFrame(model_cards, columns=['filename', 'json'])
# A JSON document whose root is not an object has no 'model_type' to read.
df['model_type'] = df['json'].apply(
    lambda cfg: cfg.get('model_type') if isinstance(cfg, dict) else None
)
df_wynikowy = df[['filename', 'model_type']]

display(df_wynikowy)



All models calrds loaded successfully


Unnamed: 0,filename,model_type
0,Bielik-7B-Instruct-v0.1-config.json,mistral
1,DeepSeek-R1-config.json,deepseek_v3
2,Llama-3.1-8B-config.json,llama
3,Mistral-7B-v0.1-config.json,mistral
4,Qwen2.5-7B-Instruct-config.json,qwen2


In [None]:
import pandas as pd

def calculate_tensor_shape(tensor_name, config):
    """Return the shape of a model weight tensor, formatted as a string.

    The shape is derived from the values of a Hugging Face ``config.json``
    dictionary. The mapping assumes a Llama/Mistral-style decoder layer
    (separate q/k/v/o projections and a gated MLP); projection weights are
    reported in the ``torch.nn.Linear`` storage layout
    ``[out_features, in_features]``.

    Args:
        tensor_name: Tensor name relative to its layer, e.g.
            ``'self_attn.k_proj.weight'``. The legacy misspelling ``'mpl.'``
            is accepted as an alias for the ``'mlp.'`` prefix.
        config: Mapping with the model configuration. Missing or ``None``
            sizes are treated as 0, ``num_key_value_heads`` falls back to
            ``num_attention_heads`` and ``head_dim`` falls back to
            ``hidden_size // num_attention_heads``.

    Returns:
        A string such as ``'[1024, 4096]'`` or ``'[4096]'``, or ``'[?, ?]'``
        when the tensor name is not recognised.
    """
    hidden_size = config.get('hidden_size') or 0
    vocab_size = config.get('vocab_size') or 0
    intermediate_size = config.get('intermediate_size') or 0
    num_attention_heads = config.get('num_attention_heads') or 0
    num_key_value_heads = config.get('num_key_value_heads') or num_attention_heads

    # Query and key/value heads share one head dimension. Prefer the explicit
    # config value; otherwise derive it from the number of *query* heads.
    head_dim = config.get('head_dim')
    if not head_dim:
        head_dim = hidden_size // num_attention_heads if num_attention_heads > 0 else 0

    # With grouped-query attention there are fewer key/value heads than query
    # heads, so the k/v projections are narrower than the q projection.
    q_size = num_attention_heads * head_dim
    kv_size = num_key_value_heads * head_dim

    shape_map = {
        'embed_tokens.weight': [vocab_size, hidden_size],
        'input_layernorm.weight': [hidden_size],
        'mlp.down_proj.weight': [hidden_size, intermediate_size],
        'mlp.gate_proj.weight': [intermediate_size, hidden_size],
        'mlp.up_proj.weight': [intermediate_size, hidden_size],
        'post_attention_layernorm.weight': [hidden_size],
        'self_attn.k_proj.weight': [kv_size, hidden_size],
        'self_attn.o_proj.weight': [hidden_size, q_size],
        'self_attn.q_proj.weight': [q_size, hidden_size],
        'self_attn.v_proj.weight': [kv_size, hidden_size],
    }

    # Keep callers that still use the historical 'mpl.' typo working.
    if isinstance(tensor_name, str) and tensor_name.startswith('mpl.'):
        tensor_name = 'mlp.' + tensor_name[len('mpl.'):]

    shape = shape_map.get(tensor_name)
    if shape is None:
        return '[?, ?]'

    # Format as string
    return f'[{", ".join(map(str, shape))}]'

# Get model configs from Cell 3's loaded data
# We'll reload them here to be self-contained, or use the df from Cell 3
from pathlib import Path
import json
import os

# Locate every "*-config.json" file below the current working directory.
base = Path(os.getcwd())
pattern = "*-config.json"
matches = sorted(base.rglob(pattern))

# Parsed configs keyed by model name (the file name without "-config.json").
model_configs = {}
for config_path in matches:
    try:
        with config_path.open('r', encoding='utf-8') as handle:
            model_configs[config_path.name.replace('-config.json', '')] = json.load(handle)
    except Exception as exc:
        print(f"Error loading {config_path.name}: {exc}")

# Row labels of the result table. The 'mpl.' spelling is kept on purpose:
# these strings are looked up as-is by calculate_tensor_shape.
tensors = [
    'embed_tokens.weight',
    'input_layernorm.weight',
    'mlp.down_proj.weight',
    'mpl.gate_proj.weight',
    'mpl.up_proj.weight',
    'post_attention_layernorm.weight',
    'self_attn.k_proj.weight',
    'self_attn.o_proj.weight',
    'self_attn.q_proj.weight',
    'self_attn.v_proj.weight',
]

# One column per model, one formatted shape string per tensor.
data = {
    name: [calculate_tensor_shape(tensor, cfg) for tensor in tensors]
    for name, cfg in model_configs.items()
}

df = pd.DataFrame(data, index=tensors)

display(df)
# display(df.T)  # transposed view (models as rows)



Unnamed: 0,Bielik-7B-Instruct-v0.1,Llama-3.1-8B,Mistral-7B-v0.1
embed_tokens.weight,"[?, ?]","[?, ?]","[?, ?]"
input_layernorm.weight,"[?, ?]","[?, ?]","[?, ?]"
mlp.down_proj.weight,"[?, ?]","[?, ?]","[?, ?]"
mpl.gate_proj.weight,"[?, ?]","[?, ?]","[?, ?]"
mpl.up_proj.weight,"[?, ?]","[?, ?]","[?, ?]"
post_attention_layernorm.weight,"[?, ?]","[?, ?]","[?, ?]"
self_attn.k_proj.weight,"[?, ?]","[?, ?]","[?, ?]"
self_attn.o_proj.weight,"[?, ?]","[?, ?]","[?, ?]"
self_attn.q_proj.weight,"[?, ?]","[?, ?]","[?, ?]"
self_attn.v_proj.weight,"[?, ?]","[?, ?]","[?, ?]"
