<a href="https://colab.research.google.com/github/sayanbanerjee32/multimodal_llm/blob/main/fine_tuned_model_diff.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install transformers==4.44.2
!pip install -Uq peft

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m322.5/322.5 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import torch
from transformers import AutoModelForCausalLM
from peft import PeftModel
import numpy as np
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

In [None]:
def load_model(model_path, device='cpu'):
    """Load a causal language model in float32 and move it to ``device``.

    Args:
        model_path: Identifier or path handed to
            ``AutoModelForCausalLM.from_pretrained``.
        device: Target device passed to ``.to()``; defaults to ``'cpu'``.

    Returns:
        The loaded model after being moved to ``device``.
    """
    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        torch_dtype=torch.float32,
        low_cpu_mem_usage=True,
    )
    return model.to(device)

def compare_models(base_model, fine_tuned_model):
    """Measure how far each fine-tuned parameter moved away from the base model.

    Parameters are paired by *name* rather than by position, so an extra or
    missing parameter in one model does not misalign every pair after it.
    Parameters present in only one of the two models are skipped.

    When two same-named parameters have different shapes but the same number
    of dimensions (e.g. an embedding matrix whose vocabulary was resized), only
    the overlapping leading region along each dimension is compared and a
    warning is emitted; the rows/columns that exist in only one model do not
    contribute to the reported value. Same-named parameters whose number of
    dimensions differs are skipped with a warning.

    Args:
        base_model: Object exposing ``named_parameters()`` yielding
            ``(name, tensor)`` pairs (e.g. a ``torch.nn.Module``).
        fine_tuned_model: Object with the same interface to compare against
            ``base_model``.

    Returns:
        A list of ``(name, diff)`` tuples in ``base_model``'s parameter order,
        where ``diff`` is the norm of ``base - fine_tuned`` (``torch.norm``
        with default arguments) as a Python float.
    """
    import warnings  # local import: only needed on the shape-mismatch paths

    tuned_params = dict(fine_tuned_model.named_parameters())
    differences = []
    # No gradients are needed for a pure comparison; skipping autograd avoids
    # building a graph for every subtraction.
    with torch.no_grad():
        for name, base_param in base_model.named_parameters():
            tuned_param = tuned_params.get(name)
            if tuned_param is None:
                continue  # parameter exists only in the base model

            if base_param.dim() != tuned_param.dim():
                warnings.warn(
                    f"Skipping '{name}': incompatible shapes "
                    f"{tuple(base_param.shape)} vs {tuple(tuned_param.shape)}"
                )
                continue

            if base_param.shape != tuned_param.shape:
                # Restrict both tensors to the region they have in common.
                common = tuple(
                    slice(0, min(base_size, tuned_size))
                    for base_size, tuned_size in zip(base_param.shape, tuned_param.shape)
                )
                warnings.warn(
                    f"Shape mismatch for '{name}': {tuple(base_param.shape)} vs "
                    f"{tuple(tuned_param.shape)}; comparing the overlapping region only"
                )
                base_param = base_param[common]
                tuned_param = tuned_param[common]

            # Align devices so models loaded on different devices still compare.
            diff = torch.norm(base_param - tuned_param.to(base_param.device)).item()
            differences.append((name, diff))
    return differences

In [None]:
# Base checkpoint: the reference model for the comparison
base_model = load_model("microsoft/Phi-3-mini-4k-instruct", device=device)

config.json:   0%|          | 0.00/967 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/16.5k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.67G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/181 [00:00<?, ?B/s]

In [None]:
# Fine-tuned checkpoint to compare against the base model
fine_tuned_model = load_model(
    "sayanbanerjee32/multimodal-phi3-4k-instruct-llava",
    device=device,
)

config.json:   0%|          | 0.00/1.01k [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/16.3k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.67G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/172 [00:00<?, ?B/s]

In [None]:
# Compute per-parameter differences and rank them, largest first
differences = sorted(
    compare_models(base_model, fine_tuned_model),
    key=lambda entry: entry[1],
    reverse=True,
)

RuntimeError: The size of tensor a (32064) must match the size of tensor b (32011) at non-singleton dimension 0

In [None]:
# Display the base model's module tree (layer types and sizes) for inspection,
# e.g. to check the embedding dimensions of the loaded checkpoint.
base_model

Phi3ForCausalLM(
  (model): Phi3Model(
    (embed_tokens): Embedding(32064, 3072, padding_idx=32000)
    (embed_dropout): Dropout(p=0.0, inplace=False)
    (layers): ModuleList(
      (0-31): 32 x Phi3DecoderLayer(
        (self_attn): Phi3Attention(
          (o_proj): Linear(in_features=3072, out_features=3072, bias=False)
          (qkv_proj): Linear(in_features=3072, out_features=9216, bias=False)
          (rotary_emb): Phi3RotaryEmbedding()
        )
        (mlp): Phi3MLP(
          (gate_up_proj): Linear(in_features=3072, out_features=16384, bias=False)
          (down_proj): Linear(in_features=8192, out_features=3072, bias=False)
          (activation_fn): SiLU()
        )
        (input_layernorm): Phi3RMSNorm((3072,), eps=1e-05)
        (resid_attn_dropout): Dropout(p=0.0, inplace=False)
        (resid_mlp_dropout): Dropout(p=0.0, inplace=False)
        (post_attention_layernorm): Phi3RMSNorm((3072,), eps=1e-05)
      )
    )
    (norm): Phi3RMSNorm((3072,), eps=1e-05)
  )
 

In [None]:
# Report the ten highest-ranked entries of the sorted differences list
print("Top 10 layers with the largest differences:")
top_entries = differences[:10]
for layer_name, layer_diff in top_entries:
    print(f"{layer_name}: {layer_diff}")

In [None]:
# Summary statistics over the per-parameter difference magnitudes.
# Each statistic is computed immediately before it is printed.
diff_values = [magnitude for _, magnitude in differences]

mean_diff = np.mean(diff_values)
print(f"\nMean difference: {mean_diff}")

median_diff = np.median(diff_values)
print(f"Median difference: {median_diff}")

max_diff = np.max(diff_values)
print(f"Max difference: {max_diff}")

min_diff = np.min(diff_values)
print(f"Min difference: {min_diff}")

In [None]:
# Tally the parameters whose difference exceeds the 0.01 threshold
significant_changes = len([magnitude for _, magnitude in differences if magnitude > 0.01])
print(f"\nLayers with significant changes (>0.01): {significant_changes} out of {len(differences)}")