In [1]:
from transformers import AutoModelForTokenClassification
from peft import prepare_model_for_kbit_training, get_peft_model, LoraConfig, TaskType

import numpy as np

In [2]:
model_id = "meta-llama/Llama-3.2-3B"


model = AutoModelForTokenClassification.from_pretrained(model_id)
lora_config = LoraConfig(
    r=64,  # the dimension of the low-rank matrices
    lora_alpha=128, # scaling factor for LoRA activations vs pre-trained weight activations
    lora_dropout=0.05, 
    bias='none',
    inference_mode=False,
    task_type=TaskType.CAUSAL_LM,
    target_modules=['o_proj', 'v_proj', "q_proj", "k_proj", "gate_proj", "down_proj", "up_proj"]
) 


# model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, lora_config)
# Trainable Parameters
model.print_trainable_parameters()

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of LlamaForTokenClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-3B and are newly initialized: ['score.bias', 'score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 97,255,424 || all params: 3,310,011,394 || trainable%: 2.9382


In [5]:
model

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForTokenClassification(
      (model): LlamaModel(
        (embed_tokens): Embedding(128256, 3072)
        (layers): ModuleList(
          (0-27): 28 x LlamaDecoderLayer(
            (self_attn): LlamaAttention(
              (q_proj): lora.Linear(
                (base_layer): Linear(in_features=3072, out_features=3072, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=3072, out_features=64, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=64, out_features=3072, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): 

In [4]:
from src.metrics.directionality import AttentionExtractor, SymmetryScore, DirectionalityScore

extractor = AttentionExtractor(
    model=model,
    q_path="model.layers[layer_idx].self_attn.q_proj",
    k_path="model.layers[layer_idx].self_attn.k_proj",
    # q_path="base_model.model.model.layers[layer_idx].self_attn.q_proj",
    # k_path="base_model.model.model.layers[layer_idx].self_attn.k_proj",
    attention_type="grouped",
    # is_lora=True, merge_lora=True
)

In [7]:
module = model.base_model.model.model.layers[0].self_attn.q_proj

module

lora.Linear(
  (base_layer): Linear(in_features=3072, out_features=3072, bias=False)
  (lora_dropout): ModuleDict(
    (default): Dropout(p=0.05, inplace=False)
  )
  (lora_A): ModuleDict(
    (default): Linear(in_features=3072, out_features=64, bias=False)
  )
  (lora_B): ModuleDict(
    (default): Linear(in_features=64, out_features=3072, bias=False)
  )
  (lora_embedding_A): ParameterDict()
  (lora_embedding_B): ParameterDict()
  (lora_magnitude_vector): ModuleDict()
)

In [8]:
from peft.tuners.lora import LoraLayer

isinstance(module, LoraLayer)

True

In [4]:
res = score()

res

[0.013316988945007324,
 0.013686060905456543,
 0.0078051090240478516,
 0.009980559349060059,
 0.02069699764251709,
 0.0016776323318481445,
 0.02029287815093994,
 0.02415013313293457,
 0.05804133415222168,
 0.02527034282684326,
 0.04807102680206299,
 0.003994584083557129,
 0.021612167358398438,
 0.022856831550598145,
 0.009163856506347656,
 0.01067817211151123,
 0.0046149492263793945,
 0.019084453582763672,
 0.013350486755371094,
 0.009339094161987305,
 0.0028172731399536133,
 0.0029038190841674805,
 0.002758502960205078,
 0.012760281562805176,
 0.006015419960021973,
 0.004072666168212891,
 0.024454712867736816,
 0.005753040313720703]

In [5]:
# merged lora

res

[0.013316988945007324,
 0.013686060905456543,
 0.0078051090240478516,
 0.009980559349060059,
 0.02069699764251709,
 0.0016776323318481445,
 0.02029287815093994,
 0.02415013313293457,
 0.05804133415222168,
 0.02527034282684326,
 0.04807102680206299,
 0.003994584083557129,
 0.021612167358398438,
 0.022856831550598145,
 0.009163856506347656,
 0.01067817211151123,
 0.0046149492263793945,
 0.019084453582763672,
 0.013350486755371094,
 0.009339094161987305,
 0.0028172731399536133,
 0.0029038190841674805,
 0.002758502960205078,
 0.012760281562805176,
 0.006015419960021973,
 0.004072666168212891,
 0.024454712867736816,
 0.005753040313720703]

In [6]:
np.array(res).mean()

0.014972120523452759

In [7]:
model.model.layers[0].self_attn.q_proj

Linear(in_features=3072, out_features=3072, bias=False)

In [8]:
import torch

score._score(torch.zeros_like(model.model.layers[0].self_attn.q_proj.weight))

1.0