In [42]:
from adapters import AutoAdapterModel, BnConfig, init
from transformers import AutoTokenizer, AutoModelForCausalLM
from adapters.trainer import AdapterTrainer
from transformers.training_args import TrainingArguments 
from datasets import Dataset
import pickle
from data_collator import DataCollator
from gnd_dataset import GNDDataset
from utils import PAD_TOKEN
import torch

In [5]:

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

gnd = pickle.load(open("data/gnd.pickle", "rb"))
model_name = "meta-llama/Llama-3.2-1B"
ds_path = "datasets/no_context"

In [90]:
cm_model = AutoAdapterModel.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token_id = tokenizer.convert_tokens_to_ids(PAD_TOKEN)

bn_name = "bottleneck-adapter"
config = BnConfig(mh_adapter=True, output_adapter=True, reduction_factor=16, non_linearity="silu")
cm_model.add_adapter(bn_name, config=config)
cm_model.set_active_adapters(bn_name)
cm_model.train_adapter(bn_name)

There are adapters available but none are activated for the forward pass.


In [95]:
cm_model.active_adapters

Stack[bottleneck-adapter]

In [91]:
cm_model.train_adapter(bn_name)
trainable = 0
for n, p in cm_model.named_parameters():
    if p.requires_grad:
        trainable += p.numel()
        print(f"{n} {p.shape}")

model.layers.0.attention_adapters.adapters.bottleneck-adapter.adapter_down.0.weight torch.Size([128, 2048])
model.layers.0.attention_adapters.adapters.bottleneck-adapter.adapter_down.0.bias torch.Size([128])
model.layers.0.attention_adapters.adapters.bottleneck-adapter.adapter_up.weight torch.Size([2048, 128])
model.layers.0.attention_adapters.adapters.bottleneck-adapter.adapter_up.bias torch.Size([2048])
model.layers.0.output_adapters.adapters.bottleneck-adapter.adapter_down.0.weight torch.Size([128, 2048])
model.layers.0.output_adapters.adapters.bottleneck-adapter.adapter_down.0.bias torch.Size([128])
model.layers.0.output_adapters.adapters.bottleneck-adapter.adapter_up.weight torch.Size([2048, 128])
model.layers.0.output_adapters.adapters.bottleneck-adapter.adapter_up.bias torch.Size([2048])
model.layers.1.attention_adapters.adapters.bottleneck-adapter.adapter_down.0.weight torch.Size([128, 2048])
model.layers.1.attention_adapters.adapters.bottleneck-adapter.adapter_down.0.bias torc

In [92]:
model.active_head

'default'

In [68]:
cm_model

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 2048)
    (layers): ModuleList(
      (0-15): 16 x LlamaDecoderLayerWithAdapters(
        (self_attn): LlamaAttentionWithAdapters(
          (q_proj): LoRALinearTorch(
            in_features=2048, out_features=2048, bias=False
            (shared_parameters): ModuleDict()
            (loras): ModuleDict()
          )
          (k_proj): LoRALinearTorch(
            in_features=2048, out_features=512, bias=False
            (shared_parameters): ModuleDict()
            (loras): ModuleDict()
          )
          (v_proj): LoRALinearTorch(
            in_features=2048, out_features=512, bias=False
            (shared_parameters): ModuleDict()
            (loras): ModuleDict()
          )
          (o_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (prefix_tuning): PrefixTuningLayer(
            (prefix_gates): ModuleDict()
            (pool): PrefixTuningPool(
              (prefix_tu

In [64]:

cm_model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token_id = tokenizer.convert_tokens_to_ids(PAD_TOKEN)

bn_name = "bottleneck-adapter"
config = BnConfig(mh_adapter=True, output_adapter=True, reduction_factor=16, non_linearity="silu")
init(cm_model)
cm_model.add_causal_lm_head("lm_head")
# cm_model.set_active_adapters(bn_name)
adapter_name = cm_model.load_adapter("adapter_model/checkpoint-8000/bottleneck-adapter", set_active=True)
# cm_model.set_active_adapters(adapter_name)

AttributeError: 'LlamaForCausalLM' object has no attribute 'add_causal_lm_head'

In [38]:
data_collator = DataCollator(
    tokenizer=tokenizer,
    graph=gnd,
    device=DEVICE,
)

In [43]:
ds = GNDDataset(
    data_dir=ds_path,
    gnd_graph=gnd,
    load_from_disk=True

)

In [63]:
count = 0
num_instances = 10
for instance in ds["validate"]:
    count += 1
    if count > num_instances:
        break
    gold_labels = instance["label-names"]
    label_string = instance["label-string"]
    instance_batch = {k: torch.tensor(v) for k, v in instance.items() if k in ["attention_mask", "input_ids", "seq_lengths"]}
    input_ids = instance_batch["input_ids"]
    attention_mask = instance_batch["attention_mask"]
    sequence_length = instance_batch["seq_lengths"]
    input_ids = input_ids[:sequence_length]
    attention_mask = attention_mask[:sequence_length]
    inputs = {
        "input_ids": input_ids.unsqueeze(0),
        "attention_mask": attention_mask.unsqueeze(0),
    }
    output = cm_model.generate(**inputs, pad_token_id=tokenizer.eos_token_id)
    output_text = tokenizer.decode(output[0], skip_special_tokens=True)
    print(output_text)

Titel: Synökologische Studien zum simultanen Befall von Winterweizen (Triticum aestivum L.) mit Aphiden und getreidepathogenen Pilzen. Schlagwörter: 
Titel: Regeln für die Ansetzung von Körperschaftsnamen RAK-Körperschaften ; Bestimmungen d. RAK-WB, Grundbestimmungen d. RAK-ÖB, GKD-Erl. u. -Anh.. Schlagwörter: 


Titel: Kleines Fernsprechbuch für Lübeck und Umgegend. Schlagwörter: 
Titel: Spurensuche im Schlossmuseum Jever Beiträge zur Provenienzforschung, Sammlungs- und Vereinsgeschichte. Schlagwörter: 
Titel: Der Bitcoin-Standard die dezentrale Alternative zum Zentralbankensystem. Schlagwörter: 
Titel: Samuel Tillerman, der Läufer. Schlagwörter: 4. Reiterter; Läder; Schuljung; Schüler; Schuljä
Titel: Charakterisierung der Schilddrüsenfunktion und Nachweis eines Promotordefektes als Ursache des kompletten Thyroxin-bindenden Globulin-Mangels beim Hund. Schlagwörter: 
Titel: Indikatoren und Verpackungen für in der Endverpackung zu sterilisierende Medizinprodukte Normen. Schlagwörter: 
Titel: Das Findelkind vom Watt. Schlagwörter: 5. Sammlungsliteratur; Soldat von Soldat von Soldat von Soldat
Titel: Pronomen und Pronominalklitika im Cimbro Untersuchungen zum grammatischen Wandel einer deutschen Minderheitensprache in romanischer Umgebung. Schlagwörter: 


In [56]:
cm_model.adapter_summary()

