In [None]:
!pip3 install adapter-transformers

In [5]:
import torch, os
from torch import cuda
import numpy as np
# transformers
from adapters import AdapterTrainer
from transformers import (BertTokenizerFast, 
                          BertForSequenceClassification)
from transformers import Trainer, TrainingArguments
from datasets import load_dataset

device = 'cuda' if cuda.is_available() else 'cpu'

In [1]:
import torch
from datasets import load_dataset
from adapters import AutoAdapterModel, AdapterTrainer, AdapterConfig
from transformers import AutoTokenizer, TrainingArguments


  from .autonotebook import tqdm as notebook_tqdm


In [16]:

# Load the IMDB dataset
dataset = load_dataset("imdb")

# Load pre-trained BERT model and tokenizer
model_name = "bert-base-uncased"
model = AutoAdapterModel.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

def tokenize_function(examples):
    result = tokenizer(examples["text"], padding="max_length", truncation=True, max_length=512)
    result["labels"] = examples["label"]
    return result

tokenized_datasets = dataset.map(tokenize_function, batched=True, remove_columns=dataset["train"].column_names)
tokenized_datasets.set_format("torch")



Map: 100%|██████████| 25000/25000 [00:06<00:00, 3989.54 examples/s]
Map: 100%|██████████| 25000/25000 [00:06<00:00, 3893.49 examples/s]
Map: 100%|██████████| 50000/50000 [00:12<00:00, 3946.04 examples/s]


In [17]:
# Add a new adapter
adapter_name = "imdb_sentiment"
model.add_adapter(adapter_name, config="pfeiffer")
model.add_classification_head(adapter_name, num_labels=2)
model.set_active_adapters(adapter_name)
model.train_adapter([adapter_name])



In [18]:
# Define training arguments
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=10,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
)




In [19]:
# Initialize AdapterTrainer
trainer = AdapterTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
)


In [20]:
# Train the adapter
trainer.train()


  0%|          | 0/9375 [04:26<?, ?it/s]           

{'loss': 0.697, 'grad_norm': 2.010999917984009, 'learning_rate': 1.0000000000000002e-06, 'epoch': 0.0}



  0%|          | 0/9375 [04:30<?, ?it/s]           

{'loss': 0.7126, 'grad_norm': 2.0656707286834717, 'learning_rate': 2.0000000000000003e-06, 'epoch': 0.01}



  0%|          | 0/9375 [04:34<?, ?it/s]           

{'loss': 0.6976, 'grad_norm': 3.8423454761505127, 'learning_rate': 3e-06, 'epoch': 0.01}



  0%|          | 0/9375 [04:39<?, ?it/s]           

{'loss': 0.7141, 'grad_norm': 2.1756253242492676, 'learning_rate': 4.000000000000001e-06, 'epoch': 0.01}



  0%|          | 0/9375 [04:43<?, ?it/s]           

{'loss': 0.6946, 'grad_norm': 5.4154372215271, 'learning_rate': 5e-06, 'epoch': 0.02}



  0%|          | 0/9375 [04:47<?, ?it/s]           

{'loss': 0.7031, 'grad_norm': 2.1245453357696533, 'learning_rate': 6e-06, 'epoch': 0.02}



  0%|          | 0/9375 [04:51<?, ?it/s]           

{'loss': 0.6993, 'grad_norm': 3.7083022594451904, 'learning_rate': 7.000000000000001e-06, 'epoch': 0.02}



  0%|          | 0/9375 [04:55<?, ?it/s]           

{'loss': 0.6927, 'grad_norm': 3.468797445297241, 'learning_rate': 8.000000000000001e-06, 'epoch': 0.03}



  0%|          | 0/9375 [04:59<?, ?it/s]           

{'loss': 0.6947, 'grad_norm': 2.6199350357055664, 'learning_rate': 9e-06, 'epoch': 0.03}



  0%|          | 0/9375 [05:03<?, ?it/s]            

{'loss': 0.6904, 'grad_norm': 3.723456859588623, 'learning_rate': 1e-05, 'epoch': 0.03}



  0%|          | 0/9375 [05:07<?, ?it/s]            

{'loss': 0.6773, 'grad_norm': 5.025993347167969, 'learning_rate': 1.1000000000000001e-05, 'epoch': 0.04}



  0%|          | 0/9375 [05:11<?, ?it/s]            

{'loss': 0.6828, 'grad_norm': 3.3117024898529053, 'learning_rate': 1.2e-05, 'epoch': 0.04}



  0%|          | 0/9375 [05:16<?, ?it/s]            

{'loss': 0.6894, 'grad_norm': 2.592426061630249, 'learning_rate': 1.3000000000000001e-05, 'epoch': 0.04}



  0%|          | 0/9375 [05:20<?, ?it/s]            

{'loss': 0.6804, 'grad_norm': 3.2214226722717285, 'learning_rate': 1.4000000000000001e-05, 'epoch': 0.04}



  0%|          | 0/9375 [05:24<?, ?it/s]            

{'loss': 0.6718, 'grad_norm': 5.2319488525390625, 'learning_rate': 1.5e-05, 'epoch': 0.05}



  0%|          | 0/9375 [05:28<?, ?it/s]            

{'loss': 0.6733, 'grad_norm': 4.000744342803955, 'learning_rate': 1.6000000000000003e-05, 'epoch': 0.05}



  0%|          | 0/9375 [05:32<?, ?it/s]            

{'loss': 0.6377, 'grad_norm': 4.916184902191162, 'learning_rate': 1.7000000000000003e-05, 'epoch': 0.05}



  0%|          | 0/9375 [05:36<?, ?it/s]            

{'loss': 0.6593, 'grad_norm': 2.4713504314422607, 'learning_rate': 1.8e-05, 'epoch': 0.06}



  0%|          | 0/9375 [05:40<?, ?it/s]            

{'loss': 0.6509, 'grad_norm': 3.27276611328125, 'learning_rate': 1.9e-05, 'epoch': 0.06}



  0%|          | 0/9375 [05:45<?, ?it/s]            

{'loss': 0.6681, 'grad_norm': 1.7538334131240845, 'learning_rate': 2e-05, 'epoch': 0.06}



  0%|          | 0/9375 [05:49<?, ?it/s]            

{'loss': 0.6416, 'grad_norm': 2.5759153366088867, 'learning_rate': 2.1e-05, 'epoch': 0.07}



  0%|          | 0/9375 [05:53<?, ?it/s]            

{'loss': 0.6394, 'grad_norm': 2.9192869663238525, 'learning_rate': 2.2000000000000003e-05, 'epoch': 0.07}



  0%|          | 0/9375 [05:57<?, ?it/s]            

{'loss': 0.6314, 'grad_norm': 2.081392288208008, 'learning_rate': 2.3000000000000003e-05, 'epoch': 0.07}



  0%|          | 0/9375 [06:01<?, ?it/s]            

{'loss': 0.6298, 'grad_norm': 3.7699763774871826, 'learning_rate': 2.4e-05, 'epoch': 0.08}



  0%|          | 0/9375 [06:06<?, ?it/s]            

{'loss': 0.5964, 'grad_norm': 2.518214225769043, 'learning_rate': 2.5e-05, 'epoch': 0.08}



  0%|          | 0/9375 [06:10<?, ?it/s]            

{'loss': 0.5697, 'grad_norm': 1.8514018058776855, 'learning_rate': 2.6000000000000002e-05, 'epoch': 0.08}



  0%|          | 0/9375 [06:14<?, ?it/s]            

{'loss': 0.5791, 'grad_norm': 1.8715368509292603, 'learning_rate': 2.7000000000000002e-05, 'epoch': 0.09}



  0%|          | 0/9375 [06:18<?, ?it/s]            

{'loss': 0.555, 'grad_norm': 2.3606629371643066, 'learning_rate': 2.8000000000000003e-05, 'epoch': 0.09}



  0%|          | 0/9375 [06:23<?, ?it/s]            

{'loss': 0.5068, 'grad_norm': 1.7182750701904297, 'learning_rate': 2.9e-05, 'epoch': 0.09}



  0%|          | 0/9375 [06:27<?, ?it/s]            

{'loss': 0.4716, 'grad_norm': 3.0778517723083496, 'learning_rate': 3e-05, 'epoch': 0.1}



  0%|          | 0/9375 [06:31<?, ?it/s]            

{'loss': 0.3809, 'grad_norm': 2.3145031929016113, 'learning_rate': 3.1e-05, 'epoch': 0.1}



  0%|          | 0/9375 [06:35<?, ?it/s]            

{'loss': 0.4543, 'grad_norm': 1.7162362337112427, 'learning_rate': 3.2000000000000005e-05, 'epoch': 0.1}



  0%|          | 0/9375 [06:40<?, ?it/s]            

{'loss': 0.4422, 'grad_norm': 5.468442916870117, 'learning_rate': 3.3e-05, 'epoch': 0.11}



  0%|          | 0/9375 [06:44<?, ?it/s]            

{'loss': 0.2955, 'grad_norm': 2.599169969558716, 'learning_rate': 3.4000000000000007e-05, 'epoch': 0.11}



  0%|          | 0/9375 [06:48<?, ?it/s]            

{'loss': 0.2966, 'grad_norm': 1.3882026672363281, 'learning_rate': 3.5e-05, 'epoch': 0.11}



  0%|          | 0/9375 [06:52<?, ?it/s]            

{'loss': 0.3952, 'grad_norm': 7.11396598815918, 'learning_rate': 3.6e-05, 'epoch': 0.12}



  0%|          | 0/9375 [06:57<?, ?it/s]            

{'loss': 0.352, 'grad_norm': 1.865506649017334, 'learning_rate': 3.7e-05, 'epoch': 0.12}



  0%|          | 0/9375 [07:01<?, ?it/s]            

{'loss': 0.4073, 'grad_norm': 5.743044853210449, 'learning_rate': 3.8e-05, 'epoch': 0.12}



  0%|          | 0/9375 [07:05<?, ?it/s]            

{'loss': 0.3716, 'grad_norm': 1.79734206199646, 'learning_rate': 3.9000000000000006e-05, 'epoch': 0.12}



  0%|          | 0/9375 [07:09<?, ?it/s]            

{'loss': 0.2895, 'grad_norm': 3.1945784091949463, 'learning_rate': 4e-05, 'epoch': 0.13}



  0%|          | 0/9375 [07:14<?, ?it/s]            

{'loss': 0.5, 'grad_norm': 4.27720308303833, 'learning_rate': 4.1e-05, 'epoch': 0.13}



  0%|          | 0/9375 [07:18<?, ?it/s]            

{'loss': 0.232, 'grad_norm': 2.353832960128784, 'learning_rate': 4.2e-05, 'epoch': 0.13}



  0%|          | 0/9375 [07:22<?, ?it/s]            

{'loss': 0.31, 'grad_norm': 2.9257004261016846, 'learning_rate': 4.3e-05, 'epoch': 0.14}



  0%|          | 0/9375 [07:26<?, ?it/s]            

{'loss': 0.2811, 'grad_norm': 6.831552505493164, 'learning_rate': 4.4000000000000006e-05, 'epoch': 0.14}



  0%|          | 0/9375 [07:31<?, ?it/s]            

{'loss': 0.2891, 'grad_norm': 6.254624366760254, 'learning_rate': 4.5e-05, 'epoch': 0.14}



  0%|          | 0/9375 [07:35<?, ?it/s]            

{'loss': 0.2997, 'grad_norm': 1.0051600933074951, 'learning_rate': 4.600000000000001e-05, 'epoch': 0.15}



  0%|          | 0/9375 [07:39<?, ?it/s]            

{'loss': 0.3998, 'grad_norm': 20.090879440307617, 'learning_rate': 4.7e-05, 'epoch': 0.15}



  0%|          | 0/9375 [07:43<?, ?it/s]            

{'loss': 0.266, 'grad_norm': 2.1583898067474365, 'learning_rate': 4.8e-05, 'epoch': 0.15}



  0%|          | 0/9375 [07:47<?, ?it/s]            

{'loss': 0.3773, 'grad_norm': 4.903206825256348, 'learning_rate': 4.9e-05, 'epoch': 0.16}



  0%|          | 0/9375 [07:52<?, ?it/s]            

{'loss': 0.5339, 'grad_norm': 2.2866342067718506, 'learning_rate': 5e-05, 'epoch': 0.16}



  0%|          | 0/9375 [07:56<?, ?it/s]            

{'loss': 0.528, 'grad_norm': 4.721643447875977, 'learning_rate': 4.994366197183099e-05, 'epoch': 0.16}



  0%|          | 0/9375 [08:01<?, ?it/s]            

{'loss': 0.3847, 'grad_norm': 1.8541417121887207, 'learning_rate': 4.9887323943661973e-05, 'epoch': 0.17}



  0%|          | 0/9375 [08:05<?, ?it/s]            

{'loss': 0.343, 'grad_norm': 7.8743391036987305, 'learning_rate': 4.983098591549296e-05, 'epoch': 0.17}



  0%|          | 0/9375 [08:09<?, ?it/s]            

{'loss': 0.1943, 'grad_norm': 0.24501043558120728, 'learning_rate': 4.9774647887323944e-05, 'epoch': 0.17}



  0%|          | 0/9375 [08:13<?, ?it/s]            

{'loss': 0.46, 'grad_norm': 9.978816986083984, 'learning_rate': 4.971830985915493e-05, 'epoch': 0.18}



  0%|          | 0/9375 [08:18<?, ?it/s]          

{'loss': 0.2885, 'grad_norm': 4.689031600952148, 'learning_rate': 4.966197183098592e-05, 'epoch': 0.18}



  0%|          | 0/9375 [08:22<?, ?it/s]            

{'loss': 0.3012, 'grad_norm': 0.8160824775695801, 'learning_rate': 4.96056338028169e-05, 'epoch': 0.18}



  0%|          | 0/9375 [08:26<?, ?it/s]            

{'loss': 0.327, 'grad_norm': 2.713021755218506, 'learning_rate': 4.954929577464789e-05, 'epoch': 0.19}



  0%|          | 0/9375 [08:30<?, ?it/s]            

{'loss': 0.2851, 'grad_norm': 6.729391098022461, 'learning_rate': 4.949295774647887e-05, 'epoch': 0.19}



  0%|          | 0/9375 [08:35<?, ?it/s]            

{'loss': 0.2624, 'grad_norm': 0.4984869658946991, 'learning_rate': 4.9436619718309864e-05, 'epoch': 0.19}



  0%|          | 0/9375 [08:39<?, ?it/s]            

{'loss': 0.3488, 'grad_norm': 9.854047775268555, 'learning_rate': 4.938028169014084e-05, 'epoch': 0.2}



  0%|          | 0/9375 [08:43<?, ?it/s]            

{'loss': 0.2413, 'grad_norm': 4.315183639526367, 'learning_rate': 4.9323943661971835e-05, 'epoch': 0.2}



  0%|          | 0/9375 [08:47<?, ?it/s]            

{'loss': 0.337, 'grad_norm': 5.421668529510498, 'learning_rate': 4.926760563380282e-05, 'epoch': 0.2}



  0%|          | 0/9375 [08:52<?, ?it/s]            

{'loss': 0.4237, 'grad_norm': 6.858036994934082, 'learning_rate': 4.9211267605633806e-05, 'epoch': 0.2}



  0%|          | 0/9375 [08:56<?, ?it/s]            

{'loss': 0.3067, 'grad_norm': 3.015293836593628, 'learning_rate': 4.915492957746479e-05, 'epoch': 0.21}



  0%|          | 0/9375 [09:00<?, ?it/s]            

{'loss': 0.3121, 'grad_norm': 1.2972501516342163, 'learning_rate': 4.909859154929578e-05, 'epoch': 0.21}



  0%|          | 0/9375 [09:04<?, ?it/s]            

{'loss': 0.2543, 'grad_norm': 3.757408618927002, 'learning_rate': 4.904225352112676e-05, 'epoch': 0.21}



  0%|          | 0/9375 [09:09<?, ?it/s]            

{'loss': 0.2636, 'grad_norm': 1.1920865774154663, 'learning_rate': 4.898591549295775e-05, 'epoch': 0.22}




KeyboardInterrupt: 

In [15]:
# Add a new adapter
adapter_name = "imdb_sentiment"
model.add_adapter(adapter_name, config="pfeiffer")

# Activate the adapter
model.set_active_adapters(adapter_name)

# Prepare the model for adapter training
model.train_adapter([adapter_name])

# Define training arguments
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir="./logs",
)

# Initialize AdapterTrainer
trainer = AdapterTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
)


# Train the adapter
trainer.train()

  0%|          | 0/9375 [01:51<?, ?it/s]
  0%|          | 0/9375 [00:00<?, ?it/s]

ValueError: Expected input batch_size (4096) to match target batch_size (8).

In [13]:

# Add a new adapter
adapter_name = "imdb_sentiment"
model.add_adapter(adapter_name, config="pfeiffer")

# Activate the adapter
model.set_active_adapters(adapter_name)

# Prepare the model for adapter training
model.train_adapter([adapter_name])

# Define training arguments
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir="./logs",
)

# Initialize AdapterTrainer
trainer = AdapterTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
)


# Train the adapter
trainer.train()


ValueError: An adapter with the name 'imdb_sentiment' has already been added.

  0%|          | 0/9375 [03:31<?, ?it/s]
  0%|          | 0/9375 [00:00<?, ?it/s]

ValueError: Expected input batch_size (4096) to match target batch_size (8).

In [None]:
# Train the adapter
trainer.train()

# Save the adapter
model.save_adapter("./imdb_adapter", adapter_name)

# Load the adapter (for future use)
loaded_model = AutoAdapterModel.from_pretrained(model_name)
loaded_model.load_adapter("./imdb_adapter")

In [None]:
!pip install accelerate -U