## Adapter composition Test
* https://github.com/adapter-hub/adapters/blob/main/notebooks/07a_Adapter_train_NER_with_id2label.ipynb


In [1]:
!pip install -Uq adapters
!pip install -q datasets
!pip install -q scikit-learn
!pip install -Uq accelerate
!pip install -Uq seqeval

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m229.9/229.9 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m521.2/521.2 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.3/115.3 kB[0m [31m9.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m11.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m261.4/261.4 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 kB[0m [31m927.1 kB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for seqeval (setup.py) ... [?25l[?25hdone


In [2]:
!pip install evaluate

Collecting evaluate
  Downloading evaluate-0.4.1-py3-none-any.whl (84 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m1.1 MB/s[0m eta [36m0:00:00[0m
Collecting responses<0.19 (from evaluate)
  Downloading responses-0.18.0-py3-none-any.whl (38 kB)
Installing collected packages: responses, evaluate
Successfully installed evaluate-0.4.1 responses-0.18.0


In [3]:
from datasets import load_dataset
from adapters.composition import Stack, Parallel

## TASK 1

In [4]:
from adapters import AutoAdapterModel
from transformers import AutoTokenizer, AutoConfig
from datasets import load_dataset
from torch.utils.data import Dataset
import torch
import torch.nn.functional as F
from tqdm.notebook import tqdm
from torch import nn
#The labels for the NER task and the dictionaries to map the to ids or
#the other way around
labels = ["O", 'B-LOC', "I-LOC", "B-PER", "I-PER", "B-ORG", "I-ORG", "B-MISC", "I-MISC"]
id2label = {id_: label for id_, label in enumerate(labels)}
label2id = {label: id_ for id_, label in enumerate(labels)}

#model_name = "bert-base-uncased"
model_name = "google/bert_uncased_L-2_H-128_A-2"
config = AutoConfig.from_pretrained(model_name, num_label=len(labels), id2label=id2label, label2id=label2id)
model = AutoAdapterModel.from_pretrained(model_name)
model.add_adapter("ner")

model.add_tagging_head("ner_head", num_labels=len(labels), id2label=id2label)
tokenizer = AutoTokenizer.from_pretrained(model_name)

print(model.get_labels())

config.json:   0%|          | 0.00/382 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/17.7M [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

['O', 'B-LOC', 'I-LOC', 'B-PER', 'I-PER', 'B-ORG', 'I-ORG', 'B-MISC', 'I-MISC']


In [5]:
def encode_data(data):
    encoded = tokenizer([" ".join(doc) for doc in data["tokens"]], pad_to_max_length=True, padding="max_length",
                        max_length=512, truncation=True, add_special_tokens=True)
    return (encoded)


def encode_labels(example):
    r_tags = []
    count = 0
    token2word = []
    for index, token in enumerate(tokenizer.tokenize(" ".join(example["tokens"]))):
        if token.startswith("##") or (token in example["tokens"][index - count - 1].lower() and index - count - 1 >= 0):
            # If the token is part of a larger token and not the first we need to differentiate.
            # If it is a B (beginning) label the next one needs to be assigned an I (intermediate) label.
            # Otherwise they can be labeled the same.
            if r_tags[-1] % 2 == 1:
                r_tags.append(r_tags[-1] + 1)
            else:
                r_tags.append(r_tags[-1])
            count += 1
        else:
            r_tags.append(example["ner_tags"][index - count])

        token2word.append(index - count)
    r_tags = torch.tensor(r_tags)
    labels = {}
    # Pad token to maximum length for using batches
    labels["labels"] = F.pad(r_tags, pad=(1, 511 - r_tags.shape[0]), mode='constant', value=0)
    # Truncate if the document is too long
    labels["labels"] = labels["labels"][:512]
    return labels

In [6]:
dataset = load_dataset("conll2003")
dataset = dataset.map(encode_labels)
dataset = dataset.map(encode_data, batched=True, batch_size=10)

dataset.set_format(type='torch', columns=['input_ids', 'token_type_ids', 'attention_mask', 'labels'])

#dataloader = torch.utils.data.DataLoader(dataset["train"])
#evaluate_dataloader = torch.utils.data.DataLoader(dataset["test"])

Downloading builder script:   0%|          | 0.00/9.57k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/3.73k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/12.3k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/983k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/14041 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/3250 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/3453 [00:00<?, ? examples/s]

Map:   0%|          | 0/14041 [00:00<?, ? examples/s]

Map:   0%|          | 0/3250 [00:00<?, ? examples/s]

Map:   0%|          | 0/3453 [00:00<?, ? examples/s]

Map:   0%|          | 0/14041 [00:00<?, ? examples/s]

Map:   0%|          | 0/3250 [00:00<?, ? examples/s]

Map:   0%|          | 0/3453 [00:00<?, ? examples/s]

In [7]:
dataset

DatasetDict({
    train: Dataset({
        features: ['id', 'tokens', 'pos_tags', 'chunk_tags', 'ner_tags', 'labels', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 14041
    })
    validation: Dataset({
        features: ['id', 'tokens', 'pos_tags', 'chunk_tags', 'ner_tags', 'labels', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 3250
    })
    test: Dataset({
        features: ['id', 'tokens', 'pos_tags', 'chunk_tags', 'ner_tags', 'labels', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 3453
    })
})

In [8]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model.to(device)
model.set_active_adapters([["ner"]])
model.train_adapter(["ner"])

In [9]:
from transformers import TrainingArguments, Trainer
from adapters import AdapterTrainer
from datasets import load_metric
import evaluate
import numpy as np

# Metrics
metric = load_metric("seqeval")

def compute_metrics(p):
    predictions, labels = p
    predictions = np.argmax(predictions, axis=2)
    label_list = id2label

    # Remove ignored index (special tokens)
    true_predictions = [
        [label_list[p] for (p, l) in zip(prediction, label) if l != -100]
        for prediction, label in zip(predictions, labels)
    ]
    true_labels = [
        [label_list[l] for (p, l) in zip(prediction, label) if l != -100]
        for prediction, label in zip(predictions, labels)
    ]

    results = metric.compute(predictions=true_predictions, references=true_labels)
    return {
        "precision": results["overall_precision"],
        "recall": results["overall_recall"],
        "f1": results["overall_f1"],
        "accuracy": results["overall_accuracy"],
    }


training_args = TrainingArguments(
        output_dir="./",
        evaluation_strategy="epoch",
        learning_rate= 1e-3,
        num_train_epochs= 1,
        logging_dir="./logs",
        log_level="info",
        logging_steps=10,
        report_to="all")

trainer = AdapterTrainer(
            model=model,
            args=training_args,
            train_dataset=dataset['train'],
            eval_dataset=dataset['validation'],
            compute_metrics=compute_metrics)

  metric = load_metric("seqeval")


Downloading builder script:   0%|          | 0.00/2.47k [00:00<?, ?B/s]

In [10]:
trainer.train()

The following columns in the training set don't have a corresponding argument in `BertAdapterModel.forward` and have been ignored: ner_tags, tokens, chunk_tags, id, pos_tags. If ner_tags, tokens, chunk_tags, id, pos_tags are not expected by `BertAdapterModel.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 14,041
  Num Epochs = 1
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 1,756
  Number of trainable parameters = 5,529


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,0.3188,0.298362,0.079267,0.557877,0.138811,0.948243


The following columns in the evaluation set don't have a corresponding argument in `BertAdapterModel.forward` and have been ignored: ner_tags, tokens, chunk_tags, id, pos_tags. If ner_tags, tokens, chunk_tags, id, pos_tags are not expected by `BertAdapterModel.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 3250
  Batch size = 8


Training completed. Do not forget to share your model on huggingface.co/models =)




TrainOutput(global_step=1756, training_loss=0.4082722936784488, metrics={'train_runtime': 83.6918, 'train_samples_per_second': 167.77, 'train_steps_per_second': 20.982, 'total_flos': 18066267589632.0, 'train_loss': 0.4082722936784488, 'epoch': 1.0})

In [11]:
trainer.evaluate()

The following columns in the evaluation set don't have a corresponding argument in `BertAdapterModel.forward` and have been ignored: ner_tags, tokens, chunk_tags, id, pos_tags. If ner_tags, tokens, chunk_tags, id, pos_tags are not expected by `BertAdapterModel.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 3250
  Batch size = 8


{'eval_loss': 0.29836246371269226,
 'eval_precision': 0.07926741680632032,
 'eval_recall': 0.55787700084246,
 'eval_f1': 0.13881144534115922,
 'eval_accuracy': 0.948243389423077,
 'eval_runtime': 46.0774,
 'eval_samples_per_second': 70.533,
 'eval_steps_per_second': 8.833,
 'epoch': 1.0}

In [12]:
def predict(sentence):
  tokens = tokenizer.encode(
        sentence,
        return_tensors="pt",
    )

  model.eval()
  preds = model(tokens, adapter_names=['ner'])[0]
  preds = preds.detach().numpy()
  preds = np.argmax(preds, axis=2)
  return tokenizer.tokenize(sentence), preds.squeeze()[1:-1]

In [13]:
#example_text="Germany's representative to the European Union\'s veterinary committee Werner Zwingmann said on Wednesday consumers should buy sheepmeat from countries other than Britain until the scientific advice was clearer."
# Get the mapping of ids to labels
#label_map = model.get_labels_dict()
#tokens, preds = predict(example_text)
#for token, pred in zip(tokens, preds):
#  print(f"{token}({label_map[pred]}) ", end="")

In [14]:
model.save_adapter('adapter/', 'ner') # use save adapter with heads
model.save_head("head/", "ner_head")

## TASK 2

In [15]:
dataset2 = load_dataset("rotten_tomatoes")
dataset2.num_rows

Downloading builder script:   0%|          | 0.00/5.03k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/2.02k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/7.25k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/488k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/8530 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/1066 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1066 [00:00<?, ? examples/s]

{'train': 8530, 'validation': 1066, 'test': 1066}

In [16]:

dataset2['train'][0]

{'text': 'the rock is destined to be the 21st century\'s new " conan " and that he\'s going to make a splash even greater than arnold schwarzenegger , jean-claud van damme or steven segal .',
 'label': 1}

In [17]:

from transformers import RobertaTokenizer,AutoTokenizer

tokenizer2 = AutoTokenizer.from_pretrained("google/bert_uncased_L-2_H-128_A-2")

def encode_batch(batch):
  """Encodes a batch of input data using the model tokenizer."""
  return tokenizer2(batch["text"], max_length=80, truncation=True, padding="max_length")

# Encode the input data
dataset2 = dataset2.map(encode_batch, batched=True)
# The transformers model expects the target class column to be named "labels"
dataset2 = dataset2.rename_column(original_column_name="label", new_column_name="labels")
# Transform to pytorch tensors and only output the required columns
dataset2.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])

Could not locate the tokenizer configuration file, will try to use the model config instead.
loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--google--bert_uncased_L-2_H-128_A-2/snapshots/30b0a37ccaaa32f332884b96992754e246e48c5f/config.json
Model config BertConfig {
  "_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 128,
  "initializer_range": 0.02,
  "intermediate_size": 512,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 2,
  "num_hidden_layers": 2,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.35.2",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading file vocab.txt from cache at /root/.cache/huggingface/hub/models--google--bert_uncased_L-2_H-128_A-2/snapshots/30b0a37cc

Map:   0%|          | 0/8530 [00:00<?, ? examples/s]

Map:   0%|          | 0/1066 [00:00<?, ? examples/s]

Map:   0%|          | 0/1066 [00:00<?, ? examples/s]

In [18]:
from transformers import RobertaConfig, AutoConfig
from adapters import AutoAdapterModel

config = AutoConfig.from_pretrained(
    "google/bert_uncased_L-2_H-128_A-2",
    num_labels=2,
)
model2 = AutoAdapterModel.from_pretrained(
    "google/bert_uncased_L-2_H-128_A-2",
    config=config,
)

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--google--bert_uncased_L-2_H-128_A-2/snapshots/30b0a37ccaaa32f332884b96992754e246e48c5f/config.json
Model config BertConfig {
  "_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 128,
  "initializer_range": 0.02,
  "intermediate_size": 512,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 2,
  "num_hidden_layers": 2,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.35.2",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weights file model.safetensors from cache at /root/.cache/huggingface/hub/models--google--bert_uncased_L-2_H-128_A-2/snapshots/30b0a37ccaaa32f332884b96992754e246e48c5f/model.safetensors
Generate config GenerationC

In [19]:

# Add a new adapter
model2.add_adapter("rotten_tomatoes")
# Add a matching classification head
model2.add_classification_head(
    "rotten_tomatoes",
    num_labels=2,
    id2label={ 0: "👎", 1: "👍"}
  )
# Activate the adapter
model2.train_adapter("rotten_tomatoes")

In [20]:
from transformers import TrainingArguments, EvalPrediction
training_args2 = TrainingArguments(
    learning_rate=1e-4,
    num_train_epochs=10,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    logging_steps=200,
    output_dir="./training_output",
    overwrite_output_dir=True,
    # The next line is important to ensure the dataset labels are properly passed to the model
    remove_unused_columns=False,
)

def compute_accuracy2(p: EvalPrediction):
  preds = np.argmax(p.predictions, axis=1)
  return {"acc": (preds == p.label_ids).mean()}

trainer2 = AdapterTrainer(
    model=model2,
    args=training_args2,
    train_dataset=dataset2["train"],
    eval_dataset=dataset2["validation"],
    compute_metrics=compute_accuracy2,
)

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


In [21]:

trainer2.train()

***** Running training *****
  Num examples = 8,530
  Num Epochs = 10
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 2,670
  Number of trainable parameters = 21,138


Step,Training Loss
200,0.6747
400,0.6376
600,0.6207
800,0.6165
1000,0.6178
1200,0.6115
1400,0.6117
1600,0.6104
1800,0.609
2000,0.6056




Training completed. Do not forget to share your model on huggingface.co/models =)




TrainOutput(global_step=2670, training_loss=0.6183534272154618, metrics={'train_runtime': 36.0501, 'train_samples_per_second': 2366.148, 'train_steps_per_second': 74.063, 'total_flos': 17788120800000.0, 'train_loss': 0.6183534272154618, 'epoch': 10.0})

In [22]:

trainer2.evaluate()

***** Running Evaluation *****
  Num examples = 1066
  Batch size = 32


{'eval_loss': 0.5740393400192261,
 'eval_acc': 0.702626641651032,
 'eval_runtime': 0.3012,
 'eval_samples_per_second': 3539.06,
 'eval_steps_per_second': 112.878,
 'epoch': 10.0}

In [23]:
from transformers import TextClassificationPipeline

classifier = TextClassificationPipeline(model=model2, tokenizer=tokenizer2,device=training_args.device.index)

classifier("This is awesome!")

The model 'BertAdapterModel' is not supported for . Supported models are ['AlbertForSequenceClassification', 'BartForSequenceClassification', 'BertForSequenceClassification', 'BigBirdForSequenceClassification', 'BigBirdPegasusForSequenceClassification', 'BioGptForSequenceClassification', 'BloomForSequenceClassification', 'CamembertForSequenceClassification', 'CanineForSequenceClassification', 'LlamaForSequenceClassification', 'ConvBertForSequenceClassification', 'CTRLForSequenceClassification', 'Data2VecTextForSequenceClassification', 'DebertaForSequenceClassification', 'DebertaV2ForSequenceClassification', 'DistilBertForSequenceClassification', 'ElectraForSequenceClassification', 'ErnieForSequenceClassification', 'ErnieMForSequenceClassification', 'EsmForSequenceClassification', 'FalconForSequenceClassification', 'FlaubertForSequenceClassification', 'FNetForSequenceClassification', 'FunnelForSequenceClassification', 'GPT2ForSequenceClassification', 'GPT2ForSequenceClassification', 'GP

[{'label': '👍', 'score': 0.5015659928321838}]

In [None]:

model2.save_adapter("./final_adapter", "rotten_tomatoes",with_head=True)

!ls -lh final_adapter

total 100K
-rw-r--r-- 1 root root 1.1K Nov 28 02:02 adapter_config.json
-rw-r--r-- 1 root root  458 Nov 28 02:02 head_config.json
-rw-r--r-- 1 root root  21K Nov 28 02:02 pytorch_adapter.bin
-rw-r--r-- 1 root root  68K Nov 28 02:02 pytorch_model_head.bin


## Stack Adapter - Inference

In [None]:
import adapters.composition as ac
from transformers import AutoConfig
from adapters import AutoAdapterModel

config = AutoConfig.from_pretrained(
    "google/bert_uncased_L-2_H-128_A-2",
    num_labels=2,
)
model3 = AutoAdapterModel.from_pretrained(
    "google/bert_uncased_L-2_H-128_A-2",
    config=config,
)

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--google--bert_uncased_L-2_H-128_A-2/snapshots/30b0a37ccaaa32f332884b96992754e246e48c5f/config.json
Model config BertConfig {
  "_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 128,
  "initializer_range": 0.02,
  "intermediate_size": 512,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 2,
  "num_hidden_layers": 2,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.35.2",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weights file model.safetensors from cache at /root/.cache/huggingface/hub/models--google--bert_uncased_L-2_H-128_A-2/snapshots/30b0a37ccaaa32f332884b96992754e246e48c5f/model.safetensors
Generate config GenerationC

In [None]:
ner = model3.load_adapter("./adapter")
#model3.load_head('./head')
sentiment = model3.load_adapter("./final_adapter")
model3.active_adapters = ac.Stack(ner,sentiment)
#model3.active_head = "ner_head"


In [None]:
tokenizer3 = AutoTokenizer.from_pretrained("google/bert_uncased_L-2_H-128_A-2")

# How you can acces the labels and the mapping for a pretrained head
print(model3.get_labels())
print(model3.get_labels_dict())

Could not locate the tokenizer configuration file, will try to use the model config instead.
loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--google--bert_uncased_L-2_H-128_A-2/snapshots/30b0a37ccaaa32f332884b96992754e246e48c5f/config.json
Model config BertConfig {
  "_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 128,
  "initializer_range": 0.02,
  "intermediate_size": 512,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 2,
  "num_hidden_layers": 2,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.35.2",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading file vocab.txt from cache at /root/.cache/huggingface/hub/models--google--bert_uncased_L-2_H-128_A-2/snapshots/30b0a37cc

['👍', '👎']
{1: '👍', 0: '👎'}


In [None]:
from transformers import TextClassificationPipeline

classifier = TextClassificationPipeline(model=model3, tokenizer=tokenizer3,device=training_args.device.index)

classifier("This is awesome!")

The model 'BertAdapterModel' is not supported for . Supported models are ['AlbertForSequenceClassification', 'BartForSequenceClassification', 'BertForSequenceClassification', 'BigBirdForSequenceClassification', 'BigBirdPegasusForSequenceClassification', 'BioGptForSequenceClassification', 'BloomForSequenceClassification', 'CamembertForSequenceClassification', 'CanineForSequenceClassification', 'LlamaForSequenceClassification', 'ConvBertForSequenceClassification', 'CTRLForSequenceClassification', 'Data2VecTextForSequenceClassification', 'DebertaForSequenceClassification', 'DebertaV2ForSequenceClassification', 'DistilBertForSequenceClassification', 'ElectraForSequenceClassification', 'ErnieForSequenceClassification', 'ErnieMForSequenceClassification', 'EsmForSequenceClassification', 'FalconForSequenceClassification', 'FlaubertForSequenceClassification', 'FNetForSequenceClassification', 'FunnelForSequenceClassification', 'GPT2ForSequenceClassification', 'GPT2ForSequenceClassification', 'GP

[{'label': 'LABEL_1', 'score': 0.5723664164543152}]

## Train Stack Adapter

In [None]:
import adapters.composition as ac
from transformers import AutoConfig
from adapters import AutoAdapterModel

config = AutoConfig.from_pretrained(
    "google/bert_uncased_L-2_H-128_A-2")
model4 = AutoAdapterModel.from_pretrained(
    "google/bert_uncased_L-2_H-128_A-2",
    config=config,
)

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--google--bert_uncased_L-2_H-128_A-2/snapshots/30b0a37ccaaa32f332884b96992754e246e48c5f/config.json
Model config BertConfig {
  "_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 128,
  "initializer_range": 0.02,
  "intermediate_size": 512,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 2,
  "num_hidden_layers": 2,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.35.2",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weights file model.safetensors from cache at /root/.cache/huggingface/hub/models--google--bert_uncased_L-2_H-128_A-2/snapshots/30b0a37ccaaa32f332884b96992754e246e48c5f/model.safetensors
Generate config GenerationC

In [None]:
ner = model4.load_adapter("./adapter")
# Add a new adapter
model4.add_adapter("rotten_tomatoes_stack")
# Add a matching classification head
model4.add_classification_head(
    "rotten_tomatoes_stack",
    num_labels=2,
    id2label={ 0: "👎", 1: "👍"}
  )
# Activate the adapter
model4.active_adapters = ac.Stack(ner,"rotten_tomatoes_stack")

# activate only sentiment
# model4.active_adapters = "rotten_tomatoes_stack"

In [None]:
model4.train_adapter(ac.Stack(ner,"rotten_tomatoes_stack"))

In [None]:
#for name,layer in model4.named_parameters():
#  print(name,layer.requires_grad)

In [None]:
from transformers import TrainingArguments, EvalPrediction
training_args2 = TrainingArguments(
    learning_rate=1e-4,
    num_train_epochs=10,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    logging_steps=200,
    output_dir="./training_output",
    overwrite_output_dir=True,
    # The next line is important to ensure the dataset labels are properly passed to the model
    remove_unused_columns=False,
)

def compute_accuracy2(p: EvalPrediction):
  preds = np.argmax(p.predictions, axis=1)
  return {"acc": (preds == p.label_ids).mean()}

trainer4 = AdapterTrainer(
    model=model4,
    args=training_args2,
    train_dataset=dataset2["train"],
    eval_dataset=dataset2["validation"],
    compute_metrics=compute_accuracy2,
)

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


In [None]:
trainer4.train()

***** Running training *****
  Num examples = 8,530
  Num Epochs = 10
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 2,670
  Number of trainable parameters = 25,506


Step,Training Loss
200,0.682
400,0.6416
600,0.6222
800,0.6186
1000,0.6199
1200,0.6123
1400,0.6106
1600,0.6098
1800,0.609
2000,0.6031




Training completed. Do not forget to share your model on huggingface.co/models =)




TrainOutput(global_step=2670, training_loss=0.6191720869657252, metrics={'train_runtime': 37.6569, 'train_samples_per_second': 2265.187, 'train_steps_per_second': 70.903, 'total_flos': 17966964192000.0, 'train_loss': 0.6191720869657252, 'epoch': 10.0})

In [None]:
trainer4.evaluate()

***** Running Evaluation *****
  Num examples = 1066
  Batch size = 32


{'eval_loss': 0.5713135004043579,
 'eval_acc': 0.698874296435272,
 'eval_runtime': 0.2717,
 'eval_samples_per_second': 3923.987,
 'eval_steps_per_second': 125.155,
 'epoch': 10.0}

## Parallel Adapter Inference

In [None]:
import adapters.composition as ac
from transformers import AutoConfig
from adapters import AutoAdapterModel


model5 = AutoAdapterModel.from_pretrained(
    "google/bert_uncased_L-2_H-128_A-2")
# https://docs.adapterhub.ml/prediction_heads.html?highlight=load_head
sentiment = model5.load_adapter("./final_adapter",with_head=True)
ner = model5.load_adapter("./adapter")
model5.load_head('./head')


model5.active_adapters = ac.Parallel(ner, sentiment)

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--google--bert_uncased_L-2_H-128_A-2/snapshots/30b0a37ccaaa32f332884b96992754e246e48c5f/config.json
Model config BertConfig {
  "_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 128,
  "initializer_range": 0.02,
  "intermediate_size": 512,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 2,
  "num_hidden_layers": 2,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.35.2",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weights file model.safetensors from cache at /root/.cache/huggingface/hub/models--google--bert_uncased_L-2_H-128_A-2/snapshots/30b0a37ccaaa32f332884b96992754e246e48c5f/model.safetensors
Generate config GenerationC

In [None]:
model5.active_head = ['ner_head','rotten_tomatoes']

In [None]:
model5.set_active_adapters(ac.Parallel(ner, sentiment))

In [None]:
model5.get_labels_dict('rotten_tomatoes')

{1: '👍', 0: '👎'}

In [None]:
import torch

def analyze_sentence(sentence):
  tokens = tokenizer3.tokenize(sentence)
  input_ids = torch.tensor(tokenizer3.convert_tokens_to_ids(tokens))
  outputs = model5(input_ids)

  # Post-process NER output
  ner_labels_map = model5.get_labels_dict('ner_head')
  ner_label_ids = torch.argmax(outputs[0].logits, dim=2).numpy().squeeze().tolist()
  ner_labels = [ner_labels_map[id_] for id_ in ner_label_ids]
  annotated = []
  for token, label_id in zip(tokens, ner_label_ids):
    token = token.replace('\u0120', '')
    label = ner_labels_map[label_id]
    annotated.append(f"{token}<{label}>")
  print("NER: " + " ".join(annotated))

  # Post-process sentiment output
  sentiment_labels = model5.get_labels_dict(sentiment)
  label_id = torch.argmax(outputs[1].logits).item()
  print("Sentiment: " + sentiment_labels[label_id])
  print()

In [None]:
sentences = [
  "A man in central Germany tried to leave his house by the front door only to find a brick wall there.",
  "The Met Office has issued a yellow weather warning for ice across most of Wales.",
  "A vibrant animation telling stories of indigenous Australia will be projected on to the Sydney Opera House every night at sunset."
]

for sentence in sentences:
  analyze_sentence(sentence)

NER: a<O> man<O> in<O> central<B-ORG> germany<B-ORG> tried<O> to<O> leave<O> his<O> house<O> by<O> the<O> front<O> door<O> only<O> to<O> find<O> a<O> brick<O> wall<O> there<O> .<O>
Sentiment: 👎

Sentiment: 👎

NER: a<O> vibrant<O> animation<O> telling<O> stories<O> of<O> indigenous<B-ORG> australia<B-ORG> will<O> be<O> projected<O> on<O> to<O> the<O> sydney<B-ORG> opera<O> house<O> every<O> night<O> at<O> sunset<B-ORG> .<O>
Sentiment: 👍



In [None]:
for name,layer in model5.named_parameters():
  print(name,layer.requires_grad)

bert.embeddings.word_embeddings.weight True
bert.embeddings.position_embeddings.weight True
bert.embeddings.token_type_embeddings.weight True
bert.embeddings.LayerNorm.weight True
bert.embeddings.LayerNorm.bias True
bert.encoder.layer.0.attention.self.query.weight True
bert.encoder.layer.0.attention.self.query.bias True
bert.encoder.layer.0.attention.self.key.weight True
bert.encoder.layer.0.attention.self.key.bias True
bert.encoder.layer.0.attention.self.value.weight True
bert.encoder.layer.0.attention.self.value.bias True
bert.encoder.layer.0.attention.output.dense.weight True
bert.encoder.layer.0.attention.output.dense.bias True
bert.encoder.layer.0.attention.output.LayerNorm.weight True
bert.encoder.layer.0.attention.output.LayerNorm.bias True
bert.encoder.layer.0.intermediate.dense.weight True
bert.encoder.layer.0.intermediate.dense.bias True
bert.encoder.layer.0.output.dense.weight True
bert.encoder.layer.0.output.dense.bias True
bert.encoder.layer.0.output.LayerNorm.weight True


## Train Adapter fusion

In [None]:
model6 = AutoAdapterModel.from_pretrained(
    "google/bert_uncased_L-2_H-128_A-2")
tokenizer6 = AutoTokenizer.from_pretrained("google/bert_uncased_L-2_H-128_A-2")

ner = model6.load_adapter("./adapter")
sentiment = model6.load_adapter("./final_adapter",with_head=False)
model6.add_classification_head(
    "cb",
    num_labels=2,
    id2label={ 0: "👎", 1: "👍"}
  )

adapter_setup = ac.Fuse(ner,sentiment)
model6.add_adapter_fusion(adapter_setup)
# Activate the adapter
model6.set_active_adapters(adapter_setup)
model6.train_fusion(adapter_setup)



loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--google--bert_uncased_L-2_H-128_A-2/snapshots/30b0a37ccaaa32f332884b96992754e246e48c5f/config.json
Model config BertConfig {
  "_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 128,
  "initializer_range": 0.02,
  "intermediate_size": 512,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 2,
  "num_hidden_layers": 2,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.35.2",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weights file model.safetensors from cache at /root/.cache/huggingface/hub/models--google--bert_uncased_L-2_H-128_A-2/snapshots/30b0a37ccaaa32f332884b96992754e246e48c5f/model.safetensors
Generate config GenerationC

In [None]:
for name,layer in model6.named_parameters():
  print(name,layer.requires_grad)

bert.embeddings.word_embeddings.weight False
bert.embeddings.position_embeddings.weight False
bert.embeddings.token_type_embeddings.weight False
bert.embeddings.LayerNorm.weight False
bert.embeddings.LayerNorm.bias False
bert.encoder.layer.0.attention.self.query.weight False
bert.encoder.layer.0.attention.self.query.bias False
bert.encoder.layer.0.attention.self.key.weight False
bert.encoder.layer.0.attention.self.key.bias False
bert.encoder.layer.0.attention.self.value.weight False
bert.encoder.layer.0.attention.self.value.bias False
bert.encoder.layer.0.attention.output.dense.weight False
bert.encoder.layer.0.attention.output.dense.bias False
bert.encoder.layer.0.attention.output.LayerNorm.weight False
bert.encoder.layer.0.attention.output.LayerNorm.bias False
bert.encoder.layer.0.intermediate.dense.weight False
bert.encoder.layer.0.intermediate.dense.bias False
bert.encoder.layer.0.output.dense.weight False
bert.encoder.layer.0.output.dense.bias False
bert.encoder.layer.0.output.Lay

In [None]:
from transformers import TrainingArguments, EvalPrediction
training_args2 = TrainingArguments(
    learning_rate=1e-4,
    num_train_epochs=10,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    logging_steps=200,
    output_dir="./training_output",
    overwrite_output_dir=True,
    # The next line is important to ensure the dataset labels are properly passed to the model
    remove_unused_columns=False,
)

def compute_accuracy2(p: EvalPrediction):
  preds = np.argmax(p.predictions, axis=1)
  return {"acc": (preds == p.label_ids).mean()}

trainer6 = AdapterTrainer(
    model=model6,
    args=training_args2,
    train_dataset=dataset2["train"],
    eval_dataset=dataset2["validation"],
    compute_metrics=compute_accuracy2,
)

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


In [None]:
trainer6.train()

***** Running training *****
  Num examples = 8,530
  Num Epochs = 10
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 2,670
  Number of trainable parameters = 115,586


Step,Training Loss
200,0.6574
400,0.6215
600,0.6042
800,0.6111
1000,0.6117
1200,0.6066
1400,0.6056
1600,0.6039
1800,0.6059
2000,0.5993




Training completed. Do not forget to share your model on huggingface.co/models =)




TrainOutput(global_step=2670, training_loss=0.6103502252128686, metrics={'train_runtime': 45.2848, 'train_samples_per_second': 1883.636, 'train_steps_per_second': 58.96, 'total_flos': 22012886496000.0, 'train_loss': 0.6103502252128686, 'epoch': 10.0})

In [None]:
trainer6.evaluate()

***** Running Evaluation *****
  Num examples = 1066
  Batch size = 32


{'eval_loss': 0.5676671266555786,
 'eval_acc': 0.698874296435272,
 'eval_runtime': 0.3995,
 'eval_samples_per_second': 2668.016,
 'eval_steps_per_second': 85.096,
 'epoch': 10.0}