In [1]:
import torch
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoConfig
from transformers import DataCollatorWithPadding
from pprint import pprint
from src.data import load_data, get_tokens_for_seq_classification  
from src.utils import (load_sequence_classification_model, 
                   add_adapters_to_bert_layers, 
                   freeze_all_bert_layers,
                   unfreeze_bert_adapters)
from src.adapter import AdapterConfig

import warnings
warnings.filterwarnings("ignore")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load GLUE / SST-2 through the project helper, using "cache\\data" as the
# cache directory.
sst2_dataset = load_data(
    ("glue", "sst2"),
    cache_dir="cache\\data",
)

# Bare last expression so the notebook renders the split summary.
sst2_dataset

DatasetDict({
    train: Dataset({
        features: ['sentence', 'label', 'idx'],
        num_rows: 67349
    })
    validation: Dataset({
        features: ['sentence', 'label', 'idx'],
        num_rows: 872
    })
    test: Dataset({
        features: ['sentence', 'label', 'idx'],
        num_rows: 1821
    })
})

In [3]:
# Sequence classifier built on the bert-base-uncased checkpoint with 2 labels.
bert_model = load_sequence_classification_model("bert-base-uncased", 2)

# Adapter settings: hidden size and activation are taken from the backbone's
# own config; the bottleneck width and init range are fixed here.
adapter_cfg = AdapterConfig(
    adapter_dim=64,
    adapter_initializer_range=1e-2,
    hidden_dim=bert_model.config.hidden_size,
    adapter_act=bert_model.config.hidden_act,
)

# Applied inside-out, in this order: insert adapters, freeze the encoder,
# then unfreeze the adapters. Only `bert_model.bert` is passed through these
# helpers; other attributes of `bert_model` are not handed to them here.
bert_model.bert = unfreeze_bert_adapters(
    freeze_all_bert_layers(
        add_adapters_to_bert_layers(bert_model.bert, adapter_cfg)
    )
)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [4]:
# Build the tokenised dataset with the project helper.
# NOTE(review): presumably this tokenises every split with the
# bert-base-uncased tokenizer -- confirm in src.data.
sst2_tokens = get_tokens_for_seq_classification(
    "bert-base-uncased",
    sst2_dataset,
)

Map: 100%|██████████| 872/872 [00:00<00:00, 5320.11 examples/s]


In [5]:
## Manual DataLoader setup, kept for reference only.
## Not needed when training with the transformers Trainer.
## NOTE: `tokenizer` is only created in a later cell; define it first if this
## cell is ever re-enabled.
# from transformers import DataCollatorWithPadding
# from torch.utils.data import DataLoader

# data_collate = DataCollatorWithPadding(tokenizer=tokenizer)

# sst2_trainloader = DataLoader(sst2_tokens["train"], 
#                        batch_size=8, 
#                        collate_fn=data_collate,
#                        shuffle=True)
# sst2_valloader = DataLoader(sst2_tokens["validation"], 
#                      batch_size=8, 
#                      collate_fn=data_collate,
#                      shuffle=False)
# for batch in sst2_trainloader:
#     print(batch.keys())
#     break

# for batch in sst2_valloader:
#     print(batch.keys())
#     break

In [None]:
from transformers import TrainingArguments, Trainer
import accelerate
from sklearn.metrics import accuracy_score, f1_score
import numpy as np

# Hyper-parameters for the run.
n_epochs, batch_size = 3, 8
# Metric used to pick the best checkpoint; compute_metrics returns this key.
metric_name = "eval_accuracy"

training_args = TrainingArguments(
    # Where checkpoints and logs are written.
    output_dir="cache\\checkpoints\\adapter_bert_sst2",
    logging_dir="cache\\logs\\adapter_bert_sst2",
    logging_steps=50,
    # Optimisation settings.
    learning_rate=2e-5,
    weight_decay=0.01,
    num_train_epochs=n_epochs,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    # Evaluate and checkpoint once per epoch, then reload the checkpoint
    # with the highest `metric_name` when training finishes.
    evaluation_strategy="epoch",
    save_strategy="epoch",
    metric_for_best_model=metric_name,
    greater_is_better=True,
    load_best_model_at_end=True,
)

def compute_metrics(eval_pred):
    """Score one evaluation pass with accuracy and macro-averaged F1.

    Args:
        eval_pred: A ``(predictions, labels)`` pair. ``predictions`` is
            reduced with ``argmax`` over axis 1 to obtain predicted class ids.

    Returns:
        dict: ``{'eval_accuracy': ..., 'eval_f1': ...}``.
    """
    scores, gold = eval_pred
    predicted = np.argmax(scores, axis=1)

    metrics = {}
    metrics['eval_accuracy'] = accuracy_score(gold, predicted)
    metrics['eval_f1'] = f1_score(gold, predicted, average='macro')
    return metrics

# Fast tokenizer for the same checkpoint name the model was loaded from.
# NOTE(review): the Trainer call in this cell has its `tokenizer=` argument
# commented out, so this object is not handed to the Trainer.
tokenizer = AutoTokenizer.from_pretrained(
    "bert-base-uncased",
    cache_dir="cache\\tokenizer",
    use_fast=True,
    padding=True,
    truncation=True,
)

# Wire the adapter-augmented model, the tokenised splits and the metric
# function into a Trainer.
trainer = Trainer(
    model=bert_model,
    args=training_args,
    train_dataset=sst2_tokens["train"],
    eval_dataset=sst2_tokens["validation"],
    # tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

trainer.train()

# Fixed typo: the method is `evaluate`. The misspelt `evalueate` raised
# AttributeError after the full training run, so the model was never saved.
eval_metrics = trainer.evaluate()

trainer.save_model("cache\\models\\adapter-bert-sentiment-analysis")

# Show the final validation metrics as the cell output.
eval_metrics

In [17]:
# Sanity-check forward pass on the first four training examples.
# Slice once instead of materialising the same slice for every field.
sample = sst2_tokens["train"][:4]

# Move inputs to whatever device the model is on instead of hard-coding
# `.cuda()`, so the cell also runs on CPU-only machines or before the model
# has been moved to the GPU.
model_device = bert_model.device

output = bert_model(
    input_ids=sample["input_ids"].to(model_device),
    attention_mask=sample["attention_mask"].to(model_device),
    labels=sample["labels"].to(model_device),
)
output

SequenceClassifierOutput(loss=tensor(0.8549, device='cuda:0', grad_fn=<NllLossBackward0>), logits=tensor([[-0.0040,  0.5968],
        [ 0.1314,  0.5220],
        [ 0.4034,  0.6201],
        [ 0.2548,  0.6052]], device='cuda:0', grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)