In [1]:
!pip install pytorch-adapt

Collecting pytorch-adapt
  Downloading pytorch_adapt-0.0.81-py3-none-any.whl (157 kB)
Collecting pytorch-metric-learning>=1.5.2
  Downloading pytorch_metric_learning-1.6.3-py3-none-any.whl (111 kB)
Collecting torchmetrics>=0.9.3
  Downloading torchmetrics-0.10.3-py3-none-any.whl (529 kB)
Installing collected packages: torchmetrics, pytorch-metric-learning, pytorch-adapt
Successfully installed pytorch-adapt-0.0.81 pytorch-metric-learning-1.6.3 torchmetrics-0.10.3


In [2]:
import pandas as pd
from transformers import AutoAdapterModel, AdapterConfig, AutoTokenizer
import torch
import numpy as np
from sklearn.metrics import f1_score, balanced_accuracy_score

In [89]:

en_train = pd.read_csv("data/en_all.csv")
en_train, en_test = np.split(en_train.sample(frac=1, random_state=42), [int(.8*len(en_train))])

am_train = pd.read_csv('data/am/am_train_translated.csv').rename(columns={'tweet':'text', 'label':'labels'})
am_train, am_dev = np.split(
    am_train.sample(frac=1, random_state=42), [int(.7*len(am_train))])

# combined_train = pd.concat([
#     en_train[['ID', 'text', 'labels']],
#     am_train[['ID', 'tweet', 'label']].rename(columns={'tweet':'text', 'label':'labels'}),
# ])

# combined_test = pd.concat([
#     am_dev[['ID', 'tweet', 'label']].rename(columns={'tweet':'text', 'label':'labels'}),
#     en_test[['ID', 'text', 'labels']]
# ])
# test_split_lengths = [('am_test', len(am_test)), ('am_dev', len(am_dev)), ('en_test', len(en_test))]

In [90]:
get_domain = lambda t: t.split('_')[0] if type(t)==str else 'en'
domains = combined_train['ID'].apply(get_domain).unique()
domains
domain2id = {d: i for i, d in enumerate(domains)}
id2domain = {i: d for d, i in domain2id.items()}
domain2id

{'en': 0, 'am': 1}

In [91]:

label2id = {"positive":0, "neutral":1, 'negative':2}
id2label = {0:"positive", 1:"neutral", 2:'negative'}

tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-base")

do_domain = True

def encode_batch(row):
    text = ' '.join(filter(lambda x:x[0]!='@', row.text.split() if type(row.text)==str else []))
    out = tokenizer(text, max_length=100, truncation=True, padding="max_length", return_tensors='pt')
    out['labels'] = torch.LongTensor([label2id[row.labels]])[0]
    return out

def adapt_encode(row):
    real_out = dict()
    out = encode_batch(row)
    return {
        'imgs': torch.vstack([out['input_ids'], out['attention_mask']]),
        'labels': torch.int(out['labels']),
        'domain': torch.int(domain2id[get_domain(row.ID)])
    }

en_train = en_train.apply(adapt_encode, axis=1)
en_test = en_test.apply(adapt_encode, axis=1)
am_train = am_train.apply(adapt_encode, axis=1)
am_dev = am_dev.apply(adapt_encode, axis=1)

TypeError: 'torch.dtype' object is not callable

In [None]:
en_train

In [83]:
class SimpleSourceAndTargetDataset(torch.utils.data.Dataset):
    def __init__(self, s, t):
        self.s = s
        self.t = t

    def __len__(self) -> int:
        return len(self.t)

    def __getitem__(self, idx):
        tgt = self.t.iloc[idx]
        src = self.s.iloc[self.get_random_src_idx()]
        return {
            'src_imgs': torch.Tensor(src['imgs']),
            'src_labels': torch.LongTensor(src['labels']),
            'src_domain': torch.LongTensor(src['domain']),
            'target_imgs': torch.Tensor(tgt['imgs']),
            'target_labels': torch.LongTensor(tgt['labels']),
            'target_domain': torch.LongTensor(tgt['domain']),
        }
    
    def get_random_src_idx(self):
        return np.random.choice(len(self.s))
    
class SimpleTargetDataset(torch.utils.data.Dataset):
    def __init__(self, t):
        self.t = t

    def __len__(self) -> int:
        return len(self.t)

    def __getitem__(self, idx):
        tgt = self.t.iloc[idx]
        return {
            'target_imgs': torch.FloatTensor(tgt['imgs']),
            'target_labels': torch.LongTensor(tgt['labels']),
            'target_domain': torch.LongTensor(tgt['domain']),
        }

In [84]:
train_data = SimpleSourceAndTargetDataset(en_train, am_train)
train_data[0].keys()

TypeError: expected TensorOptions(dtype=float, device=cpu, layout=Strided, requires_grad=false (default), pinned_memory=false (default), memory_format=(nullopt)) (got TensorOptions(dtype=__int64, device=cpu, layout=Strided, requires_grad=false (default), pinned_memory=false (default), memory_format=(nullopt)))

In [77]:
from torch import nn

In [78]:
class Generator(nn.Module):
    def __init__(self, ):
        super().__init__()
        model = AutoAdapterModel.from_pretrained('xlm-roberta-base')
        model.add_adapter("sa")
        model.train_adapter("sa")
        model.set_active_adapters("sa")
        self.model = model

    def forward(self, x):
        a = self.model(x[:, 0], x[:, 1]).pooler_output
        return a

In [81]:

device = torch.device("cpu")

from pytorch_adapt.containers import LRSchedulers
from pytorch_adapt.containers import Optimizers
from pytorch_adapt.adapters import DANN
from pytorch_adapt.containers import Models

G = Generator()
C = torch.nn.Linear(768, 10)
D = torch.nn.Sequential(torch.nn.Linear(768, 1), torch.nn.Flatten(start_dim=0))
models = Models({"G": G, "C": C, "D": D})


optimizers = Optimizers((torch.optim.Adam, {"lr": 1}))
lr_schedulers = LRSchedulers(
    {
        "G": (torch.optim.lr_scheduler.ExponentialLR, {"gamma": 0.99}),
        "C": (torch.optim.lr_scheduler.StepLR, {"step_size": 2}),
    },
    scheduler_types={"per_step": ["G"], "per_epoch": ["C"]},
)
adapter = DANN(models=models, optimizers=optimizers, lr_schedulers=lr_schedulers)

Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaAdapterModel: ['lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias']
- This IS expected if you are initializing XLMRobertaAdapterModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaAdapterModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaAdapterModel were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['roberta.embeddings.position_ids']
You should probably TRAIN this model on a down-stream task to be able to use it for prediction

In [82]:
loss = adapter.training_step(train_data[0])
loss

AttributeError: in DANNHook: __call__
in ChainHook: __call__
in OptimizerHook: __call__
in ChainHook: __call__
in AssertHook: __call__
in OnlyNewOutputsHook: __call__
in ChainHook: __call__
in DomainLossHook: __call__
DomainLossHook: Expecting 'src_domain' and 'target_domain' in inputs
DomainLossHook: Computing loss for src domain
'int' object has no attribute 'type'

In [70]:
model(data[0]['src_imgs'][0], data[0]['src_imgs'][1]).pooler_output.shape

torch.Size([1, 768])

In [6]:
train

0        [input_ids, attention_mask, labels]
1        [input_ids, attention_mask, labels]
2        [input_ids, attention_mask, labels]
3        [input_ids, attention_mask, labels]
4        [input_ids, attention_mask, labels]
                        ...                 
10171    [input_ids, attention_mask, labels]
10172    [input_ids, attention_mask, labels]
10173    [input_ids, attention_mask, labels]
10174    [input_ids, attention_mask, labels]
10175    [input_ids, attention_mask, labels]
Name: 0, Length: 10176, dtype: object

In [None]:
class Dataset(torch.utils.data.Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.x_data)

    def __getitem__(self, idx):
        X = self.x_data[idx].to(device)
        y = self.y_data[idx].to(device) 
        return X, y

In [8]:
model = AutoAdapterModel.from_pretrained('xlm-roberta-base')
model.add_adapter("sa")
model.train_adapter("sa")
model.add_classification_head("sa", num_labels=3)
model.set_active_adapters("sa")

NOTE: Redirects are currently not supported in Windows or MacOs.
Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaAdapterModel: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.bias']
- This IS expected if you are initializing XLMRobertaAdapterModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaAdapterModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaAdapterModel were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['roberta.embeddings.position_ids']
You should probably TRAIN thi

In [None]:
def run_experiment(hidden_sizes=(4096, 4096), dropout=0.5, lr=1e-3, warmup_steps=100, num_epochs=15, updates_per_epoch=4, title='', leave=False):
    
    show = False
    losses = []
        
    net = ViTAdapterModel.from_pretrained(
                model_name_or_path
    )
    net.add_adapter("microstructures")
    net.register_custom_head("my_custom_head", CustomHead)
    net.add_custom_head(head_type="my_custom_head", head_name="custom_head", **{'hidden_sizes': hidden_sizes, 'dropout':dropout})
    net.train_adapter("microstructures")
    net = net.to(device)

    train_dataloader = torch.utils.data.DataLoader(fold['train'], batch_size=batch_size, shuffle=True)
    valid_dataloader = torch.utils.data.DataLoader(fold['valid'], batch_size=batch_size, shuffle=False)
    valid2_dataloader = torch.utils.data.DataLoader(fold['valid2'], batch_size=batch_size, shuffle=False)
    test_dataloader = torch.utils.data.DataLoader(fold['test'], batch_size=batch_size, shuffle=False)
    test2_dataloader = torch.utils.data.DataLoader(fold['test2'], batch_size=batch_size, shuffle=False)

    num_batches = len(train_dataloader)
    num_valid_batches = len(valid_dataloader)
    num_test_batches = len(test_dataloader)

    num_valid2_batches = len(valid2_dataloader)
    num_test2_batches = len(test2_dataloader)

    optimizer = torch.optim.AdamW(net.parameters(), lr=lr)
    scheduler = get_linear_schedule_with_warmup(optimizer, warmup_steps, num_batches * num_epochs)

    train_losses, valid_losses, test_losses = [], [], []
    valid2_losses, test2_losses = [], []

    update_idxs = set([i * (num_batches // updates_per_epoch) 
        for i in range(1, updates_per_epoch)] + [num_batches])

    best_losses = []
    best_valid2 = 1e9

    for epoch in range(1, 1+num_epochs):
        total_loss = 0.0 
        net.train()

        pbar = tqdm(train_dataloader, desc=f"Fold {fold_num}, Epoch {epoch}", leave=leave)
        for idx, (X, y) in enumerate(pbar, start=1):
            optimizer.zero_grad()
            out = net(X)
            loss_val = criterion(out, y)
            total_loss += loss_val.item()
            loss_val.backward()
            optimizer.step()
            scheduler.step()

            if idx in update_idxs:
                avg_valid_loss = 0.0
                avg_test_loss = 0.0
                avg_valid2_loss = 0.0
                avg_test2_loss = 0.0
                net.eval()
                with torch.no_grad():

                    for X, y in valid_dataloader:
                        out = net(X)
                        loss_val = criterion(out, y)
                        avg_valid_loss += loss_val.item() / num_valid_batches
                    valid_losses.append(avg_valid_loss)

                    for X, y in test_dataloader:
                        out = net(X)
                        loss_val = criterion(out, y)
                        avg_test_loss += loss_val.item() / num_test_batches
                    test_losses.append(avg_test_loss)

                    for X, y in valid2_dataloader:
                        out = net(X)
                        loss_val = criterion(out, y)
                        avg_valid2_loss += loss_val.item() / num_valid2_batches
                    valid2_losses.append(avg_valid2_loss)

                    for X, y in test2_dataloader:
                        out = net(X)
                        loss_val = criterion(out, y)
                        avg_test2_loss += loss_val.item() / num_test_batches
                    test2_losses.append(avg_test2_loss)

                    if best_valid2 > avg_valid_loss + avg_valid2_loss:
                        best_valid2 = avg_valid_loss + avg_valid2_loss
                        best_losses = [total_loss / idx, avg_valid_loss, avg_valid2_loss,
                                      avg_test_loss, avg_test2_loss]


                pbar.set_description(f"Fold {fold_num}, Epoch {epoch} | tr {total_loss / idx:.2f}" + \
                                    f" | v1 {avg_valid_loss:.2f}  | v2 {avg_valid2_loss:.2f}" + \
                                    f"| t1 {avg_test_loss:.2f} | t2 {avg_test2_loss:.2f}")
                train_losses.append(total_loss / idx)
        losses.append(best_losses)

In [9]:
import numpy as np
from transformers import TrainingArguments, AdapterTrainer, EvalPrediction

training_args = TrainingArguments(
    learning_rate=1e-4,
    num_train_epochs=7,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    logging_steps=200,
    output_dir="./training_output",
    overwrite_output_dir=True
#     # The next line is important to ensure the dataset labels are properly passed to the model
#     remove_unused_columns=False,
)

def compute_scores(p: EvalPrediction):
    preds = np.argmax(p.predictions, axis=1)
    i, output = 0, dict()
    for name, split_length in test_split_lengths:
        s = np.s_[i:i+split_length]
        split_preds = preds[s]
        split_labels = p.label_ids[s]
        output[f'{name}_acc'] = (split_preds==split_labels).mean()
        output[f'{name}_weighted_f1'] = f1_score(split_preds, split_labels, average='weighted')
        output[f'{name}_balanced_accurancy'] = balanced_accuracy_score(split_preds, split_labels)
        i += split_length
    return output

trainer = AdapterTrainer(
    model=model,
    args=training_args,
    train_dataset=train,
    eval_dataset=test,
    compute_metrics=compute_scores,
)

In [10]:

trainer.train()

***** Running training *****
  Num examples = 13771
  Num Epochs = 7
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 3017


Step,Training Loss
200,1.012
400,0.918
600,0.8917
800,0.8583
1000,0.8441
1200,0.8391
1400,0.8307
1600,0.8068
1800,0.8005
2000,0.7956


Saving model checkpoint to ./training_output\checkpoint-500
Configuration saved in ./training_output\checkpoint-500\sa\adapter_config.json
Module weights saved in ./training_output\checkpoint-500\sa\pytorch_adapter.bin
Configuration saved in ./training_output\checkpoint-500\sa\head_config.json
Module weights saved in ./training_output\checkpoint-500\sa\pytorch_model_head.bin
Configuration saved in ./training_output\checkpoint-500\sa\head_config.json
Module weights saved in ./training_output\checkpoint-500\sa\pytorch_model_head.bin
Saving model checkpoint to ./training_output\checkpoint-1000
Configuration saved in ./training_output\checkpoint-1000\sa\adapter_config.json
Module weights saved in ./training_output\checkpoint-1000\sa\pytorch_adapter.bin
Configuration saved in ./training_output\checkpoint-1000\sa\head_config.json
Module weights saved in ./training_output\checkpoint-1000\sa\pytorch_model_head.bin
Configuration saved in ./training_output\checkpoint-1000\sa\head_config.json
Mod

TrainOutput(global_step=3017, training_loss=0.8335713129546178, metrics={'train_runtime': 593.918, 'train_samples_per_second': 162.307, 'train_steps_per_second': 5.08, 'total_flos': 5039674817725800.0, 'train_loss': 0.8335713129546178, 'epoch': 7.0})

In [11]:
trainer.evaluate()

***** Running Evaluation *****
  Num examples = 4934
  Batch size = 32


{'eval_loss': 0.809025228023529,
 'eval_am_test_acc': 0.6006683375104428,
 'eval_am_test_weighted_f1': 0.6024775787444283,
 'eval_am_test_balanced_accurancy': 0.5770568131477881,
 'eval_am_dev_acc': 0.5976627712854758,
 'eval_am_dev_weighted_f1': 0.5989814862583934,
 'eval_am_dev_balanced_accurancy': 0.578623886704277,
 'eval_en_test_acc': 0.6593371574251116,
 'eval_en_test_weighted_f1': 0.670796609889722,
 'eval_en_test_balanced_accurancy': 0.6550661143187694,
 'eval_runtime': 14.8375,
 'eval_samples_per_second': 332.535,
 'eval_steps_per_second': 10.446,
 'epoch': 7.0}