In [1]:
!pip install -Uq pytorch-adapt

In [2]:
import pandas as pd
from transformers import AutoAdapterModel, AdapterConfig, AutoTokenizer
import torch
import numpy as np
from sklearn.metrics import f1_score, balanced_accuracy_score
from tqdm import tqdm

from pytorch_adapt.containers import Optimizers, LRSchedulers
from pytorch_adapt.hooks import DANNHook
from pytorch_adapt.containers import Models
from pytorch_adapt.models import Discriminator, Classifier
from torch import nn


import warnings
warnings.filterwarnings(action='ignore')

NOTE: Redirects are currently not supported in Windows or MacOs.


In [3]:
# Use the GPU when one is visible to torch, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

# English data.
en_train = pd.read_csv("data/en_all.csv")

# Amharic data: rename the columns to `text` / `labels`, shuffle reproducibly
# (random_state=42) and cut into 70% train / 10% dev / 20% test.
# Plain positional slicing is used instead of `np.split`, which on a DataFrame
# goes through the deprecated `DataFrame.swapaxes`.
am_all = (
    pd.read_csv('data/am/am_train_translated.csv')
    .rename(columns={'tweet': 'text', 'label': 'labels'})
    .sample(frac=1, random_state=42)
)
train_end = int(.7 * len(am_all))
dev_end = int(.8 * len(am_all))
am_train = am_all.iloc[:train_end]
am_dev = am_all.iloc[train_end:dev_end]
am_test = am_all.iloc[dev_end:]

cuda


In [4]:
def get_domain(t):
    """Return the domain tag of an ID: the part before the first underscore.

    Anything that is not a plain ``str`` is mapped to ``'en'``.
    """
    if type(t) is str:
        return t.split('_')[0]
    return 'en'


# Distinct domain tags in first-seen order: English IDs first, then Amharic.
domains = list(dict.fromkeys(
    get_domain(raw_id)
    for frame in (en_train, am_train)
    for raw_id in frame.ID
))

# Forward and inverse lookup between domain tag and integer id.
domain2id = {name: idx for idx, name in enumerate(domains)}
id2domain = {idx: name for name, idx in domain2id.items()}
domain2id

{'en': 0, 'am': 1}

In [5]:

# Class name -> integer id, and the inverse lookup derived from it.
label2id = {"positive": 0, "neutral": 1, 'negative': 2}
id2label = {idx: name for name, idx in label2id.items()}

# Tokenizer belonging to the xlm-roberta-base checkpoint.
tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-base")

# NOTE(review): this flag is not read by any cell visible in this notebook.
do_domain = True

def encode_batch(row):
    """Tokenize one row and attach its integer class label.

    Whitespace-separated tokens that start with ``@`` are removed from
    ``row.text``; a ``text`` value that is not a ``str`` is treated as empty.
    The remaining text is run through the module-level ``tokenizer``
    (truncated / padded to 100 tokens, PyTorch tensors). ``row.labels`` is
    looked up in ``label2id`` and stored under ``'labels'`` as a scalar
    ``LongTensor``.
    """
    if type(row.text) == str:
        words = row.text.split()
    else:
        words = []
    cleaned = ' '.join(word for word in words if not word.startswith('@'))

    encoded = tokenizer(
        cleaned,
        max_length=100,
        truncation=True,
        padding="max_length",
        return_tensors='pt',
    )
    label_id = label2id[row.labels]
    encoded['labels'] = torch.LongTensor([label_id])[0]
    return encoded

def adapt_encode(row):
    """Encode one row into the dict layout consumed by the dataset classes.

    Returns a dict with:
      * ``'imgs'``   - ``input_ids`` stacked on top of ``attention_mask``
        (with ``encode_batch``'s settings this should be a (2, 100) tensor).
      * ``'labels'`` - the class id as a scalar long tensor.
      * ``'domain'`` - the id of the row's domain (derived from ``row.ID``)
        as a scalar long tensor.
    """
    encoded = encode_batch(row)
    return {
        'imgs': torch.vstack([encoded['input_ids'], encoded['attention_mask']]),
        # encode_batch already stores a scalar label; as_tensor only guarantees the dtype.
        'labels': torch.as_tensor(encoded['labels'], dtype=torch.long),
        'domain': torch.tensor(domain2id[get_domain(row.ID)], dtype=torch.long),
    }

# Replace every frame by a Series holding one encoded dict per row.
# NOTE: the names are rebound, so this cell cannot be re-run without first
# re-running the data-loading cell.
en_train = en_train.apply(adapt_encode, axis=1)
am_train, am_dev, am_test = (
    split.apply(adapt_encode, axis=1)
    for split in (am_train, am_dev, am_test)
)

In [6]:
class SimpleSourceAndTargetDataset(torch.utils.data.Dataset):
    """Pairs each target item with a randomly drawn source item.

    ``s`` (source) and ``t`` (target) are read positionally through ``.iloc``;
    every element must be a mapping holding ``'imgs'``, ``'labels'`` and
    ``'domain'`` tensors. The dataset length is the number of target items.
    """

    def __init__(self, s, t):
        self.s, self.t = s, t

    def __len__(self) -> int:
        return len(self.t)

    def get_random_src_idx(self):
        """Pick a source position uniformly at random (NumPy global RNG)."""
        return np.random.choice(len(self.s))

    def __getitem__(self, idx):
        """Return the ``idx``-th target item plus one random source item, on ``device``."""
        target_item = self.t.iloc[idx]
        source_item = self.s.iloc[self.get_random_src_idx()]

        sample = dict()
        sample['src_imgs'] = source_item['imgs'].to(device)
        sample['src_labels'] = source_item['labels'].to(device)
        sample['src_domain'] = source_item['domain'].to(device)
        sample['target_imgs'] = target_item['imgs'].to(device)
        # Target labels are not emitted here (the key was disabled in the original code).
        sample['target_domain'] = target_item['domain'].to(device)
        return sample
    
class SimpleTargetDataset(torch.utils.data.Dataset):
    """Target-only dataset that also exposes the labels.

    ``t`` is read positionally through ``.iloc``; every element must be a
    mapping holding ``'imgs'``, ``'labels'`` and ``'domain'`` tensors.
    """

    def __init__(self, t):
        self.t = t

    def __len__(self) -> int:
        return len(self.t)

    def __getitem__(self, idx):
        """Return the ``idx``-th item with all three tensors moved to ``device``."""
        item = self.t.iloc[idx]
        imgs = item['imgs'].to(device)
        labels = item['labels'].to(device)
        domain = item['domain'].to(device)
        return {
            'target_imgs': imgs,
            'target_labels': labels,
            'target_domain': domain,
        }

In [7]:
# Training samples pair English (source) items with Amharic (target) items;
# the dev and test sets are Amharic-only and keep their labels.
train_data = SimpleSourceAndTargetDataset(s=en_train, t=am_train)
valid_data, test_data = (SimpleTargetDataset(t=split) for split in (am_dev, am_test))

In [8]:
class Generator(nn.Module):
    """Feature extractor: xlm-roberta-base with a single adapter named ``"sa"``.

    The adapter is added, selected for training and activated in the
    constructor. ``forward`` returns the wrapped model's ``pooler_output``.
    """

    def __init__(self):
        super().__init__()
        adapter_name = "sa"
        backbone = AutoAdapterModel.from_pretrained('xlm-roberta-base')
        backbone.add_adapter(adapter_name)
        backbone.train_adapter(adapter_name)
        backbone.set_active_adapters(adapter_name)
        self.model = backbone

    def forward(self, x):
        """Split ``x`` along axis 1 and feed both slices to the wrapped model.

        ``x[:, 0]`` and ``x[:, 1]`` are passed as the first and second
        positional arguments (presumably input ids and attention mask, the
        order in which ``adapt_encode`` stacks them).
        """
        first_slice, second_slice = x[:, 0], x[:, 1]
        outputs = self.model(first_slice, second_slice)
        return outputs.pooler_output

In [11]:
def _predict_target(G, C, dataloader):
    """Run ``C(G(.))`` over a target-only dataloader.

    Returns ``(preds, answers)`` as NumPy arrays: the argmax class index per
    sample and the matching ``target_labels``. The caller is responsible for
    switching the models to eval mode and disabling gradients.
    """
    logits, answers = [], []
    for batch in dataloader:
        logits.append(C(G(batch["target_imgs"])))
        answers.append(batch["target_labels"])
    preds = torch.cat(logits, dim=0).argmax(-1).cpu().numpy()
    return preds, torch.cat(answers, dim=0).cpu().numpy()


def run_experiment(
    C_h=256,
    D_h=256,
    lr=1e-4,
    classifier_lr=1e-3,
    num_epochs=20,
    batch_size=16,
    updates_per_epoch=4,
    leave=False):
    """Train a DANN model and report metrics at the best dev checkpoint.

    Parameters
    ----------
    C_h, D_h : int
        Passed as ``h`` to the ``Classifier`` and ``Discriminator``.
    lr : float
        AdamW learning rate for the generator ``G``.
    classifier_lr : float
        AdamW learning rate for both the classifier ``C`` and discriminator ``D``.
    num_epochs : int
        Number of passes over ``train_data``.
    batch_size : int
        Batch size of the train, dev and test dataloaders.
    updates_per_epoch : int
        Number of evaluation points per epoch: evenly spaced batch indices
        plus the last batch of the epoch.
    leave : bool
        Forwarded to ``tqdm`` (keep finished progress bars on screen).

    Returns
    -------
    dict
        ``dev_balanced_accuracy``, ``dev_f1``, ``test_balanced_accuracy`` and
        ``test_f1``, measured at the evaluation point with the highest dev
        balanced accuracy. Empty if no evaluation ever ran.
    """
    train_dataloader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)
    valid_dataloader = torch.utils.data.DataLoader(valid_data, batch_size=batch_size, shuffle=False)
    test_dataloader = torch.utils.data.DataLoader(test_data, batch_size=batch_size, shuffle=False)

    num_batches = len(train_dataloader)

    G = Generator().to(device)
    # 3 output classes; 768 presumably matches the width of G's pooled features.
    C = Classifier(3, in_size=768, h=C_h).to(device)
    D = Discriminator(in_size=768, h=D_h).to(device)

    models = Models({"G": G, "C": C, "D": D})
    optimizers = Optimizers(
        {"G": (torch.optim.AdamW, {"lr": lr}),
         "C": (torch.optim.AdamW, {"lr": classifier_lr}),
         "D": (torch.optim.AdamW, {"lr": classifier_lr})}
    )
    optimizers.create_with(models)
    optimizers = list(optimizers.values())

    hook = DANNHook(optimizers)

    # 1-based batch indices after which the model is evaluated.
    update_idxs = set([i * (num_batches // updates_per_epoch)
        for i in range(1, updates_per_epoch)] + [num_batches])

    best_losses = dict()
    best_valid = -1

    for epoch in range(1, 1+num_epochs):
        total_loss = 0.0

        pbar = tqdm(train_dataloader, desc=f"Epoch {epoch}", leave=leave)
        for idx, data in enumerate(pbar, start=1):
            models.train()
            _, loss = hook({**models, **data})

            total_loss += loss['total_loss']['total']

            if idx not in update_idxs:
                continue

            models.eval()
            with torch.no_grad():
                valid_preds, valid_ans = _predict_target(G, C, valid_dataloader)
                # scikit-learn metrics take (y_true, y_pred); both metrics are
                # asymmetric, so the ground truth must come first.
                valid_bal_acc = balanced_accuracy_score(valid_ans, valid_preds)

                # Only touch the test set when the dev score improves.
                if valid_bal_acc > best_valid:
                    best_valid = valid_bal_acc
                    test_preds, test_ans = _predict_target(G, C, test_dataloader)
                    best_losses = {
                        'dev_balanced_accuracy': valid_bal_acc,
                        'dev_f1': f1_score(valid_ans, valid_preds, average='weighted'),
                        'test_balanced_accuracy': balanced_accuracy_score(test_ans, test_preds),
                        'test_f1': f1_score(test_ans, test_preds, average='weighted'),
                    }

            pbar.set_description(f" Epoch {epoch} | tr {total_loss / idx:.3f}" + \
                                f" | valid bal_acc {valid_bal_acc:.2f} | test bal_acc {best_losses['test_balanced_accuracy']:.2f}")
    return best_losses

In [13]:
# Single run with learning rates 10x below the function defaults;
# leave=True keeps each epoch's progress bar in the output.
run_experiment(
    C_h=256,
    D_h=256,
    lr=1e-5,
    classifier_lr=1e-4,
    num_epochs=20,
    leave=True,
)

Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaAdapterModel: ['lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.bias']
- This IS expected if you are initializing XLMRobertaAdapterModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaAdapterModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaAdapterModel were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['roberta.embeddings.position_ids']
You should probably TRAIN this model on a down-stream task to be able to use it for prediction