In [1]:
import pandas as pd
from transformers import AutoAdapterModel, AdapterConfig, AutoTokenizer
import torch
import numpy as np
from sklearn.metrics import f1_score

In [2]:
# Input files for the English tweets; kept as constants so the paths are
# easy to spot and change.
EN_TRAIN_CSV = 'translated_train_all.csv'
EN_TEST_CSV = 'translated_test_all.csv'

# Each frame carries the original `text`, its `labels`, and a `translated`
# column (see the preview in the next cell).
en_train = pd.read_csv(EN_TRAIN_CSV)
en_test = pd.read_csv(EN_TEST_CSV)

In [3]:
# Preview the loaded training frame as the cell's rich output
# (pandas elides the middle rows of a long frame).
en_train

Unnamed: 0,text,labels,translated
0,Happy birthday Roman Abramovich. Treat yoursel...,positive,መልካም ልደት የሮማውያን የሮማውያን አብርሃም.በጥር ወር ውስጥ ጥሩ ወዳጃ...
1,Tampa Bay Buccaneers: Game-by-Game Predictions...,neutral,ታምፓ ቤተር ቡካነሮች-የጨዋታ-ጨዋታ-ጨዋታ ትንበያ ለ 2 ኛው ግማሽ ጊዜ ...
2,Interesting: Sacha Baron Cohen to play Freddi...,positive,የሚስብ: SACHA ባሮን ኮሮን በኋለኛው ባዮቲክ ውስጥ ፍሬድዲ ሜርኬሪ P...
3,DealBook: Clearwire Is Sought by Sprint for Sp...,neutral,Selegbobe: ማፅዳት ለትርፍ ይፈለጋል-ሐሙስ በሚሽከረከርበት ጊዜ ሐሙ...
4,BREAKING NEWS...........Man utd have placed Ho...,neutral,ሰበር ዜና ......... የሰው ልጅ UTD አዲስ ፈራጅ ማርክ ክሬቴንትበ...
...,...,...,...
9578,"Good night out with the boys,party bus down to...",positive,"ጥሩ ምሽት ከወንዶቹ, ነገ ከከተማው ጋር ነገ ከከተማይቱ (ኦስ ኦስ ቅዳሜ..."
9579,I wanna go crazy with Zayn till we see the sun...,neutral,ፀሐይን እስክናይ ድረስ ከጽን ጋር እብድ እፈልጋለሁ ...
9580,@Pike_JSpell are y'all going to come jam with ...,neutral,@Pike_jesplownsss ነገ ነገ ኮንትሮባንድ ጋር መምጣት የሚሄዱት?...
9581,@Nessaa456 the 6th chapter talks about malcolm...,neutral,@ Sensao456 የ 6 ኛ ምዕራፍ ስለ ማልኮም ኤክስ ነው እና እንደማስ...


In [4]:
# Load the Amharic tweets (columns include `tweet`, `label` and
# `eng_translated`, as shown in the preview below).
am_all = pd.read_csv('am_train_translated.csv')

# Shuffle once with a fixed seed so the split is reproducible across runs.
am_shuffled = am_all.sample(frac=1, random_state=42)

# Split boundaries: first 70% -> train, next 10% -> dev, final 20% -> test.
am_train_end = int(.7 * len(am_shuffled))
am_dev_end = int(.8 * len(am_shuffled))

# Positional slicing yields the same three pieces as
# np.split(frame, [am_train_end, am_dev_end]) but does not route through
# DataFrame.swapaxes, which newer pandas releases deprecate.
am_train = am_shuffled.iloc[:am_train_end]
am_dev = am_shuffled.iloc[am_train_end:am_dev_end]
am_test = am_shuffled.iloc[am_dev_end:]

# The three splits must partition the full frame with nothing lost.
assert len(am_train) + len(am_dev) + len(am_test) == len(am_all)

am_train

Unnamed: 0,ID,tweet,label,eng_translated
1073,am_train_01074,ለትራምፕ ስንሳቀቅ ህውሀት ከመቀሌ ደሞ አልወሀድም ብላለች እና አሁን ህ...,negative,Habitheu says that when we laughed for the gru...
5468,am_train_05469,ለኔ ታላቅ ክብር ነው! እነበመባሌ የተሰማኝን ትልቅ ክብር በቃላት ልገልፀ...,positive,It is a great glory for me!I can't express it ...
5306,am_train_05307,በስህተት ወደ አካውንታቸው የገባውን አንድ ሚልዮን ብር በቅንነት የመለሱት...,positive,"In an error, the individual who first came to ..."
1321,am_train_01322,የሴቶች ደህንነት እንደ ዜጋ ቅድሚያ ይልተሰጠ... የችግኝን ያህል ግድ ያ...,negative,Women's Safety as a Citizen ... What is the pr...
4844,am_train_04845,ነገሮችን እንደአመጣጣቸው መቀበል የግድ ነው በዛው እሳቤ ኑሮ ትቂት ጊዜ...,positive,It is a necessity that we are coming as their ...
...,...,...,...,...
5700,am_train_05701,ጀግና ኢትዮጵያዊ ሁሌም ከእናንተ ጋር ነን,positive,Hero Ethiopian are always with you
5129,am_train_05130,ለመልካም ዕድልሲል በአውሮፕላን ሞተር ውስጥ ሳንቲም የወረወረው ግለሰብ ተ...,positive,The penny in a plane engine for fine lines!For...
3249,am_train_03250,ድርጊቱ መፈፀሙ ያልተገባ ቢሆንም ልዩ ልዩ ትንኮሳዎች ቢፈታተኑንም ቅሉ ዳ...,neutral,"Although the action is not unworthy, the struc..."
1208,am_train_01209,ትዕግስት ማለት ትዕግስት ማለት ብቻ አይደለም,negative,Trudy is not just Trudy


In [5]:
# Both corpora are projected onto a shared schema: `en` (English text),
# `labels` (sentiment string) and `am` (Amharic text).
AM_COLUMN_MAP = {'eng_translated':'en', 'label':'labels', 'tweet':'am'}
EN_COLUMN_MAP = {'text':'en', 'translated':'am'}


def _am_view(frame):
    """Select and rename the columns of an Amharic-source frame."""
    return frame[['eng_translated', 'label', 'tweet']].rename(columns=AM_COLUMN_MAP)


def _en_view(frame):
    """Select and rename the columns of an English-source frame."""
    return frame[['text', 'labels', 'translated']].rename(columns=EN_COLUMN_MAP)


# Training data: Amharic-source rows first, then English-source rows.
combined_train = pd.concat([_am_view(am_train), _en_view(en_train)])

# Evaluation data is the concatenation of three named splits. The order here
# is what `test_split_lengths` records, so per-split metrics can later be
# recovered by slicing the predictions in the same order.
_test_parts = [
    ('am_test', _am_view(am_test)),
    ('am_dev', _am_view(am_dev)),
    ('en_test', _en_view(en_test)),
]
combined_test = pd.concat([part for _, part in _test_parts])
test_split_lengths = [(split_name, len(part)) for split_name, part in _test_parts]

In [8]:

# Sentiment classes in id order; the two lookup tables are inverses of each
# other by construction.
id2label = dict(enumerate(("positive", "neutral", 'negative')))
label2id = {label: idx for idx, label in id2label.items()}

# Tokenizer matching the XLM-R checkpoint that the models below are built on.
tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-base")

def encode_batch(row):
    """Tokenize the English and Amharic text of a single row.

    Despite the name, this encodes one row at a time; it is meant to be used
    with ``DataFrame.apply(..., axis=1)``.

    Parameters
    ----------
    row
        Object exposing ``row['en']`` and ``row['am']`` (the two texts) and a
        ``row.labels`` attribute holding a key of ``label2id``.

    Returns
    -------
    dict
        One entry per tokenizer output field and language, keyed
        ``'<lang>_<field>'`` (e.g. ``'en_input_ids'``), plus ``'<lang>_labels'``
        holding the label id as a scalar ``LongTensor``.

    Raises
    ------
    KeyError
        If ``row.labels`` is not a key of ``label2id``.
    """
    # The label does not depend on the language, so look it up once.
    label_id = label2id[row.labels]

    full_dict = dict()
    for name in ['en', 'am']:
        raw_text = row[name]
        # isinstance (rather than an exact type comparison) also accepts str
        # subclasses; anything that is not text (e.g. a NaN from a missing
        # cell) falls back to a single space.
        tokens = raw_text.split() if isinstance(raw_text, str) else [' ']
        # Drop @mentions: whitespace-separated tokens that start with '@'.
        text = ' '.join(tok for tok in tokens if not tok.startswith('@'))
        # return_tensors='pt' keeps a leading dimension of 1 on every tensor,
        # which is what lets a single encoded row be fed straight to a model.
        out = tokenizer(text, max_length=100, truncation=True, padding="max_length", return_tensors='pt')
        out['labels'] = torch.LongTensor([label_id])[0]
        for key, value in out.items():
            full_dict[f'{name}_{key}'] = value
    return full_dict

# Encode every training row into a dict of tensors.
train_encoded = combined_train.apply(encode_batch, axis=1)
# The concatenated frame has repeated index labels; resetting the index and
# taking column 0 leaves a Series of dicts labelled 0..n-1.
train = train_encoded.reset_index()[0]

len(train)

13771

In [9]:
# Encode every evaluation row into a dict of tensors.
test_encoded = combined_test.apply(encode_batch, axis=1)
# Reset the index and take column 0 so the result is a Series of dicts
# labelled 0..n-1, in the same row order as `combined_test`.
test = test_encoded.reset_index()[0]

len(test)

4934

In [32]:
# Model used for the English inputs: the pretrained XLM-R checkpoint loaded
# through the adapter library's auto class.
en_model = AutoAdapterModel.from_pretrained('xlm-roberta-base')
# Register a new adapter named "sa" and select it for training.
# NOTE(review): train_adapter presumably freezes the backbone so only the
# adapter weights are updated — confirm against the adapter library docs.
en_model.add_adapter("sa")
en_model.train_adapter("sa")
# Three output classes, matching the positive/neutral/negative label set.
en_model.add_classification_head("sa", num_labels=3)
# Make "sa" the adapter used in forward passes.
en_model.set_active_adapters("sa")

Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaAdapterModel: ['lm_head.dense.weight', 'lm_head.bias', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing XLMRobertaAdapterModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaAdapterModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaAdapterModel were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['roberta.embeddings.position_ids']
You should probably TRAIN this model on a down-stream task to be able to use it for prediction

In [33]:
# Model used for the Amharic inputs: same checkpoint and adapter setup as
# en_model, but built as a separate instance with its own "sa" adapter.
am_model = AutoAdapterModel.from_pretrained('xlm-roberta-base')
am_model.add_adapter("sa")
am_model.train_adapter("sa")
# No classification head is attached to this model (the call is disabled);
# a later cell applies the head taken from en_model to this model's output.
# am_model.add_classification_head("sa", num_labels=3)
am_model.set_active_adapters("sa")

Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaAdapterModel: ['lm_head.dense.weight', 'lm_head.bias', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing XLMRobertaAdapterModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaAdapterModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaAdapterModel were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['roberta.embeddings.position_ids']
You should probably TRAIN this model on a down-stream task to be able to use it for prediction

In [35]:
# Keep a reference to en_model's "sa" classification head, then remove the
# head from en_model. A later cell calls `head` directly on am_model's output.
# NOTE(review): after this cell en_model no longer has an "sa" head, so
# re-running this cell on the same kernel would presumably fail at the
# lookup — confirm.
head = en_model.heads['sa']
en_model.delete_head('sa')
# Display the head's layer structure.
head

ClassificationHead(
  (0): Dropout(p=0.1, inplace=False)
  (1): Linear(in_features=768, out_features=768, bias=True)
  (2): Activation_Function_Class(
    (f): Tanh()
  )
  (3): Dropout(p=0.1, inplace=False)
  (4): Linear(in_features=768, out_features=3, bias=True)
)

In [41]:
# Sanity check: push the first encoded training example through the Amharic
# model and display its pooled output.
datum = train.iloc[0]
am_outputs = am_model(datum['am_input_ids'], datum['am_attention_mask'])
am_outputs.pooler_output

tensor([[-4.8255e-02,  2.6466e-01,  1.4748e-01,  4.9844e-01, -2.2852e-02,
          3.3957e-01,  4.1229e-01, -4.4294e-01,  1.2508e-01, -1.7262e-01,
          1.1965e-01,  1.2945e-01,  3.8452e-01,  3.4946e-01, -2.1315e-01,
         -1.9194e-01,  1.9861e-01,  4.1296e-01, -3.8165e-02, -2.4928e-01,
         -2.7022e-01,  3.2548e-01, -6.6433e-01, -5.3539e-01, -2.2196e-01,
          5.8153e-01,  1.2145e-01, -3.1469e-01, -1.4031e-01,  6.4794e-01,
          1.4161e-01,  4.7685e-01, -2.5686e-01,  9.2925e-02,  9.6632e-02,
         -1.7666e-01,  3.9255e-01,  2.1039e-01,  4.7253e-01,  3.1692e-01,
          1.1767e-01, -1.3166e-01, -1.1009e-01,  2.4308e-01,  2.2894e-01,
         -2.5825e-01,  1.8567e-01, -5.4735e-02, -2.2145e-01,  3.8256e-01,
          5.7466e-01, -2.5924e-01,  4.5987e-02,  4.0382e-02,  1.6140e-01,
          1.7561e-01,  3.2409e-01, -2.9713e-01, -2.5705e-01, -5.0585e-01,
          2.1902e-02,  6.0602e-01,  4.0996e-01,  3.3709e-01,  3.1988e-01,
         -5.7198e-01,  1.1909e-01, -2.

In [42]:

# Same sanity check for the English side: run `datum`'s English encoding
# through en_model and display the pooled output.
en_model(datum['en_input_ids'], datum['en_attention_mask']).pooler_output

tensor([[-0.1630, -0.2444,  0.0464,  0.6477,  0.1834,  0.6088,  0.3035, -0.4116,
          0.2670,  0.0207,  0.1809,  0.0638,  0.0525,  0.3862, -0.4753, -0.1986,
          0.4439,  0.0101,  0.1767,  0.0474, -0.5748,  0.4201, -0.7491, -0.6483,
         -0.2721,  0.5789, -0.0101, -0.0829,  0.0570,  0.5318,  0.1083,  0.3474,
         -0.2996,  0.1450,  0.4583,  0.0035,  0.0954,  0.5226,  0.6545,  0.5353,
          0.0276, -0.2630, -0.1655, -0.0813,  0.4258, -0.1560,  0.1858,  0.1726,
         -0.2186,  0.2704,  0.4518,  0.2379, -0.0758,  0.1101,  0.1629,  0.3347,
          0.3880, -0.4262,  0.0114, -0.3948, -0.2267,  0.4753,  0.1453,  0.4607,
          0.2998, -0.5394,  0.1787, -0.1700, -0.6143,  0.3009,  0.0306, -0.1799,
          0.6475, -0.1543,  0.1878, -0.0900,  0.4939,  0.2990, -0.1266,  0.1899,
         -0.2604,  0.0056, -0.0219,  0.1530,  0.0889,  0.6042, -0.6138,  0.8332,
          0.2683,  0.0294,  0.0723,  0.0958, -0.3228,  0.3668, -0.5065,  0.1948,
         -0.2429, -0.5890, -

In [43]:
# Apply the head taken from en_model to am_model's output and show element 1
# of the result.
# NOTE(review): the values printed below are close to am_model's pooler_output
# shown earlier rather than three class scores — confirm which element of the
# head's return value holds the logits.
head(am_model(datum['am_input_ids'], datum['am_attention_mask']))[1]

tensor([[-4.7869e-02,  2.6767e-01,  1.4659e-01,  5.1228e-01,  2.1222e-03,
          3.4991e-01,  4.1621e-01, -4.3170e-01,  1.3669e-01, -1.5643e-01,
          1.3210e-01,  1.2155e-01,  3.8029e-01,  3.4419e-01, -2.1720e-01,
         -1.8610e-01,  1.9871e-01,  4.0466e-01, -5.3914e-02, -2.4750e-01,
         -2.7676e-01,  3.2171e-01, -6.6809e-01, -5.3984e-01, -2.2919e-01,
          5.7757e-01,  1.4443e-01, -3.1282e-01, -1.5114e-01,  6.3906e-01,
          1.3372e-01,  4.5950e-01, -2.6668e-01,  8.4528e-02,  8.0833e-02,
         -1.6383e-01,  3.7891e-01,  2.2333e-01,  4.7694e-01,  3.0504e-01,
          1.0771e-01, -1.1021e-01, -1.3105e-01,  2.4067e-01,  2.1850e-01,
         -2.6004e-01,  1.7227e-01, -5.1929e-02, -2.2440e-01,  3.9848e-01,
          5.6024e-01, -2.5976e-01,  5.5520e-02,  5.4747e-02,  1.7706e-01,
          1.7315e-01,  2.9831e-01, -2.9641e-01, -2.6570e-01, -5.2391e-01,
          2.4735e-02,  6.0531e-01,  4.2172e-01,  3.3452e-01,  3.2311e-01,
         -5.5445e-01,  1.3097e-01, -2.

In [12]:
# Inspect one encoded example: a dict keyed '<lang>_<field>' whose tensors
# have a leading dimension of 1 and are padded to the tokenizer's max_length.
train.iloc[0]

{'en_input_ids': tensor([[     0,  33124,  10902,     34,  17378,    450,   3229,    642,  94518,
             297,    100,     70,  23322,   2676,     53,    136,   5036,     70,
          133836,     83,    959,    142,  53702,    450,     83,    959,  78737,
            2481,      4,    136,    442,     83,   2373,     73,   3674,    678,
              70,  14537,    111,     70,  25443,     32,      2,      1,      1,
               1,      1,      1,      1,      1,      1,      1,      1,      1,
               1,      1,      1,      1,      1,      1,      1,      1,      1,
               1,      1,      1,      1,      1,      1,      1,      1,      1,
               1,      1,      1,      1,      1,      1,      1,      1,      1,
               1,      1,      1,      1,      1,      1,      1,      1,      1,
               1,      1,      1,      1,      1,      1,      1,      1,      1,
               1]]),
 'en_attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 

In [7]:
import numpy as np
from transformers import TrainingArguments, AdapterTrainer, EvalPrediction

# Hyper-parameters for the adapter fine-tuning run.
training_args = TrainingArguments(
    # Output location; existing contents may be overwritten.
    output_dir="./training_output",
    overwrite_output_dir=True,
    # Optimisation schedule.
    learning_rate=1e-4,
    num_train_epochs=6,
    # Per-device batch sizes for training and evaluation.
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    # Log the training loss every 200 steps.
    logging_steps=200,
    # Disabled by the author; their note says this setting matters for
    # passing the dataset labels through to the model:
    #     remove_unused_columns=False,
)

def compute_scores(p: EvalPrediction):
    """Compute accuracy and weighted F1 separately for each evaluation split.

    The evaluation set is a concatenation of several named splits whose sizes
    are listed, in order, in the module-level ``test_split_lengths``. The
    predictions are sliced with those sizes, so they must arrive in the same
    row order as the evaluation set.

    Parameters
    ----------
    p : EvalPrediction
        ``p.predictions`` holds per-class scores (arg-maxed over axis 1) and
        ``p.label_ids`` the gold label ids.

    Returns
    -------
    dict
        ``'<split>_acc'`` and ``'<split>_weighted_f1'`` for every split.
    """
    preds = np.argmax(p.predictions, axis=1)

    # Turn the ordered split sizes into [start, stop) boundaries up front.
    lengths = [split_length for _, split_length in test_split_lengths]
    stops = np.cumsum(lengths)
    starts = stops - lengths

    output = dict()
    for (name, _), lo, hi in zip(test_split_lengths, starts, stops):
        split_preds = preds[lo:hi]
        split_labels = p.label_ids[lo:hi]
        output[f'{name}_acc'] = (split_preds==split_labels).mean()
        output[f'{name}_weighted_f1'] = f1_score(split_labels, split_preds, average='weighted')
    return output

# Wire the model, training arguments, datasets and metric function together.
# FIXME(review): `model` is not defined in any visible cell (only `en_model`
# and `am_model` are), so on a fresh kernel this raises NameError unless the
# name leaked from a deleted cell — decide which model is meant.
# NOTE(review): the encoded examples use language-prefixed keys such as
# 'en_input_ids' and 'am_labels'; verify the trainer and model can consume
# them as-is.
trainer = AdapterTrainer(
    model=model,
    args=training_args,
    train_dataset=train,
    eval_dataset=test,
    compute_metrics=compute_scores,
)

In [None]:

# Run fine-tuning with the arguments configured above.
# NOTE(review): the saved log below reports 9583 examples while the
# `len(train)` cell shows 13771, so this output looks stale — re-run to confirm.
trainer.train()

***** Running training *****
  Num examples = 9583
  Num Epochs = 6
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 1800


Step,Training Loss


In [None]:
# Evaluate on `test`; the reported metrics come from compute_scores
# (accuracy and weighted F1 for each evaluation split).
trainer.evaluate()