In [4]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [1]:
!pip install transformers

Collecting transformers
[?25l  Downloading https://files.pythonhosted.org/packages/3a/83/e74092e7f24a08d751aa59b37a9fc572b2e4af3918cb66f7766c3affb1b4/transformers-3.5.1-py3-none-any.whl (1.3MB)
[K     |▎                               | 10kB 20.3MB/s eta 0:00:01[K     |▌                               | 20kB 27.2MB/s eta 0:00:01[K     |▊                               | 30kB 26.5MB/s eta 0:00:01[K     |█                               | 40kB 18.9MB/s eta 0:00:01[K     |█▎                              | 51kB 17.2MB/s eta 0:00:01[K     |█▌                              | 61kB 18.9MB/s eta 0:00:01[K     |█▊                              | 71kB 14.5MB/s eta 0:00:01[K     |██                              | 81kB 15.2MB/s eta 0:00:01[K     |██▎                             | 92kB 15.0MB/s eta 0:00:01[K     |██▌                             | 102kB 14.0MB/s eta 0:00:01[K     |██▊                             | 112kB 14.0MB/s eta 0:00:01[K     |███                             | 

In [2]:
!pip install emoji

Collecting emoji
[?25l  Downloading https://files.pythonhosted.org/packages/ff/1c/1f1457fe52d0b30cbeebfd578483cedb3e3619108d2d5a21380dfecf8ffd/emoji-0.6.0.tar.gz (51kB)
[K     |██████▍                         | 10kB 16.5MB/s eta 0:00:01[K     |████████████▉                   | 20kB 20.9MB/s eta 0:00:01[K     |███████████████████▎            | 30kB 15.8MB/s eta 0:00:01[K     |█████████████████████████▊      | 40kB 14.0MB/s eta 0:00:01[K     |████████████████████████████████| 51kB 5.2MB/s 
[?25hBuilding wheels for collected packages: emoji
  Building wheel for emoji (setup.py) ... [?25l[?25hdone
  Created wheel for emoji: filename=emoji-0.6.0-cp36-none-any.whl size=49716 sha256=4abd722f1f4caa7d7e4e326c79ebdec34d9ae77ef8f3043dfa24b7c8cc3ce623
  Stored in directory: /root/.cache/pip/wheels/46/2c/8b/9dcf5216ca68e14e0320e283692dce8ae321cdc01e73e17796
Successfully built emoji
Installing collected packages: emoji
Successfully installed emoji-0.6.0


In [3]:
import warnings

warnings.simplefilter("ignore", UserWarning)
warnings.simplefilter("ignore", FutureWarning)
warnings.simplefilter("ignore", DeprecationWarning)

import tensorflow as tf
import torch
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader, RandomSampler
from keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from transformers import XLMRobertaTokenizer, XLMRobertaForSequenceClassification, AdamW, get_linear_schedule_with_warmup
from tqdm import tqdm, trange
import pandas as pd
import io
import os
import numpy as np
from sklearn.metrics import f1_score, accuracy_score
from statistics import mode

import re
import emoji
import random

import nltk
nltk.download('stopwords')
from nltk import word_tokenize, pos_tag
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import sent_tokenize, TweetTokenizer
from nltk.corpus import wordnet, stopwords
from imblearn.over_sampling import SMOTE

# specify GPU device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
n_gpu = torch.cuda.device_count()
torch.cuda.get_device_name(0)

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


'Tesla V100-SXM2-16GB'

In [5]:
def preprocess(df):
    
    #removes URL
    pattern = r'https.?://[^\s]+[\s]?'
    df["tweet"] = df["tweet"].str.replace(pat=pattern, repl="", regex=True)
    
    #removes usernames/mentions
    pattern = r'@[^\s]+'
    df["tweet"] = df["tweet"].str.replace(pat=pattern, repl="", regex=True)
    
    #removes emoji and smiley
    pattern = re.compile("["
                         u"\U0001F600-\U0001F64F"
                         u"\U0001F300-\U0001F5FF"
                         u"\U0001F680-\U0001F6FF"
                         u"\U0001F1E0-\U0001F1FF"
                         u"\U00002500-\U00002BEF"
                         u"\U00002702-\U000027B0"
                         u"\U00002702-\U000027B0"
                         u"\U000024C2-\U0001F251"
                         u"\U0001f926-\U0001f937"
                         u"\U00010000-\U0010ffff"
                         u"\u2640-\u2642"
                         u"\u2600-\u2B55"
                         u"\u200d"
                         u"\u23cf"
                         u"\u23e9"
                         u"\u231a"
                         u"\ufe0f"
                         u"\u3030"
                         "]+", flags=re.UNICODE)
    df["tweet"] = df["tweet"].str.replace(pat=pattern, repl="", regex=True)
    
    #removes numbers
    pattern = r'\d+'
    df["tweet"] = df["tweet"].str.replace(pat=pattern, repl="", regex=True)
    
    #removes punctuation
    pattern = r"[^\w\s]"
    df["tweet"] = df["tweet"].str.replace(pat=pattern, repl=" ", regex=True)

    #removes stop words
    stop_words = stopwords.words("english")    
    remove_stop_words = lambda row: " ".join([token for token in row.split(" ")
                                              if token not in stop_words])
    df["tweet"] = df["tweet"].apply(remove_stop_words)
    
    #removes extra spaces
    pattern = r"[\s]+"
    df["tweet"] = df["tweet"].str.replace(pat=pattern, repl=" ", regex=True)
    
    return(df)

In [6]:
def train_validate_split(df,seed=42,validate_percent = 0.1):
  train, validate = train_test_split(df, test_size=validate_percent, random_state=seed, stratify=df['label'])
  return train, validate

def train_validate_test_split(df,seed, train_percent=.8, validate_percent=.125):
  train, test = train_test_split(df, train_size=train_percent, random_state=seed, stratify=df['label'])
  train, validate = train_test_split(train, test_size=validate_percent, random_state=seed, stratify=train['label'])
  return train, validate, test

def tokenize_data(df):
    sentences = ["[CLS] " + query + " [SEP]" for query in df['tweet']]
    # Tokenize with multilingual BERT tokenizer
    tokenizer = XLMRobertaTokenizer.from_pretrained('xlm-roberta-base')
    tokenized_texts = [tokenizer.tokenize(sent) for sent in sentences]
    MAX_LEN = 128

    # Use the BERT tokenizer to convert the tokens to their index numbers in the BERT vocabulary
    input_ids = pad_sequences([tokenizer.convert_tokens_to_ids(txt) for txt in tokenized_texts],
                          maxlen=MAX_LEN, dtype="long", truncating="post", padding="post")
    labels = df['label'].copy()
    # Create attention masks
    attention_masks = []
    # Create a mask of 1s for each token followed by 0s for padding
    for seq in input_ids:
        seq_mask = [float(i>0) for i in seq]
        attention_masks.append(seq_mask)
    return input_ids, attention_masks, labels

def Data_Loader(inputs_ids, attention_masks, labels, batch_size=16): 
    data = TensorDataset(torch.LongTensor(inputs_ids), torch.LongTensor(attention_masks), torch.LongTensor(labels.ravel()))
    sampler = RandomSampler(data)
    dataloader = DataLoader(data, sampler=sampler, batch_size=batch_size)
    return dataloader

In [7]:
def model_train(model, train_dataloader, validation_dataloader):
    # BERT training loop
    epochs = 5
    for _ in trange(epochs, desc="Epoch"):  
        # Set our model to training mode
        model.train()
        # Tracking variables
        tr_loss = 0
        nb_tr_examples, nb_tr_steps = 0, 0
        # Train the data for one epoch
        for step, batch in enumerate(train_dataloader):
            # Add batch to GPU
            batch = tuple(t.to(device) for t in batch)
            # Unpack the inputs from our dataloader
            b_input_ids, b_input_mask, b_labels = batch
            # Clear out the gradients (by default they accumulate)
            optimizer.zero_grad()
            # Forward pass
            loss,_ = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask, labels=b_labels)
            # Backward pass
            loss.backward()
            # Update parameters and take a step using the computed gradient
            optimizer.step()
            scheduler.step()
            # Update tracking variables
            tr_loss += loss.item()
            nb_tr_examples += b_input_ids.size(0)
            nb_tr_steps += 1
        print("Train loss: {}".format(tr_loss/nb_tr_steps))

        ## VALIDATION

        # Put model in evaluation mode
        model.eval()
        # Tracking variables 
        eval_loss, eval_accuracy = 0, 0
        nb_eval_steps, nb_eval_examples = 0, 0
        # Evaluate data for one epoch
        for batch in validation_dataloader:
            # Add batch to GPU
            batch = tuple(t.to(device) for t in batch)
            # Unpack the inputs from our dataloader
            b_input_ids, b_input_mask, b_labels = batch
            # Telling the model not to compute or store gradients, saving memory and speeding up validation
            with torch.no_grad():
                # Forward pass, calculate logit predictions
                logits = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask)[0]
            # Move logits and labels to CPU
            logits = logits.detach().cpu().numpy()
            label_ids = b_labels.to('cpu').numpy()
            tmp_eval_accuracy = flat_accuracy(logits, label_ids)    
            eval_accuracy += tmp_eval_accuracy
            nb_eval_steps += 1
        validation_accuracy = (eval_accuracy/nb_eval_steps)
        print("Validation Accuracy: {}".format(eval_accuracy/nb_eval_steps))
    return validation_accuracy

In [8]:
def flat_accuracy(preds, labels):
    pred_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()
    return np.sum(pred_flat == labels_flat) / len(labels_flat)

def model_test(model,prediction_dataloader):
    model.eval()
    # Tracking variables 
    predictions , true_labels = [], []
# Predict 
    for batch in prediction_dataloader:
        # Add batch to GPU
        batch = tuple(t.to(device) for t in batch)
        # Unpack the inputs from our dataloader
        b_input_ids, b_input_mask, b_labels = batch
        # Telling the model not to compute or store gradients, saving memory and speeding up prediction
        with torch.no_grad():
            # Forward pass, calculate logit predictions
            logits = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask)[0]
        # Move logits and labels to CPU
        logits = logits.detach().cpu().numpy()
        label_ids = b_labels.to('cpu').numpy()
        predictions+=list(np.argmax(logits, axis=1).flatten())
        true_labels+=list(label_ids.flatten())
    weighted_f1_score = f1_score(true_labels, predictions, average= 'weighted')
    print("Weighted F1 Score:",weighted_f1_score)
    macro_f1_score = f1_score(true_labels, predictions, average= 'macro')
    print("Macro F1 Score:",macro_f1_score)    
    test_accuracy_score = accuracy_score(true_labels, predictions)
    print("Accuracy score:", test_accuracy_score, "\n")
    print("="*100)
    return weighted_f1_score, macro_f1_score

In [9]:
def model_initialise(dataloader_len,path= None , use_saved_model=False):
  model = XLMRobertaForSequenceClassification.from_pretrained("xlm-roberta-base", num_labels = 2, output_attentions = False, output_hidden_states = False).cuda()
  optimizer = optim.AdamW(params = model.parameters(), lr=2e-5)
  epochs = 5
  total_steps = dataloader_len * epochs
  scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps = 0, num_training_steps = total_steps)
  
  if(use_saved_model==True):
    checkpoint = torch.load(path)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

  return model, optimizer, scheduler

In [10]:
print_stmts= []
languages = {'en_fr':'Arabic','fr_ar':'English','en_ar':'French'}
directory = './'
drive_directory = '/content/drive/My Drive/XLMRMultilingual models'
for lang1, lang2 in languages.items():
	df = pd.read_csv(os.path.join(directory, lang1+'.csv'))
	df = preprocess(df)
	train_df, validation_df = train_validate_split(df)
	train_input_ids, train_attention_masks, train_labels = tokenize_data(df)
	train_dataloader = Data_Loader(train_input_ids, train_attention_masks, train_labels)
	validation_input_ids, validation_attention_masks, validation_labels = tokenize_data(validation_df)
	validation_dataloader = Data_Loader(validation_input_ids, validation_attention_masks, validation_labels)
	model, optimizer, scheduler = model_initialise(len(train_dataloader))
	validation_accuracy = model_train(model, train_dataloader, validation_dataloader)
	fname = 'XLMR'+lang2+'.pth'
	path = os.path.join(drive_directory, fname)
	torch.save({'model_state_dict': model.state_dict(),
						'optimizer_state_dict': optimizer.state_dict()}, path)
	test_df = pd.read_csv(os.path.join(directory, lang2+'.csv'))
	test_input_ids, test_attention_masks, test_labels = tokenize_data(test_df)
	test_dataloader = Data_Loader(test_input_ids, test_attention_masks, test_labels)
	print("\nZero Shot Model for test:",lang2,'\n')
	_, _ = model_test(model, test_dataloader)
	df = pd.read_csv(os.path.join(directory, lang2+'.csv'))
	df = preprocess(df)
	weighted = []
	macro = []
	seeds = [2018, 2019, 2020, 2021, 2022]
	scores=[]
	for seed in seeds:
		np.random.seed(seed)
		train_df, validation_df, test_df = train_validate_test_split(df, seed)
		train_input_ids, train_attention_masks, train_labels = tokenize_data(train_df)
		train_dataloader = Data_Loader(train_input_ids, train_attention_masks, train_labels)
		validation_input_ids, validation_attention_masks, validation_labels = tokenize_data(validation_df)
		validation_dataloader = Data_Loader(validation_input_ids, validation_attention_masks, validation_labels)
		print("\nModel Summary:")
		print('Language:', lang2)
		print('Seed value:', seed)
		model, optimizer, scheduler = model_initialise(len(train_dataloader),path,use_saved_model=True)
		validation_accuracy = model_train(model, train_dataloader, validation_dataloader)
		test_input_ids, test_attention_masks, test_labels = tokenize_data(test_df)
		test_dataloader = Data_Loader(test_input_ids, test_attention_masks, test_labels)
		w, m = model_test(model, test_dataloader)
		weighted.append(w)
		macro.append(m)
	print("The Average  Weighted F1-Score of the Language ", lang2, "is:",sum(weighted)/ len(weighted))
	print("The Average  Macro F1-Score of the Language ", lang2, "is:",sum(macro)/ len(macro))
	print("="*200)
	print_stmts.append("Average Weighted F1-Score "+str(sum(weighted)/len(weighted))+" and Average Macro F1-Score "+str(sum(macro)/len(macro))+" of "+ str(lang2))
for i in print_stmts:
	print(i,"\n")

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=5069051.0, style=ProgressStyle(descript…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=512.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1115590446.0, style=ProgressStyle(descr…




Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense

Train loss: 0.3734834009796569


Epoch:  20%|██        | 1/5 [16:39<1:06:38, 999.74s/it]

Validation Accuracy: 0.8605697151424287
Train loss: 0.32180686287823346


Epoch:  40%|████      | 2/5 [33:17<49:57, 999.17s/it]  

Validation Accuracy: 0.8744586040313177
Train loss: 0.2944139986020392


Epoch:  60%|██████    | 3/5 [49:54<33:17, 998.62s/it]

Validation Accuracy: 0.8913980509745127
Train loss: 0.26376109515860824


Epoch:  80%|████████  | 4/5 [1:06:31<16:38, 998.07s/it]

Validation Accuracy: 0.9080043311677495
Train loss: 0.23477912550828983


Epoch: 100%|██████████| 5/5 [1:23:08<00:00, 997.65s/it]

Validation Accuracy: 0.9182179743461603






Zero Shot Model for test: Arabic 

Weighted F1 Score: 0.7539611137082988
Macro F1 Score: 0.579280906376903
Accuracy score: 0.796821008984105 


Model Summary:
Language: Arabic
Seed value: 2018


Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense

Train loss: 0.33319818572412563


Epoch:  20%|██        | 1/5 [00:38<02:35, 38.78s/it]

Validation Accuracy: 0.893581081081081
Train loss: 0.24456842366869994


Epoch:  40%|████      | 2/5 [01:17<01:55, 38.65s/it]

Validation Accuracy: 0.893581081081081
Train loss: 0.1924621929583235


Epoch:  60%|██████    | 3/5 [01:55<01:17, 38.56s/it]

Validation Accuracy: 0.8969594594594594
Train loss: 0.13619173324564252


Epoch:  80%|████████  | 4/5 [02:33<00:38, 38.50s/it]

Validation Accuracy: 0.9037162162162162
Train loss: 0.10047227145707983


Epoch: 100%|██████████| 5/5 [03:12<00:00, 38.44s/it]

Validation Accuracy: 0.9037162162162162





Weighted F1 Score: 0.9030220797171122
Macro F1 Score: 0.8530112353126162
Accuracy score: 0.9041450777202072 


Model Summary:
Language: Arabic
Seed value: 2019


Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense

Train loss: 0.3348154091078231


Epoch:  20%|██        | 1/5 [00:38<02:33, 38.36s/it]

Validation Accuracy: 0.8902027027027027
Train loss: 0.23595602606518531


Epoch:  40%|████      | 2/5 [01:16<01:55, 38.36s/it]

Validation Accuracy: 0.902027027027027
Train loss: 0.16212986346598215


Epoch:  60%|██████    | 3/5 [01:55<01:16, 38.36s/it]

Validation Accuracy: 0.8806306306306307
Train loss: 0.11001020638587496


Epoch:  80%|████████  | 4/5 [02:33<00:38, 38.36s/it]

Validation Accuracy: 0.8969594594594594
Train loss: 0.06901852041207929


Epoch: 100%|██████████| 5/5 [03:11<00:00, 38.37s/it]

Validation Accuracy: 0.8969594594594594





Weighted F1 Score: 0.8951135569321306
Macro F1 Score: 0.8422156784360977
Accuracy score: 0.8955094991364422 


Model Summary:
Language: Arabic
Seed value: 2020


Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense

Train loss: 0.3392985665833387


Epoch:  20%|██        | 1/5 [00:38<02:33, 38.36s/it]

Validation Accuracy: 0.8997747747747747
Train loss: 0.23165368424420515


Epoch:  40%|████      | 2/5 [01:16<01:55, 38.36s/it]

Validation Accuracy: 0.910472972972973
Train loss: 0.16688190930060984


Epoch:  60%|██████    | 3/5 [01:55<01:16, 38.37s/it]

Validation Accuracy: 0.8918918918918919
Train loss: 0.11207939455318668


Epoch:  80%|████████  | 4/5 [02:33<00:38, 38.37s/it]

Validation Accuracy: 0.902027027027027
Train loss: 0.07831082947774035


Epoch: 100%|██████████| 5/5 [03:11<00:00, 38.37s/it]

Validation Accuracy: 0.9121621621621622





Weighted F1 Score: 0.9152442062445174
Macro F1 Score: 0.8727849122932884
Accuracy score: 0.9153713298791019 


Model Summary:
Language: Arabic
Seed value: 2021


Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense

Train loss: 0.3347698391275847


Epoch:  20%|██        | 1/5 [00:38<02:33, 38.35s/it]

Validation Accuracy: 0.8952702702702703
Train loss: 0.2297513915416528


Epoch:  40%|████      | 2/5 [01:16<01:55, 38.35s/it]

Validation Accuracy: 0.8986486486486487
Train loss: 0.1544889106825874


Epoch:  60%|██████    | 3/5 [01:55<01:16, 38.35s/it]

Validation Accuracy: 0.893581081081081
Train loss: 0.10174737859693334


Epoch:  80%|████████  | 4/5 [02:33<00:38, 38.35s/it]

Validation Accuracy: 0.8868243243243243
Train loss: 0.06801824952479393


Epoch: 100%|██████████| 5/5 [03:11<00:00, 38.35s/it]

Validation Accuracy: 0.8918918918918919





Weighted F1 Score: 0.8946241366100686
Macro F1 Score: 0.8383294441452761
Accuracy score: 0.8972366148531952 


Model Summary:
Language: Arabic
Seed value: 2022


Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense

Train loss: 0.3164765451206114


Epoch:  20%|██        | 1/5 [00:38<02:33, 38.37s/it]

Validation Accuracy: 0.8811936936936936
Train loss: 0.2179719464941405


Epoch:  40%|████      | 2/5 [01:16<01:55, 38.37s/it]

Validation Accuracy: 0.8800675675675675
Train loss: 0.15630929098339882


Epoch:  60%|██████    | 3/5 [01:55<01:16, 38.37s/it]

Validation Accuracy: 0.893018018018018
Train loss: 0.09713975124622101


Epoch:  80%|████████  | 4/5 [02:33<00:38, 38.36s/it]

Validation Accuracy: 0.902027027027027
Train loss: 0.06295521385549736


Epoch: 100%|██████████| 5/5 [03:11<00:00, 38.36s/it]

Validation Accuracy: 0.9003378378378378





Weighted F1 Score: 0.891399653967264
Macro F1 Score: 0.8350215081436817
Accuracy score: 0.8929188255613126 

The Average  Weighted F1-Score of the Language  Arabic is: 0.8998807266942185
The Average  Macro F1-Score of the Language  Arabic is: 0.8482725556661921


Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense

Train loss: 0.5197336050813601


Epoch:  20%|██        | 1/5 [01:05<04:22, 65.72s/it]

Validation Accuracy: 0.8225524475524476
Train loss: 0.37277104894388213


Epoch:  40%|████      | 2/5 [02:11<03:16, 65.67s/it]

Validation Accuracy: 0.875
Train loss: 0.2984921696880773


Epoch:  60%|██████    | 3/5 [03:16<02:11, 65.63s/it]

Validation Accuracy: 0.9155375874125874
Train loss: 0.24781449089787866


Epoch:  80%|████████  | 4/5 [04:22<01:05, 65.60s/it]

Validation Accuracy: 0.9332386363636364
Train loss: 0.2031506095761986


Epoch: 100%|██████████| 5/5 [05:27<00:00, 65.58s/it]

Validation Accuracy: 0.9340034965034966






Zero Shot Model for test: English 

Weighted F1 Score: 0.7568607824813645
Macro F1 Score: 0.5590009465496957
Accuracy score: 0.7945288638476278 


Model Summary:
Language: English
Seed value: 2018


Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense

Train loss: 0.36120438524302834


Epoch:  20%|██        | 1/5 [11:36<46:24, 696.05s/it]

Validation Accuracy: 0.8508093070308548
Train loss: 0.31638229705365856


Epoch:  40%|████      | 2/5 [23:12<34:48, 696.23s/it]

Validation Accuracy: 0.8565882650480526
Train loss: 0.2834163580750798


Epoch:  60%|██████    | 3/5 [34:50<23:13, 696.61s/it]

Validation Accuracy: 0.8612417804754678
Train loss: 0.24601886915670876


Epoch:  80%|████████  | 4/5 [46:27<11:36, 696.88s/it]

Validation Accuracy: 0.8590477996965098
Train loss: 0.2116280013976795


Epoch: 100%|██████████| 5/5 [58:04<00:00, 696.87s/it]

Validation Accuracy: 0.8616021750126455





Weighted F1 Score: 0.8611457140498046
Macro F1 Score: 0.7736580630256132
Accuracy score: 0.8633690600398368 


Model Summary:
Language: English
Seed value: 2019


Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense

Train loss: 0.3881661957156082


Epoch:  20%|██        | 1/5 [11:36<46:25, 696.37s/it]

Validation Accuracy: 0.8485078401618613
Train loss: 0.3237153235423172


Epoch:  40%|████      | 2/5 [23:13<34:49, 696.64s/it]

Validation Accuracy: 0.868063985837127
Train loss: 0.2905816528316262


Epoch:  60%|██████    | 3/5 [34:50<23:13, 696.65s/it]

Validation Accuracy: 0.8712000505816894
Train loss: 0.2585893683648312


Epoch:  80%|████████  | 4/5 [46:26<11:36, 696.58s/it]

Validation Accuracy: 0.8652250885179564
Train loss: 0.2260776189386638


Epoch: 100%|██████████| 5/5 [58:03<00:00, 696.62s/it]

Validation Accuracy: 0.868715225088518





Weighted F1 Score: 0.8601001324082551
Macro F1 Score: 0.7737363834608356
Accuracy score: 0.8611875177843119 


Model Summary:
Language: English
Seed value: 2020


Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense

Train loss: 0.35840046078607896


Epoch:  20%|██        | 1/5 [11:37<46:29, 697.38s/it]

Validation Accuracy: 0.8592501264542235
Train loss: 0.3137555369970411


Epoch:  40%|████      | 2/5 [23:14<34:51, 697.26s/it]

Validation Accuracy: 0.8637013151239251
Train loss: 0.31825844710622103


Epoch:  60%|██████    | 3/5 [34:50<23:13, 696.92s/it]

Validation Accuracy: 0.8586747597369753
Train loss: 0.2708698518359426


Epoch:  80%|████████  | 4/5 [46:26<11:36, 696.80s/it]

Validation Accuracy: 0.8665591805766313
Train loss: 0.24352293666480435


Epoch: 100%|██████████| 5/5 [58:03<00:00, 696.68s/it]

Validation Accuracy: 0.8598191704602933





Weighted F1 Score: 0.8637769310424932
Macro F1 Score: 0.7766964910627228
Accuracy score: 0.8667836479180498 


Model Summary:
Language: English
Seed value: 2021


Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense

Train loss: 0.35709880783535347


Epoch:  20%|██        | 1/5 [11:37<46:28, 697.12s/it]

Validation Accuracy: 0.8568538189175517
Train loss: 0.3194343811056502


Epoch:  40%|████      | 2/5 [23:14<34:51, 697.27s/it]

Validation Accuracy: 0.8671029337379869
Train loss: 0.2848335175358894


Epoch:  60%|██████    | 3/5 [34:51<23:14, 697.12s/it]

Validation Accuracy: 0.8650227617602427
Train loss: 0.24903473242715166


Epoch:  80%|████████  | 4/5 [46:26<11:36, 696.53s/it]

Validation Accuracy: 0.866831057157309
Train loss: 0.21495501431375064


Epoch: 100%|██████████| 5/5 [58:02<00:00, 696.47s/it]

Validation Accuracy: 0.8689997470915529





Weighted F1 Score: 0.857066481971672
Macro F1 Score: 0.7660983043582515
Accuracy score: 0.8599544721616238 


Model Summary:
Language: English
Seed value: 2022


Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense

Train loss: 0.358400487027854


Epoch:  20%|██        | 1/5 [11:35<46:22, 695.57s/it]

Validation Accuracy: 0.8569613050075872
Train loss: 0.312022206006677


Epoch:  40%|████      | 2/5 [23:11<34:47, 695.69s/it]

Validation Accuracy: 0.8667488619119879
Train loss: 0.27882615593095456


Epoch:  60%|██████    | 3/5 [34:48<23:12, 696.01s/it]

Validation Accuracy: 0.8677794638340921
Train loss: 0.23995585219238866


Epoch:  80%|████████  | 4/5 [46:25<11:36, 696.22s/it]

Validation Accuracy: 0.868259989883662
Train loss: 0.203045981643415


Epoch: 100%|██████████| 5/5 [58:01<00:00, 696.26s/it]

Validation Accuracy: 0.8654906423874558





Weighted F1 Score: 0.8605045555914574
Macro F1 Score: 0.7719418264175988
Accuracy score: 0.8631793607132695 

The Average  Weighted F1-Score of the Language  English is: 0.8605187630127364
The Average  Macro F1-Score of the Language  English is: 0.7724262136650044


Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense

Train loss: 0.3685951924133457


Epoch:  20%|██        | 1/5 [17:19<1:09:16, 1039.12s/it]

Validation Accuracy: 0.8521012931034483
Train loss: 0.319102366432524


Epoch:  40%|████      | 2/5 [34:38<51:57, 1039.12s/it]  

Validation Accuracy: 0.8750897988505747
Train loss: 0.2926776656365672


Epoch:  60%|██████    | 3/5 [51:57<34:38, 1039.19s/it]

Validation Accuracy: 0.8901760057471264
Train loss: 0.2657543064785499


Epoch:  80%|████████  | 4/5 [1:09:17<17:19, 1039.26s/it]

Validation Accuracy: 0.905621408045977
Train loss: 0.23953153425084012


Epoch: 100%|██████████| 5/5 [1:26:37<00:00, 1039.42s/it]

Validation Accuracy: 0.9135237068965517






Zero Shot Model for test: French 

Weighted F1 Score: 0.6582832958912695
Macro F1 Score: 0.584831632430229
Accuracy score: 0.6934426229508197 


Model Summary:
Language: French
Seed value: 2018


Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense

Train loss: 0.5960467568150273


Epoch:  20%|██        | 1/5 [00:08<00:32,  8.17s/it]

Validation Accuracy: 0.653125
Train loss: 0.49616401201045074


Epoch:  40%|████      | 2/5 [00:16<00:24,  8.18s/it]

Validation Accuracy: 0.6375
Train loss: 0.4077246556127513


Epoch:  60%|██████    | 3/5 [00:24<00:16,  8.21s/it]

Validation Accuracy: 0.6171875
Train loss: 0.29622458921814404


Epoch:  80%|████████  | 4/5 [00:32<00:08,  8.21s/it]

Validation Accuracy: 0.5890625
Train loss: 0.22328302146935905


Epoch: 100%|██████████| 5/5 [00:41<00:00,  8.21s/it]

Validation Accuracy: 0.68125





Weighted F1 Score: 0.7303481697527759
Macro F1 Score: 0.6950693024312656
Accuracy score: 0.7295081967213115 


Model Summary:
Language: French
Seed value: 2019


Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense

Train loss: 0.5985802207831983


Epoch:  20%|██        | 1/5 [00:08<00:32,  8.13s/it]

Validation Accuracy: 0.640625
Train loss: 0.48058044413725537


Epoch:  40%|████      | 2/5 [00:16<00:24,  8.12s/it]

Validation Accuracy: 0.6328125
Train loss: 0.35988784098514803


Epoch:  60%|██████    | 3/5 [00:24<00:16,  8.12s/it]

Validation Accuracy: 0.625
Train loss: 0.25943213659856057


Epoch:  80%|████████  | 4/5 [00:32<00:08,  8.12s/it]

Validation Accuracy: 0.6671875
Train loss: 0.16485585310254935


Epoch: 100%|██████████| 5/5 [00:40<00:00,  8.11s/it]

Validation Accuracy: 0.6578125





Weighted F1 Score: 0.7266684429735377
Macro F1 Score: 0.6869118905047049
Accuracy score: 0.7295081967213115 


Model Summary:
Language: French
Seed value: 2020


Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense

Train loss: 0.6177335193863621


Epoch:  20%|██        | 1/5 [00:08<00:32,  8.12s/it]

Validation Accuracy: 0.684375
Train loss: 0.48487425567927184


Epoch:  40%|████      | 2/5 [00:16<00:24,  8.12s/it]

Validation Accuracy: 0.65625
Train loss: 0.3934462702384702


Epoch:  60%|██████    | 3/5 [00:24<00:16,  8.12s/it]

Validation Accuracy: 0.696875
Train loss: 0.30564809452604363


Epoch:  80%|████████  | 4/5 [00:32<00:08,  8.12s/it]

Validation Accuracy: 0.68125
Train loss: 0.23027063657840094


Epoch: 100%|██████████| 5/5 [00:40<00:00,  8.11s/it]

Validation Accuracy: 0.6984375





Weighted F1 Score: 0.6744010088272384
Macro F1 Score: 0.6246153846153846
Accuracy score: 0.680327868852459 


Model Summary:
Language: French
Seed value: 2021


Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense

Train loss: 0.596904817002791


Epoch:  20%|██        | 1/5 [00:08<00:32,  8.11s/it]

Validation Accuracy: 0.6734375
Train loss: 0.4992177271180683


Epoch:  40%|████      | 2/5 [00:16<00:24,  8.12s/it]

Validation Accuracy: 0.70625
Train loss: 0.3879742696881294


Epoch:  60%|██████    | 3/5 [00:24<00:16,  8.12s/it]

Validation Accuracy: 0.7046875
Train loss: 0.31147828339426603


Epoch:  80%|████████  | 4/5 [00:32<00:08,  8.11s/it]

Validation Accuracy: 0.7015625
Train loss: 0.1961522346569432


Epoch: 100%|██████████| 5/5 [00:40<00:00,  8.11s/it]

Validation Accuracy: 0.7296875





Weighted F1 Score: 0.7568531887721858
Macro F1 Score: 0.7187599364069952
Accuracy score: 0.7622950819672131 


Model Summary:
Language: French
Seed value: 2022


Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense

Train loss: 0.5771397291510193


Epoch:  20%|██        | 1/5 [00:08<00:32,  8.10s/it]

Validation Accuracy: 0.6125
Train loss: 0.4623527571007057


Epoch:  40%|████      | 2/5 [00:16<00:24,  8.10s/it]

Validation Accuracy: 0.6484375
Train loss: 0.36395569504411135


Epoch:  60%|██████    | 3/5 [00:24<00:16,  8.10s/it]

Validation Accuracy: 0.5765625
Train loss: 0.20475372199521022


Epoch:  80%|████████  | 4/5 [00:32<00:08,  8.10s/it]

Validation Accuracy: 0.6140625
Train loss: 0.12859782145393114


Epoch: 100%|██████████| 5/5 [00:40<00:00,  8.10s/it]

Validation Accuracy: 0.5984375





Weighted F1 Score: 0.6941114907723125
Macro F1 Score: 0.660590087122044
Accuracy score: 0.6885245901639344 

The Average  Weighted F1-Score of the Language  French is: 0.7164764602196101
The Average  Macro F1-Score of the Language  French is: 0.6771893202160788
Average Weighted F1-Score 0.8998807266942185 and Average Macro F1-Score 0.8482725556661921 of Arabic 

Average Weighted F1-Score 0.8605187630127364 and Average Macro F1-Score 0.7724262136650044 of English 

Average Weighted F1-Score 0.7164764602196101 and Average Macro F1-Score 0.6771893202160788 of French 



In [None]:
# Downloading: 100%
# 5.07M/5.07M [00:01<00:00, 3.25MB/s]

# Downloading: 100%
# 512/512 [00:00<00:00, 1.26kB/s]

# Downloading: 100%
# 1.12G/1.12G [00:16<00:00, 67.5MB/s]

# Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
# - This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
# - This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
# Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out_proj.bias']
# You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
# Epoch:   0%|          | 0/5 [00:00<?, ?it/s]Train loss: 0.3734834009796569
# Epoch:  20%|██        | 1/5 [16:39<1:06:38, 999.74s/it]Validation Accuracy: 0.8605697151424287
# Train loss: 0.32180686287823346
# Epoch:  40%|████      | 2/5 [33:17<49:57, 999.17s/it]  Validation Accuracy: 0.8744586040313177
# Train loss: 0.2944139986020392
# Epoch:  60%|██████    | 3/5 [49:54<33:17, 998.62s/it]Validation Accuracy: 0.8913980509745127
# Train loss: 0.26376109515860824
# Epoch:  80%|████████  | 4/5 [1:06:31<16:38, 998.07s/it]Validation Accuracy: 0.9080043311677495
# Train loss: 0.23477912550828983
# Epoch: 100%|██████████| 5/5 [1:23:08<00:00, 997.65s/it]Validation Accuracy: 0.9182179743461603


# Zero Shot Model for test: Arabic 

# Weighted F1 Score: 0.7539611137082988
# Macro F1 Score: 0.579280906376903
# Accuracy score: 0.796821008984105 

# ====================================================================================================

# Model Summary:
# Language: Arabic
# Seed value: 2018
# Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
# - This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
# - This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
# Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out_proj.bias']
# You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
# Epoch:   0%|          | 0/5 [00:00<?, ?it/s]Train loss: 0.33319818572412563
# Epoch:  20%|██        | 1/5 [00:38<02:35, 38.78s/it]Validation Accuracy: 0.893581081081081
# Train loss: 0.24456842366869994
# Epoch:  40%|████      | 2/5 [01:17<01:55, 38.65s/it]Validation Accuracy: 0.893581081081081
# Train loss: 0.1924621929583235
# Epoch:  60%|██████    | 3/5 [01:55<01:17, 38.56s/it]Validation Accuracy: 0.8969594594594594
# Train loss: 0.13619173324564252
# Epoch:  80%|████████  | 4/5 [02:33<00:38, 38.50s/it]Validation Accuracy: 0.9037162162162162
# Train loss: 0.10047227145707983
# Epoch: 100%|██████████| 5/5 [03:12<00:00, 38.44s/it]Validation Accuracy: 0.9037162162162162

# Weighted F1 Score: 0.9030220797171122
# Macro F1 Score: 0.8530112353126162
# Accuracy score: 0.9041450777202072 

# ====================================================================================================

# Model Summary:
# Language: Arabic
# Seed value: 2019
# Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
# - This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
# - This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
# Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out_proj.bias']
# You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
# Epoch:   0%|          | 0/5 [00:00<?, ?it/s]Train loss: 0.3348154091078231
# Epoch:  20%|██        | 1/5 [00:38<02:33, 38.36s/it]Validation Accuracy: 0.8902027027027027
# Train loss: 0.23595602606518531
# Epoch:  40%|████      | 2/5 [01:16<01:55, 38.36s/it]Validation Accuracy: 0.902027027027027
# Train loss: 0.16212986346598215
# Epoch:  60%|██████    | 3/5 [01:55<01:16, 38.36s/it]Validation Accuracy: 0.8806306306306307
# Train loss: 0.11001020638587496
# Epoch:  80%|████████  | 4/5 [02:33<00:38, 38.36s/it]Validation Accuracy: 0.8969594594594594
# Train loss: 0.06901852041207929
# Epoch: 100%|██████████| 5/5 [03:11<00:00, 38.37s/it]Validation Accuracy: 0.8969594594594594

# Weighted F1 Score: 0.8951135569321306
# Macro F1 Score: 0.8422156784360977
# Accuracy score: 0.8955094991364422 

# ====================================================================================================

# Model Summary:
# Language: Arabic
# Seed value: 2020
# Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
# - This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
# - This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
# Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out_proj.bias']
# You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
# Epoch:   0%|          | 0/5 [00:00<?, ?it/s]Train loss: 0.3392985665833387
# Epoch:  20%|██        | 1/5 [00:38<02:33, 38.36s/it]Validation Accuracy: 0.8997747747747747
# Train loss: 0.23165368424420515
# Epoch:  40%|████      | 2/5 [01:16<01:55, 38.36s/it]Validation Accuracy: 0.910472972972973
# Train loss: 0.16688190930060984
# Epoch:  60%|██████    | 3/5 [01:55<01:16, 38.37s/it]Validation Accuracy: 0.8918918918918919
# Train loss: 0.11207939455318668
# Epoch:  80%|████████  | 4/5 [02:33<00:38, 38.37s/it]Validation Accuracy: 0.902027027027027
# Train loss: 0.07831082947774035
# Epoch: 100%|██████████| 5/5 [03:11<00:00, 38.37s/it]Validation Accuracy: 0.9121621621621622

# Weighted F1 Score: 0.9152442062445174
# Macro F1 Score: 0.8727849122932884
# Accuracy score: 0.9153713298791019 

# ====================================================================================================

# Model Summary:
# Language: Arabic
# Seed value: 2021
# Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
# - This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
# - This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
# Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out_proj.bias']
# You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
# Epoch:   0%|          | 0/5 [00:00<?, ?it/s]Train loss: 0.3347698391275847
# Epoch:  20%|██        | 1/5 [00:38<02:33, 38.35s/it]Validation Accuracy: 0.8952702702702703
# Train loss: 0.2297513915416528
# Epoch:  40%|████      | 2/5 [01:16<01:55, 38.35s/it]Validation Accuracy: 0.8986486486486487
# Train loss: 0.1544889106825874
# Epoch:  60%|██████    | 3/5 [01:55<01:16, 38.35s/it]Validation Accuracy: 0.893581081081081
# Train loss: 0.10174737859693334
# Epoch:  80%|████████  | 4/5 [02:33<00:38, 38.35s/it]Validation Accuracy: 0.8868243243243243
# Train loss: 0.06801824952479393
# Epoch: 100%|██████████| 5/5 [03:11<00:00, 38.35s/it]Validation Accuracy: 0.8918918918918919

# Weighted F1 Score: 0.8946241366100686
# Macro F1 Score: 0.8383294441452761
# Accuracy score: 0.8972366148531952 

# ====================================================================================================

# Model Summary:
# Language: Arabic
# Seed value: 2022
# Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
# - This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
# - This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
# Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out_proj.bias']
# You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
# Epoch:   0%|          | 0/5 [00:00<?, ?it/s]Train loss: 0.3164765451206114
# Epoch:  20%|██        | 1/5 [00:38<02:33, 38.37s/it]Validation Accuracy: 0.8811936936936936
# Train loss: 0.2179719464941405
# Epoch:  40%|████      | 2/5 [01:16<01:55, 38.37s/it]Validation Accuracy: 0.8800675675675675
# Train loss: 0.15630929098339882
# Epoch:  60%|██████    | 3/5 [01:55<01:16, 38.37s/it]Validation Accuracy: 0.893018018018018
# Train loss: 0.09713975124622101
# Epoch:  80%|████████  | 4/5 [02:33<00:38, 38.36s/it]Validation Accuracy: 0.902027027027027
# Train loss: 0.06295521385549736
# Epoch: 100%|██████████| 5/5 [03:11<00:00, 38.36s/it]Validation Accuracy: 0.9003378378378378

# Weighted F1 Score: 0.891399653967264
# Macro F1 Score: 0.8350215081436817
# Accuracy score: 0.8929188255613126 

# ====================================================================================================
# The Average  Weighted F1-Score of the Language  Arabic is: 0.8998807266942185
# The Average  Macro F1-Score of the Language  Arabic is: 0.8482725556661921
# ========================================================================================================================================================================================================
# Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
# - This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
# - This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
# Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out_proj.bias']
# You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
# Epoch:   0%|          | 0/5 [00:00<?, ?it/s]Train loss: 0.5197336050813601
# Epoch:  20%|██        | 1/5 [01:05<04:22, 65.72s/it]Validation Accuracy: 0.8225524475524476
# Train loss: 0.37277104894388213
# Epoch:  40%|████      | 2/5 [02:11<03:16, 65.67s/it]Validation Accuracy: 0.875
# Train loss: 0.2984921696880773
# Epoch:  60%|██████    | 3/5 [03:16<02:11, 65.63s/it]Validation Accuracy: 0.9155375874125874
# Train loss: 0.24781449089787866
# Epoch:  80%|████████  | 4/5 [04:22<01:05, 65.60s/it]Validation Accuracy: 0.9332386363636364
# Train loss: 0.2031506095761986
# Epoch: 100%|██████████| 5/5 [05:27<00:00, 65.58s/it]Validation Accuracy: 0.9340034965034966


# Zero Shot Model for test: English 

# Weighted F1 Score: 0.7568607824813645
# Macro F1 Score: 0.5590009465496957
# Accuracy score: 0.7945288638476278 

# ====================================================================================================

# Model Summary:
# Language: English
# Seed value: 2018
# Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
# - This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
# - This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
# Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out_proj.bias']
# You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
# Epoch:   0%|          | 0/5 [00:00<?, ?it/s]Train loss: 0.36120438524302834
# Epoch:  20%|██        | 1/5 [11:36<46:24, 696.05s/it]Validation Accuracy: 0.8508093070308548
# Train loss: 0.31638229705365856
# Epoch:  40%|████      | 2/5 [23:12<34:48, 696.23s/it]Validation Accuracy: 0.8565882650480526
# Train loss: 0.2834163580750798
# Epoch:  60%|██████    | 3/5 [34:50<23:13, 696.61s/it]Validation Accuracy: 0.8612417804754678
# Train loss: 0.24601886915670876
# Epoch:  80%|████████  | 4/5 [46:27<11:36, 696.88s/it]Validation Accuracy: 0.8590477996965098
# Train loss: 0.2116280013976795
# Epoch: 100%|██████████| 5/5 [58:04<00:00, 696.87s/it]Validation Accuracy: 0.8616021750126455

# Weighted F1 Score: 0.8611457140498046
# Macro F1 Score: 0.7736580630256132
# Accuracy score: 0.8633690600398368 

# ====================================================================================================

# Model Summary:
# Language: English
# Seed value: 2019
# Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
# - This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
# - This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
# Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out_proj.bias']
# You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
# Epoch:   0%|          | 0/5 [00:00<?, ?it/s]Train loss: 0.3881661957156082
# Epoch:  20%|██        | 1/5 [11:36<46:25, 696.37s/it]Validation Accuracy: 0.8485078401618613
# Train loss: 0.3237153235423172
# Epoch:  40%|████      | 2/5 [23:13<34:49, 696.64s/it]Validation Accuracy: 0.868063985837127
# Train loss: 0.2905816528316262
# Epoch:  60%|██████    | 3/5 [34:50<23:13, 696.65s/it]Validation Accuracy: 0.8712000505816894
# Train loss: 0.2585893683648312
# Epoch:  80%|████████  | 4/5 [46:26<11:36, 696.58s/it]Validation Accuracy: 0.8652250885179564
# Train loss: 0.2260776189386638
# Epoch: 100%|██████████| 5/5 [58:03<00:00, 696.62s/it]Validation Accuracy: 0.868715225088518

# Weighted F1 Score: 0.8601001324082551
# Macro F1 Score: 0.7737363834608356
# Accuracy score: 0.8611875177843119 

# ====================================================================================================

# Model Summary:
# Language: English
# Seed value: 2020
# Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
# - This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
# - This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
# Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out_proj.bias']
# You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
# Epoch:   0%|          | 0/5 [00:00<?, ?it/s]Train loss: 0.35840046078607896
# Epoch:  20%|██        | 1/5 [11:37<46:29, 697.38s/it]Validation Accuracy: 0.8592501264542235
# Train loss: 0.3137555369970411
# Epoch:  40%|████      | 2/5 [23:14<34:51, 697.26s/it]Validation Accuracy: 0.8637013151239251
# Train loss: 0.31825844710622103
# Epoch:  60%|██████    | 3/5 [34:50<23:13, 696.92s/it]Validation Accuracy: 0.8586747597369753
# Train loss: 0.2708698518359426
# Epoch:  80%|████████  | 4/5 [46:26<11:36, 696.80s/it]Validation Accuracy: 0.8665591805766313
# Train loss: 0.24352293666480435
# Epoch: 100%|██████████| 5/5 [58:03<00:00, 696.68s/it]Validation Accuracy: 0.8598191704602933

# Weighted F1 Score: 0.8637769310424932
# Macro F1 Score: 0.7766964910627228
# Accuracy score: 0.8667836479180498 

# ====================================================================================================

# Model Summary:
# Language: English
# Seed value: 2021
# Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
# - This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
# - This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
# Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out_proj.bias']
# You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
# Epoch:   0%|          | 0/5 [00:00<?, ?it/s]Train loss: 0.35709880783535347
# Epoch:  20%|██        | 1/5 [11:37<46:28, 697.12s/it]Validation Accuracy: 0.8568538189175517
# Train loss: 0.3194343811056502
# Epoch:  40%|████      | 2/5 [23:14<34:51, 697.27s/it]Validation Accuracy: 0.8671029337379869
# Train loss: 0.2848335175358894
# Epoch:  60%|██████    | 3/5 [34:51<23:14, 697.12s/it]Validation Accuracy: 0.8650227617602427
# Train loss: 0.24903473242715166
# Epoch:  80%|████████  | 4/5 [46:26<11:36, 696.53s/it]Validation Accuracy: 0.866831057157309
# Train loss: 0.21495501431375064
# Epoch: 100%|██████████| 5/5 [58:02<00:00, 696.47s/it]Validation Accuracy: 0.8689997470915529

# Weighted F1 Score: 0.857066481971672
# Macro F1 Score: 0.7660983043582515
# Accuracy score: 0.8599544721616238 

# ====================================================================================================

# Model Summary:
# Language: English
# Seed value: 2022
# Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
# - This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
# - This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
# Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out_proj.bias']
# You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
# Epoch:   0%|          | 0/5 [00:00<?, ?it/s]Train loss: 0.358400487027854
# Epoch:  20%|██        | 1/5 [11:35<46:22, 695.57s/it]Validation Accuracy: 0.8569613050075872
# Train loss: 0.312022206006677
# Epoch:  40%|████      | 2/5 [23:11<34:47, 695.69s/it]Validation Accuracy: 0.8667488619119879
# Train loss: 0.27882615593095456
# Epoch:  60%|██████    | 3/5 [34:48<23:12, 696.01s/it]Validation Accuracy: 0.8677794638340921
# Train loss: 0.23995585219238866
# Epoch:  80%|████████  | 4/5 [46:25<11:36, 696.22s/it]Validation Accuracy: 0.868259989883662
# Train loss: 0.203045981643415
# Epoch: 100%|██████████| 5/5 [58:01<00:00, 696.26s/it]Validation Accuracy: 0.8654906423874558

# Weighted F1 Score: 0.8605045555914574
# Macro F1 Score: 0.7719418264175988
# Accuracy score: 0.8631793607132695 

# ====================================================================================================
# The Average  Weighted F1-Score of the Language  English is: 0.8605187630127364
# The Average  Macro F1-Score of the Language  English is: 0.7724262136650044
# ========================================================================================================================================================================================================
# Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
# - This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
# - This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
# Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out_proj.bias']
# You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
# Epoch:   0%|          | 0/5 [00:00<?, ?it/s]Train loss: 0.3685951924133457
# Epoch:  20%|██        | 1/5 [17:19<1:09:16, 1039.12s/it]Validation Accuracy: 0.8521012931034483
# Train loss: 0.319102366432524
# Epoch:  40%|████      | 2/5 [34:38<51:57, 1039.12s/it]  Validation Accuracy: 0.8750897988505747
# Train loss: 0.2926776656365672
# Epoch:  60%|██████    | 3/5 [51:57<34:38, 1039.19s/it]Validation Accuracy: 0.8901760057471264
# Train loss: 0.2657543064785499
# Epoch:  80%|████████  | 4/5 [1:09:17<17:19, 1039.26s/it]Validation Accuracy: 0.905621408045977
# Train loss: 0.23953153425084012
# Epoch: 100%|██████████| 5/5 [1:26:37<00:00, 1039.42s/it]Validation Accuracy: 0.9135237068965517


# Zero Shot Model for test: French 

# Weighted F1 Score: 0.6582832958912695
# Macro F1 Score: 0.584831632430229
# Accuracy score: 0.6934426229508197 

# ====================================================================================================

# Model Summary:
# Language: French
# Seed value: 2018
# Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
# - This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
# - This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
# Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out_proj.bias']
# You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
# Epoch:   0%|          | 0/5 [00:00<?, ?it/s]Train loss: 0.5960467568150273
# Epoch:  20%|██        | 1/5 [00:08<00:32,  8.17s/it]Validation Accuracy: 0.653125
# Train loss: 0.49616401201045074
# Epoch:  40%|████      | 2/5 [00:16<00:24,  8.18s/it]Validation Accuracy: 0.6375
# Train loss: 0.4077246556127513
# Epoch:  60%|██████    | 3/5 [00:24<00:16,  8.21s/it]Validation Accuracy: 0.6171875
# Train loss: 0.29622458921814404
# Epoch:  80%|████████  | 4/5 [00:32<00:08,  8.21s/it]Validation Accuracy: 0.5890625
# Train loss: 0.22328302146935905
# Epoch: 100%|██████████| 5/5 [00:41<00:00,  8.21s/it]Validation Accuracy: 0.68125

# Weighted F1 Score: 0.7303481697527759
# Macro F1 Score: 0.6950693024312656
# Accuracy score: 0.7295081967213115 

# ====================================================================================================

# Model Summary:
# Language: French
# Seed value: 2019
# Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
# - This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
# - This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
# Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out_proj.bias']
# You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
# Epoch:   0%|          | 0/5 [00:00<?, ?it/s]Train loss: 0.5985802207831983
# Epoch:  20%|██        | 1/5 [00:08<00:32,  8.13s/it]Validation Accuracy: 0.640625
# Train loss: 0.48058044413725537
# Epoch:  40%|████      | 2/5 [00:16<00:24,  8.12s/it]Validation Accuracy: 0.6328125
# Train loss: 0.35988784098514803
# Epoch:  60%|██████    | 3/5 [00:24<00:16,  8.12s/it]Validation Accuracy: 0.625
# Train loss: 0.25943213659856057
# Epoch:  80%|████████  | 4/5 [00:32<00:08,  8.12s/it]Validation Accuracy: 0.6671875
# Train loss: 0.16485585310254935
# Epoch: 100%|██████████| 5/5 [00:40<00:00,  8.11s/it]Validation Accuracy: 0.6578125

# Weighted F1 Score: 0.7266684429735377
# Macro F1 Score: 0.6869118905047049
# Accuracy score: 0.7295081967213115 

# ====================================================================================================

# Model Summary:
# Language: French
# Seed value: 2020
# Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
# - This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
# - This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
# Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out_proj.bias']
# You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
# Epoch:   0%|          | 0/5 [00:00<?, ?it/s]Train loss: 0.6177335193863621
# Epoch:  20%|██        | 1/5 [00:08<00:32,  8.12s/it]Validation Accuracy: 0.684375
# Train loss: 0.48487425567927184
# Epoch:  40%|████      | 2/5 [00:16<00:24,  8.12s/it]Validation Accuracy: 0.65625
# Train loss: 0.3934462702384702
# Epoch:  60%|██████    | 3/5 [00:24<00:16,  8.12s/it]Validation Accuracy: 0.696875
# Train loss: 0.30564809452604363
# Epoch:  80%|████████  | 4/5 [00:32<00:08,  8.12s/it]Validation Accuracy: 0.68125
# Train loss: 0.23027063657840094
# Epoch: 100%|██████████| 5/5 [00:40<00:00,  8.11s/it]Validation Accuracy: 0.6984375

# Weighted F1 Score: 0.6744010088272384
# Macro F1 Score: 0.6246153846153846
# Accuracy score: 0.680327868852459 

# ====================================================================================================

# Model Summary:
# Language: French
# Seed value: 2021
# Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
# - This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
# - This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
# Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out_proj.bias']
# You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
# Epoch:   0%|          | 0/5 [00:00<?, ?it/s]Train loss: 0.596904817002791
# Epoch:  20%|██        | 1/5 [00:08<00:32,  8.11s/it]Validation Accuracy: 0.6734375
# Train loss: 0.4992177271180683
# Epoch:  40%|████      | 2/5 [00:16<00:24,  8.12s/it]Validation Accuracy: 0.70625
# Train loss: 0.3879742696881294
# Epoch:  60%|██████    | 3/5 [00:24<00:16,  8.12s/it]Validation Accuracy: 0.7046875
# Train loss: 0.31147828339426603
# Epoch:  80%|████████  | 4/5 [00:32<00:08,  8.11s/it]Validation Accuracy: 0.7015625
# Train loss: 0.1961522346569432
# Epoch: 100%|██████████| 5/5 [00:40<00:00,  8.11s/it]Validation Accuracy: 0.7296875

# Weighted F1 Score: 0.7568531887721858
# Macro F1 Score: 0.7187599364069952
# Accuracy score: 0.7622950819672131 

# ====================================================================================================

# Model Summary:
# Language: French
# Seed value: 2022
# Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
# - This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
# - This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
# Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out_proj.bias']
# You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
# Epoch:   0%|          | 0/5 [00:00<?, ?it/s]Train loss: 0.5771397291510193
# Epoch:  20%|██        | 1/5 [00:08<00:32,  8.10s/it]Validation Accuracy: 0.6125
# Train loss: 0.4623527571007057
# Epoch:  40%|████      | 2/5 [00:16<00:24,  8.10s/it]Validation Accuracy: 0.6484375
# Train loss: 0.36395569504411135
# Epoch:  60%|██████    | 3/5 [00:24<00:16,  8.10s/it]Validation Accuracy: 0.5765625
# Train loss: 0.20475372199521022
# Epoch:  80%|████████  | 4/5 [00:32<00:08,  8.10s/it]Validation Accuracy: 0.6140625
# Train loss: 0.12859782145393114
# Epoch: 100%|██████████| 5/5 [00:40<00:00,  8.10s/it]Validation Accuracy: 0.5984375

# Weighted F1 Score: 0.6941114907723125
# Macro F1 Score: 0.660590087122044
# Accuracy score: 0.6885245901639344 

# ====================================================================================================
# The Average  Weighted F1-Score of the Language  French is: 0.7164764602196101
# The Average  Macro F1-Score of the Language  French is: 0.6771893202160788
# ========================================================================================================================================================================================================
# Average Weighted F1-Score 0.8998807266942185 and Average Macro F1-Score 0.8482725556661921 of Arabic 

# Average Weighted F1-Score 0.8605187630127364 and Average Macro F1-Score 0.7724262136650044 of English 

# Average Weighted F1-Score 0.7164764602196101 and Average Macro F1-Score 0.6771893202160788 of French 
