In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install pytorch-pretrained-bert pytorch-nlp

Collecting pytorch-pretrained-bert
[?25l  Downloading https://files.pythonhosted.org/packages/d7/e0/c08d5553b89973d9a240605b9c12404bcf8227590de62bae27acbcfe076b/pytorch_pretrained_bert-0.6.2-py3-none-any.whl (123kB)
[K     |████████████████████████████████| 133kB 2.8MB/s 
[?25hCollecting pytorch-nlp
[?25l  Downloading https://files.pythonhosted.org/packages/4f/51/f0ee1efb75f7cc2e3065c5da1363d6be2eec79691b2821594f3f2329528c/pytorch_nlp-0.5.0-py3-none-any.whl (90kB)
[K     |████████████████████████████████| 92kB 6.3MB/s 
Collecting boto3
[?25l  Downloading https://files.pythonhosted.org/packages/85/54/099a2ea5d4b2d5931a26f280a7585f613b1fafaac9189e489a9e25004a01/boto3-1.16.13-py2.py3-none-any.whl (129kB)
[K     |████████████████████████████████| 133kB 8.5MB/s 
Collecting botocore<1.20.0,>=1.19.13
[?25l  Downloading https://files.pythonhosted.org/packages/99/40/b5e681d80dc46bafd0dc2e55266190cc432dfd5b72b9e7e1c5743aa6c362/botocore-1.19.13-py2.py3-none-any.whl (6.7MB)
[K     |██████

In [3]:
import tensorflow as tf
import torch
from torch.utils.data import TensorDataset, DataLoader, RandomSampler
from keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from pytorch_pretrained_bert import BertTokenizer, BertConfig, BertAdam, BertForSequenceClassification
from tqdm import tqdm, trange
import pandas as pd
import io
import os
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import f1_score, accuracy_score
from statistics import mode

import warnings

warnings.simplefilter("ignore", UserWarning)
warnings.simplefilter("ignore", FutureWarning)
warnings.simplefilter("ignore", DeprecationWarning)

# specify GPU device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
n_gpu = torch.cuda.device_count()
torch.cuda.get_device_name(0)

'Tesla P100-PCIE-16GB'

In [4]:
def train_validate_split(df,seed=42,validate_percent = 0.1):
  train, validate = train_test_split(df, test_size=validate_percent, stratify=df['label'])
  return train, validate

def train_validate_test_split(df,seed, train_percent=.8, validate_percent=.125):
  train, test = train_test_split(df, train_size=train_percent, stratify=df['label'])
  train, validate = train_test_split(train, test_size=validate_percent, stratify=train['label'])
  return train, validate, test

def sample_data(df,sample,seed):
    X_train, _, y_train, _ = train_test_split( df['tweet'], df['label'], train_size=sample, random_state=seed, stratify=df['label'])
    return pd.concat([X_train,y_train], axis = 1 )

def tokenize_data(df):
    sentences = ["[CLS] " + query + " [SEP]" for query in df['tweet']]
    # Tokenize with multilingual BERT tokenizer
    tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-uncased', do_lower_case=True)
    tokenized_texts = [tokenizer.tokenize(sent) for sent in sentences]
    MAX_LEN = 128

    # Use the BERT tokenizer to convert the tokens to their index numbers in the BERT vocabulary
    input_ids = pad_sequences([tokenizer.convert_tokens_to_ids(txt) for txt in tokenized_texts],
                          maxlen=MAX_LEN, dtype="long", truncating="post", padding="post")
    # Create attention masks
    attention_masks = []
    # Create a mask of 1s for each token followed by 0s for padding
    for seq in input_ids:
        seq_mask = [float(i>0) for i in seq]
        attention_masks.append(seq_mask)
    return input_ids, attention_masks

def Data_Loader(inputs_ids, attention_masks, df, batch_size=16):
    data = TensorDataset(torch.LongTensor(inputs_ids), torch.LongTensor(attention_masks), torch.LongTensor(df['label'].values))
    dataloader = DataLoader(data, sampler=RandomSampler(data), batch_size=batch_size)
    return dataloader

In [5]:
def model_train(model, train_dataloader, validation_dataloader):
    # Store our loss and accuracy for plotting
    train_loss_set = []
    # BERT training loop
    epochs = 3
    for _ in trange(epochs, desc="Epoch"):  
        # Set our model to training mode
        model.train()
        # Tracking variables
        tr_loss = 0
        nb_tr_examples, nb_tr_steps = 0, 0
        # Train the data for one epoch
        for step, batch in enumerate(train_dataloader):
            # Add batch to GPU
            batch = tuple(t.to(device) for t in batch)
            # Unpack the inputs from our dataloader
            b_input_ids, b_input_mask, b_labels = batch
            # Clear out the gradients (by default they accumulate)
            optimizer.zero_grad()
            # Forward pass
            loss = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask, labels=b_labels)
            train_loss_set.append(loss.item())    
            # Backward pass
            loss.backward()
            # Update parameters and take a step using the computed gradient
            optimizer.step()
            # Update tracking variables
            tr_loss += loss.item()
            nb_tr_examples += b_input_ids.size(0)
            nb_tr_steps += 1
        print("Train loss: {}".format(tr_loss/nb_tr_steps))

        ## VALIDATION

        # Put model in evaluation mode
        model.eval()
        # Tracking variables 
        eval_loss, eval_accuracy = 0, 0
        nb_eval_steps, nb_eval_examples = 0, 0
        # Evaluate data for one epoch
        for batch in validation_dataloader:
            # Add batch to GPU
            batch = tuple(t.to(device) for t in batch)
            # Unpack the inputs from our dataloader
            b_input_ids, b_input_mask, b_labels = batch
            # Telling the model not to compute or store gradients, saving memory and speeding up validation
            with torch.no_grad():
                # Forward pass, calculate logit predictions
                logits = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask)    
            # Move logits and labels to CPU
            logits = logits.detach().cpu().numpy()
            label_ids = b_labels.to('cpu').numpy()
            tmp_eval_accuracy = flat_accuracy(logits, label_ids)    
            eval_accuracy += tmp_eval_accuracy
            nb_eval_steps += 1
        validation_accuracy = (eval_accuracy/nb_eval_steps)
        print("Validation Accuracy: {}".format(eval_accuracy/nb_eval_steps))
    return validation_accuracy

In [6]:
def flat_accuracy(preds, labels):
    pred_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()
    return np.sum(pred_flat == labels_flat) / len(labels_flat)

def model_test(model,prediction_dataloader):
    model.eval()
    # Tracking variables 
    predictions , true_labels = [], []
# Predict 
    for batch in prediction_dataloader:
        # Add batch to GPU
        batch = tuple(t.to(device) for t in batch)
        # Unpack the inputs from our dataloader
        b_input_ids, b_input_mask, b_labels = batch
        # Telling the model not to compute or store gradients, saving memory and speeding up prediction
        with torch.no_grad():
            # Forward pass, calculate logit predictions
            logits = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask)
        # Move logits and labels to CPU
        logits = logits.detach().cpu().numpy()
        label_ids = b_labels.to('cpu').numpy()
        predictions+=list(np.argmax(logits, axis=1).flatten())
        true_labels+=list(label_ids.flatten())

    test_f1_score = f1_score(true_labels, predictions, average= 'macro')
    print("Macro F1 Score:",test_f1_score)
    test_accuracy_score = accuracy_score(true_labels, predictions)
    print("Accuracy score:", test_accuracy_score, "\n")
    print("="*100)
    return test_f1_score

In [7]:
def model_initialise(path= None , use_saved_model=False):
  # Load BertForSequenceClassification, the pretrained BERT model with a single linear classification layer on top. 
  model = BertForSequenceClassification.from_pretrained("bert-base-multilingual-uncased", num_labels=2).cuda()

  param_optimizer = list(model.named_parameters())
  no_decay = ['bias', 'gamma', 'beta']
  optimizer_grouped_parameters = [
    {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
     'weight_decay_rate': 0.01},
    {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
     'weight_decay_rate': 0.0}]

  optimizer = BertAdam(optimizer_grouped_parameters, lr=2e-5, warmup=.1)
  
  if(use_saved_model==True):
    checkpoint = torch.load(path)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

  return model, optimizer

In [9]:
languages = {'en_fr':'Arabic','fr_ar':'English','en_ar':'French'} 
directory = './'
drive_directory = '/content/drive/My Drive/Multilingual models' 
for lang1, lang2 in languages.items():
	df = pd.read_csv(os.path.join(directory, lang1+'.csv'))
	train_df, validation_df = train_validate_split(df)
	train_input_ids, train_attention_masks = tokenize_data(train_df)
	train_dataloader = Data_Loader(train_input_ids, train_attention_masks, train_df)
	validation_input_ids, validation_attention_masks = tokenize_data(validation_df)
	validation_dataloader = Data_Loader(validation_input_ids, validation_attention_masks, validation_df)
	model, optimizer = model_initialise()
	validation_accuracy = model_train(model, train_dataloader, validation_dataloader)
	fname = 'mBERT'+lang2+'.pth'
	path = os.path.join(drive_directory, fname)
	torch.save({'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict()}, path)
	test_df = pd.read_csv(os.path.join(directory, lang2+'.csv'))
	test_input_ids, test_attention_masks = tokenize_data(test_df)
	test_dataloader = Data_Loader(test_input_ids, test_attention_masks, test_df)
	print("\nZero Shot Model for test:",lang2,'\n')
	zero_shot_score = model_test(model, test_dataloader)
	sample_sizes = [16, 32, 64, 128, 256]
	for sample in sample_sizes:
		df = pd.read_csv(os.path.join(directory, lang2+'.csv'))
		seeds = [2018, 2019, 2020, 2021, 2022]
		scores=[]
		for seed in seeds:
			np.random.seed(seed)
			train_df, validation_df, test_df = train_validate_test_split(df,seed)
			train_len = len(train_df)
			if sample==256 and seed==2022:
			    sample_sizes.append(train_len)
			if sample == train_len and seed == 2022:
			    sample_sizes.remove(train_len)
			model, optimizer = model_initialise(path,use_saved_model=True)
			if (sample != train_len):
				train_df_sample = sample_data(train_df,sample,seed)
				train_input_ids, train_attention_masks = tokenize_data(train_df_sample)
				train_dataloader = Data_Loader(train_input_ids, train_attention_masks, train_df_sample)
			else:
				train_input_ids, train_attention_masks = tokenize_data(train_df)
				train_dataloader = Data_Loader(train_input_ids, train_attention_masks, train_df)

			validation_input_ids, validation_attention_masks = tokenize_data(validation_df)
			validation_dataloader = Data_Loader(validation_input_ids, validation_attention_masks, validation_df)
			print("\nModel Summary:")
			print('Language:', lang2)
			print('Sample Size:', sample)
			print('Seed value:', seed)
			validation_accuracy = model_train(model, train_dataloader, validation_dataloader)
			test_input_ids, test_attention_masks = tokenize_data(test_df)
			test_dataloader = Data_Loader(test_input_ids, test_attention_masks, test_df)
			scores.append(model_test(model, test_dataloader))
		print("\nThe Average F1-Score of",lang2,"for the sample size",sample,"is:",sum(scores)/len(scores))

In [None]:
# 100%|██████████| 871891/871891 [00:00<00:00, 2036519.43B/s]
# Token indices sequence length is longer than the specified maximum  sequence length for this BERT model (552 > 512). Running this sequence through BERT will result in indexing errors
# 100%|██████████| 623743758/623743758 [00:21<00:00, 28919379.27B/s]
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]Train loss: 0.35031267179318726
# Epoch:  33%|███▎      | 1/3 [24:09<48:19, 1449.51s/it]Validation Accuracy: 0.8565821256038648
# Train loss: 0.32770233828394957
# Epoch:  67%|██████▋   | 2/3 [48:24<24:11, 1451.21s/it]Validation Accuracy: 0.8531151091121106
# Train loss: 0.3251827644762679
# Epoch: 100%|██████████| 3/3 [1:12:41<00:00, 1453.91s/it]Validation Accuracy: 0.8629122938530734


# Zero Shot Model for test: Arabic 

# Macro F1 Score: 0.4417945823357679
# Accuracy score: 0.7888735314443677 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: Arabic
# Sample Size: 16
# Seed value: 2018
# Train loss: 0.4877600073814392
# Epoch:  33%|███▎      | 1/3 [00:02<00:04,  2.17s/it]Validation Accuracy: 0.7921195652173914
# Train loss: 0.43853458762168884
# Epoch:  67%|██████▋   | 2/3 [00:04<00:02,  2.18s/it]Validation Accuracy: 0.7853260869565217
# Train loss: 0.31721967458724976
# Epoch: 100%|██████████| 3/3 [00:06<00:00,  2.18s/it]Validation Accuracy: 0.7948369565217391

# Macro F1 Score: 0.44512784258277366
# Accuracy score: 0.7892918825561313 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: Arabic
# Sample Size: 16
# Seed value: 2019
# Train loss: 0.3746156692504883
# Epoch:  33%|███▎      | 1/3 [00:02<00:04,  2.15s/it]Validation Accuracy: 0.7839673913043478
# Train loss: 0.4709932208061218
# Epoch:  67%|██████▋   | 2/3 [00:04<00:02,  2.15s/it]Validation Accuracy: 0.7758152173913043
# Train loss: 0.44868525862693787
# Epoch: 100%|██████████| 3/3 [00:06<00:00,  2.15s/it]Validation Accuracy: 0.782608695652174

# Macro F1 Score: 0.4405797101449275
# Accuracy score: 0.7875647668393783 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: Arabic
# Sample Size: 16
# Seed value: 2020
# Train loss: 0.537870466709137
# Epoch:  33%|███▎      | 1/3 [00:02<00:04,  2.14s/it]Validation Accuracy: 0.7880434782608695
# Train loss: 0.5740197896957397
# Epoch:  67%|██████▋   | 2/3 [00:04<00:02,  2.15s/it]Validation Accuracy: 0.7921195652173914
# Train loss: 0.48160475492477417
# Epoch: 100%|██████████| 3/3 [00:06<00:00,  2.16s/it]Validation Accuracy: 0.7907608695652174

# Macro F1 Score: 0.44484123645172713
# Accuracy score: 0.7884283246977547 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: Arabic
# Sample Size: 16
# Seed value: 2021
# Train loss: 0.6358118653297424
# Epoch:  33%|███▎      | 1/3 [00:02<00:04,  2.17s/it]Validation Accuracy: 0.7921195652173914
# Train loss: 0.5365766882896423
# Epoch:  67%|██████▋   | 2/3 [00:04<00:02,  2.16s/it]Validation Accuracy: 0.7921195652173914
# Train loss: 0.5496635437011719
# Epoch: 100%|██████████| 3/3 [00:06<00:00,  2.16s/it]Validation Accuracy: 0.7921195652173914

# Macro F1 Score: 0.4445545017782492
# Accuracy score: 0.7875647668393783 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: Arabic
# Sample Size: 16
# Seed value: 2022
# Train loss: 0.5506600141525269
# Epoch:  33%|███▎      | 1/3 [00:02<00:04,  2.14s/it]Validation Accuracy: 0.7880434782608695
# Train loss: 0.6158686876296997
# Epoch:  67%|██████▋   | 2/3 [00:04<00:02,  2.14s/it]Validation Accuracy: 0.7866847826086957
# Train loss: 0.45166030526161194
# Epoch: 100%|██████████| 3/3 [00:06<00:00,  2.15s/it]Validation Accuracy: 0.7907608695652174

# Macro F1 Score: 0.4405797101449275
# Accuracy score: 0.7875647668393783 

# ====================================================================================================

# The Average F1-Score of Arabic for the sample size 16 is: 0.443136600220521
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: Arabic
# Sample Size: 32
# Seed value: 2018
# Train loss: 0.47631022334098816
# Epoch:  33%|███▎      | 1/3 [00:02<00:04,  2.39s/it]Validation Accuracy: 0.7894021739130435
# Train loss: 0.39869584143161774
# Epoch:  67%|██████▋   | 2/3 [00:04<00:02,  2.39s/it]Validation Accuracy: 0.7894021739130435
# Train loss: 0.39814023673534393
# Epoch: 100%|██████████| 3/3 [00:07<00:00,  2.39s/it]Validation Accuracy: 0.782608695652174

# Macro F1 Score: 0.4642141990291262
# Accuracy score: 0.7892918825561313 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: Arabic
# Sample Size: 32
# Seed value: 2019
# Train loss: 0.677393764257431
# Epoch:  33%|███▎      | 1/3 [00:02<00:04,  2.38s/it]Validation Accuracy: 0.7880434782608695
# Train loss: 0.5702735483646393
# Epoch:  67%|██████▋   | 2/3 [00:04<00:02,  2.38s/it]Validation Accuracy: 0.779891304347826
# Train loss: 0.5174460858106613
# Epoch: 100%|██████████| 3/3 [00:07<00:00,  2.39s/it]Validation Accuracy: 0.7839673913043478

# Macro F1 Score: 0.46017067671318895
# Accuracy score: 0.7884283246977547 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: Arabic
# Sample Size: 32
# Seed value: 2020
# Train loss: 0.5345990210771561
# Epoch:  33%|███▎      | 1/3 [00:02<00:04,  2.40s/it]Validation Accuracy: 0.7934782608695652
# Train loss: 0.4911712408065796
# Epoch:  67%|██████▋   | 2/3 [00:04<00:02,  2.40s/it]Validation Accuracy: 0.7961956521739131
# Train loss: 0.46835319697856903
# Epoch: 100%|██████████| 3/3 [00:07<00:00,  2.40s/it]Validation Accuracy: 0.7921195652173914

# Macro F1 Score: 0.45509795658426605
# Accuracy score: 0.7849740932642487 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: Arabic
# Sample Size: 32
# Seed value: 2021
# Train loss: 0.6761367321014404
# Epoch:  33%|███▎      | 1/3 [00:02<00:04,  2.39s/it]Validation Accuracy: 0.779891304347826
# Train loss: 0.42713192105293274
# Epoch:  67%|██████▋   | 2/3 [00:04<00:02,  2.39s/it]Validation Accuracy: 0.7880434782608695
# Train loss: 0.5124907940626144
# Epoch: 100%|██████████| 3/3 [00:07<00:00,  2.40s/it]Validation Accuracy: 0.7894021739130435

# Macro F1 Score: 0.4753586710294488
# Accuracy score: 0.7901554404145078 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: Arabic
# Sample Size: 32
# Seed value: 2022
# Train loss: 0.6386280953884125
# Epoch:  33%|███▎      | 1/3 [00:02<00:04,  2.40s/it]Validation Accuracy: 0.782608695652174
# Train loss: 0.5096715986728668
# Epoch:  67%|██████▋   | 2/3 [00:04<00:02,  2.40s/it]Validation Accuracy: 0.7771739130434783
# Train loss: 0.5280434042215347
# Epoch: 100%|██████████| 3/3 [00:07<00:00,  2.39s/it]Validation Accuracy: 0.78125

# Macro F1 Score: 0.4442676365840721
# Accuracy score: 0.7867012089810017 

# ====================================================================================================

# The Average F1-Score of Arabic for the sample size 32 is: 0.45982182798802046
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: Arabic
# Sample Size: 64
# Seed value: 2018
# Train loss: 0.5445476695895195
# Epoch:  33%|███▎      | 1/3 [00:02<00:05,  2.86s/it]Validation Accuracy: 0.7894021739130435
# Train loss: 0.530044287443161
# Epoch:  67%|██████▋   | 2/3 [00:05<00:02,  2.86s/it]Validation Accuracy: 0.7934782608695652
# Train loss: 0.4286636412143707
# Epoch: 100%|██████████| 3/3 [00:08<00:00,  2.86s/it]Validation Accuracy: 0.7894021739130435

# Macro F1 Score: 0.44512784258277366
# Accuracy score: 0.7892918825561313 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: Arabic
# Sample Size: 64
# Seed value: 2019
# Train loss: 0.6089915484189987
# Epoch:  33%|███▎      | 1/3 [00:02<00:05,  2.87s/it]Validation Accuracy: 0.78125
# Train loss: 0.5037054270505905
# Epoch:  67%|██████▋   | 2/3 [00:05<00:02,  2.87s/it]Validation Accuracy: 0.7948369565217391
# Train loss: 0.4557698369026184
# Epoch: 100%|██████████| 3/3 [00:08<00:00,  2.86s/it]Validation Accuracy: 0.7853260869565217

# Macro F1 Score: 0.44484123645172713
# Accuracy score: 0.7884283246977547 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: Arabic
# Sample Size: 64
# Seed value: 2020
# Train loss: 0.5609106048941612
# Epoch:  33%|███▎      | 1/3 [00:02<00:05,  2.85s/it]Validation Accuracy: 0.7907608695652174
# Train loss: 0.5048648715019226
# Epoch:  67%|██████▋   | 2/3 [00:05<00:02,  2.85s/it]Validation Accuracy: 0.7866847826086957
# Train loss: 0.4291808754205704
# Epoch: 100%|██████████| 3/3 [00:08<00:00,  2.86s/it]Validation Accuracy: 0.7853260869565217

# Macro F1 Score: 0.44084983099951713
# Accuracy score: 0.7884283246977547 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: Arabic
# Sample Size: 64
# Seed value: 2021
# Train loss: 0.5369466841220856
# Epoch:  33%|███▎      | 1/3 [00:02<00:05,  2.87s/it]Validation Accuracy: 0.7921195652173914
# Train loss: 0.55982705950737
# Epoch:  67%|██████▋   | 2/3 [00:05<00:02,  2.86s/it]Validation Accuracy: 0.7934782608695652
# Train loss: 0.49174918234348297
# Epoch: 100%|██████████| 3/3 [00:08<00:00,  2.86s/it]Validation Accuracy: 0.779891304347826

# Macro F1 Score: 0.4490703188369626
# Accuracy score: 0.7892918825561313 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: Arabic
# Sample Size: 64
# Seed value: 2022
# Train loss: 0.5853915736079216
# Epoch:  33%|███▎      | 1/3 [00:02<00:05,  2.86s/it]Validation Accuracy: 0.7866847826086957
# Train loss: 0.5579991340637207
# Epoch:  67%|██████▋   | 2/3 [00:05<00:02,  2.86s/it]Validation Accuracy: 0.7948369565217391
# Train loss: 0.47996123880147934
# Epoch: 100%|██████████| 3/3 [00:08<00:00,  2.86s/it]Validation Accuracy: 0.7866847826086957

# Macro F1 Score: 0.44084983099951713
# Accuracy score: 0.7884283246977547 

# ====================================================================================================

# The Average F1-Score of Arabic for the sample size 64 is: 0.44414781197409947
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: Arabic
# Sample Size: 128
# Seed value: 2018
# Train loss: 0.5351656377315521
# Epoch:  33%|███▎      | 1/3 [00:03<00:07,  3.83s/it]Validation Accuracy: 0.7880434782608695
# Train loss: 0.4358520284295082
# Epoch:  67%|██████▋   | 2/3 [00:07<00:03,  3.82s/it]Validation Accuracy: 0.779891304347826
# Train loss: 0.4474550224840641
# Epoch: 100%|██████████| 3/3 [00:11<00:00,  3.81s/it]Validation Accuracy: 0.7989130434782609

# Macro F1 Score: 0.520943651433176
# Accuracy score: 0.8013816925734024 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: Arabic
# Sample Size: 128
# Seed value: 2019
# Train loss: 0.5876385718584061
# Epoch:  33%|███▎      | 1/3 [00:03<00:07,  3.80s/it]Validation Accuracy: 0.78125
# Train loss: 0.4993381015956402
# Epoch:  67%|██████▋   | 2/3 [00:07<00:03,  3.79s/it]Validation Accuracy: 0.7880434782608695
# Train loss: 0.4376717023551464
# Epoch: 100%|██████████| 3/3 [00:11<00:00,  3.79s/it]Validation Accuracy: 0.7975543478260869

# Macro F1 Score: 0.4769815727857012
# Accuracy score: 0.7936096718480138 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: Arabic
# Sample Size: 128
# Seed value: 2020
# Train loss: 0.4564414247870445
# Epoch:  33%|███▎      | 1/3 [00:03<00:07,  3.79s/it]Validation Accuracy: 0.7948369565217391
# Train loss: 0.4572427049279213
# Epoch:  67%|██████▋   | 2/3 [00:07<00:03,  3.80s/it]Validation Accuracy: 0.7921195652173914
# Train loss: 0.28521990310400724
# Epoch: 100%|██████████| 3/3 [00:11<00:00,  3.80s/it]Validation Accuracy: 0.8274456521739131

# Macro F1 Score: 0.6579425211006367
# Accuracy score: 0.8298791018998273 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: Arabic
# Sample Size: 128
# Seed value: 2021
# Train loss: 0.5158258713781834
# Epoch:  33%|███▎      | 1/3 [00:03<00:07,  3.80s/it]Validation Accuracy: 0.782608695652174
# Train loss: 0.4578445926308632
# Epoch:  67%|██████▋   | 2/3 [00:07<00:03,  3.80s/it]Validation Accuracy: 0.7921195652173914
# Train loss: 0.36842665262520313
# Epoch: 100%|██████████| 3/3 [00:11<00:00,  3.81s/it]Validation Accuracy: 0.8002717391304348

# Macro F1 Score: 0.49647308918736105
# Accuracy score: 0.7979274611398963 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: Arabic
# Sample Size: 128
# Seed value: 2022
# Train loss: 0.550058402121067
# Epoch:  33%|███▎      | 1/3 [00:03<00:07,  3.83s/it]Validation Accuracy: 0.7866847826086957
# Train loss: 0.45853661745786667
# Epoch:  67%|██████▋   | 2/3 [00:07<00:03,  3.82s/it]Validation Accuracy: 0.7880434782608695
# Train loss: 0.4035132862627506
# Epoch: 100%|██████████| 3/3 [00:11<00:00,  3.82s/it]Validation Accuracy: 0.811141304347826

# Macro F1 Score: 0.5821629165673827
# Accuracy score: 0.8169257340241797 

# ====================================================================================================

# The Average F1-Score of Arabic for the sample size 128 is: 0.5469007502148515
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: Arabic
# Sample Size: 256
# Seed value: 2018
# Train loss: 0.5149418320506811
# Epoch:  33%|███▎      | 1/3 [00:05<00:11,  5.70s/it]Validation Accuracy: 0.7921195652173914
# Train loss: 0.4195546731352806
# Epoch:  67%|██████▋   | 2/3 [00:11<00:05,  5.70s/it]Validation Accuracy: 0.8179347826086957
# Train loss: 0.3504837276414037
# Epoch: 100%|██████████| 3/3 [00:17<00:00,  5.71s/it]Validation Accuracy: 0.8274456521739131

# Macro F1 Score: 0.7182855626326963
# Accuracy score: 0.8290155440414507 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: Arabic
# Sample Size: 256
# Seed value: 2019
# Train loss: 0.5347324442118406
# Epoch:  33%|███▎      | 1/3 [00:05<00:11,  5.74s/it]Validation Accuracy: 0.7880434782608695
# Train loss: 0.44904034584760666
# Epoch:  67%|██████▋   | 2/3 [00:11<00:05,  5.74s/it]Validation Accuracy: 0.8165760869565217
# Train loss: 0.33564471430145204
# Epoch: 100%|██████████| 3/3 [00:17<00:00,  5.73s/it]Validation Accuracy: 0.8274456521739131

# Macro F1 Score: 0.6917192097519966
# Accuracy score: 0.8359240069084629 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: Arabic
# Sample Size: 256
# Seed value: 2020
# Train loss: 0.508904873393476
# Epoch:  33%|███▎      | 1/3 [00:05<00:11,  5.75s/it]Validation Accuracy: 0.7934782608695652
# Train loss: 0.37521744426339865
# Epoch:  67%|██████▋   | 2/3 [00:11<00:05,  5.74s/it]Validation Accuracy: 0.8274456521739131
# Train loss: 0.31107282917946577
# Epoch: 100%|██████████| 3/3 [00:17<00:00,  5.73s/it]Validation Accuracy: 0.84375

# Macro F1 Score: 0.7027720739219712
# Accuracy score: 0.8411053540587219 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: Arabic
# Sample Size: 256
# Seed value: 2021
# Train loss: 0.47822216898202896
# Epoch:  33%|███▎      | 1/3 [00:05<00:11,  5.74s/it]Validation Accuracy: 0.7921195652173914
# Train loss: 0.37390390131622553
# Epoch:  67%|██████▋   | 2/3 [00:11<00:05,  5.75s/it]Validation Accuracy: 0.8233695652173914
# Train loss: 0.27208448480814695
# Epoch: 100%|██████████| 3/3 [00:17<00:00,  5.75s/it]Validation Accuracy: 0.8097826086956522

# Macro F1 Score: 0.5948454012765513
# Accuracy score: 0.8151986183074266 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: Arabic
# Sample Size: 256
# Seed value: 2022
# Train loss: 0.523190813139081
# Epoch:  33%|███▎      | 1/3 [00:05<00:11,  5.74s/it]Validation Accuracy: 0.7907608695652174
# Train loss: 0.3772939518094063
# Epoch:  67%|██████▋   | 2/3 [00:11<00:05,  5.74s/it]Validation Accuracy: 0.8220108695652174
# Train loss: 0.29486718913540244
# Epoch: 100%|██████████| 3/3 [00:17<00:00,  5.73s/it]Validation Accuracy: 0.8288043478260869

# Macro F1 Score: 0.7225017972681524
# Accuracy score: 0.844559585492228 

# ====================================================================================================

# The Average F1-Score of Arabic for the sample size 256 is: 0.6860248089702736
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: Arabic
# Sample Size: 4051
# Seed value: 2018
# Train loss: 0.3981095462426402
# Epoch:  33%|███▎      | 1/3 [01:16<02:33, 76.85s/it]Validation Accuracy: 0.8790760869565217
# Train loss: 0.31772420958436626
# Epoch:  67%|██████▋   | 2/3 [02:33<01:16, 76.93s/it]Validation Accuracy: 0.8872282608695652
# Train loss: 0.287292120707947
# Epoch: 100%|██████████| 3/3 [03:51<00:00, 77.10s/it]Validation Accuracy: 0.8885869565217391

# Macro F1 Score: 0.8153414295033572
# Accuracy score: 0.8955094991364422 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: Arabic
# Sample Size: 4051
# Seed value: 2019
# Train loss: 0.4046933833591946
# Epoch:  33%|███▎      | 1/3 [01:16<02:33, 76.83s/it]Validation Accuracy: 0.8709239130434783
# Train loss: 0.3386469119307747
# Epoch:  67%|██████▋   | 2/3 [02:33<01:16, 76.89s/it]Validation Accuracy: 0.8695652173913043
# Train loss: 0.2992549875063663
# Epoch: 100%|██████████| 3/3 [03:51<00:00, 77.03s/it]Validation Accuracy: 0.8627717391304348

# Macro F1 Score: 0.8607503607503607
# Accuracy score: 0.9101899827288429 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: Arabic
# Sample Size: 4051
# Seed value: 2020
# Train loss: 0.41032994804134126
# Epoch:  33%|███▎      | 1/3 [01:17<02:35, 77.59s/it]Validation Accuracy: 0.8682065217391305
# Train loss: 0.3320006223906666
# Epoch:  67%|██████▋   | 2/3 [02:35<01:17, 77.75s/it]Validation Accuracy: 0.8872282608695652
# Train loss: 0.3045483290773085
# Epoch: 100%|██████████| 3/3 [03:53<00:00, 77.90s/it]Validation Accuracy: 0.8953804347826086

# Macro F1 Score: 0.8736524227353839
# Accuracy score: 0.9196891191709845 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: Arabic
# Sample Size: 4051
# Seed value: 2021
# Train loss: 0.39447353780269623
# Epoch:  33%|███▎      | 1/3 [01:17<02:35, 77.87s/it]Validation Accuracy: 0.8804347826086957
# Train loss: 0.3118106427158843
# Epoch:  67%|██████▋   | 2/3 [02:36<01:17, 77.96s/it]Validation Accuracy: 0.8845108695652174
# Train loss: 0.28322701206965495
# Epoch: 100%|██████████| 3/3 [03:54<00:00, 78.08s/it]Validation Accuracy: 0.8817934782608695

# Macro F1 Score: 0.8452816109629377
# Accuracy score: 0.9084628670120898 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: Arabic
# Sample Size: 4051
# Seed value: 2022
# Train loss: 0.3950808967701644
# Epoch:  33%|███▎      | 1/3 [01:18<02:36, 78.18s/it]Validation Accuracy: 0.8600543478260869
# Train loss: 0.3283273592379755
# Epoch:  67%|██████▋   | 2/3 [02:36<01:18, 78.23s/it]Validation Accuracy: 0.8777173913043478
# Train loss: 0.29545282330422745
# Epoch: 100%|██████████| 3/3 [03:54<00:00, 78.23s/it]Validation Accuracy: 0.8722826086956522

# Macro F1 Score: 0.8237897505996441
# Accuracy score: 0.8981001727115717 

# ====================================================================================================

# The Average F1-Score of Arabic for the sample size 4051 is: 0.8437631149103367

In [None]:
# 100%|██████████| 623743758/623743758 [00:24<00:00, 25124460.61B/s]
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]Train loss: 0.43113599793065954
# Epoch:  33%|███▎      | 1/3 [01:39<03:19, 99.86s/it]Validation Accuracy: 0.849868881118881
# Train loss: 0.32142672359566143
# Epoch:  67%|██████▋   | 2/3 [03:19<01:39, 99.87s/it]Validation Accuracy: 0.8536931818181818
# Train loss: 0.26929735707802865
# Epoch: 100%|██████████| 3/3 [04:59<00:00, 99.95s/it]Validation Accuracy: 0.8448426573426573

# Token indices sequence length is longer than the specified maximum  sequence length for this BERT model (552 > 512). Running this sequence through BERT will result in indexing errors

# Zero Shot Model for test: English 

# Macro F1 Score: 0.4831627812546718
# Accuracy score: 0.8005710166372622 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: English
# Sample Size: 16
# Seed value: 2018
# Train loss: 0.4099963903427124
# Epoch:  33%|███▎      | 1/3 [00:34<01:09, 34.96s/it]Validation Accuracy: 0.8023775375110326
# Train loss: 0.40801510214805603
# Epoch:  67%|██████▋   | 2/3 [01:09<00:34, 34.96s/it]Validation Accuracy: 0.8029843336275375
# Train loss: 0.40257084369659424
# Epoch: 100%|██████████| 3/3 [01:44<00:00, 34.99s/it]Validation Accuracy: 0.8037428287731685

# Macro F1 Score: 0.46735720817932624
# Accuracy score: 0.8039457459926017 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: English
# Sample Size: 16
# Seed value: 2019
# Train loss: 0.43733009696006775
# Epoch:  33%|███▎      | 1/3 [00:34<01:09, 34.97s/it]Validation Accuracy: 0.8028326345984113
# Train loss: 0.38114693760871887
# Epoch:  67%|██████▋   | 2/3 [01:09<00:34, 34.97s/it]Validation Accuracy: 0.8038945278022948
# Train loss: 0.4252425730228424
# Epoch: 100%|██████████| 3/3 [01:44<00:00, 34.99s/it]Validation Accuracy: 0.8046530229479258

# Macro F1 Score: 0.47027882560675344
# Accuracy score: 0.8057004647633501 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: English
# Sample Size: 16
# Seed value: 2020
# Train loss: 0.5467470288276672
# Epoch:  33%|███▎      | 1/3 [00:34<01:09, 34.91s/it]Validation Accuracy: 0.8024189099735217
# Train loss: 0.5087454915046692
# Epoch:  67%|██████▋   | 2/3 [01:09<00:34, 34.93s/it]Validation Accuracy: 0.8041220763459841
# Train loss: 0.5010417699813843
# Epoch: 100%|██████████| 3/3 [01:44<00:00, 34.97s/it]Validation Accuracy: 0.8054597859664607

# Macro F1 Score: 0.462872615156682
# Accuracy score: 0.8066015365645451 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: English
# Sample Size: 16
# Seed value: 2021
# Train loss: 0.5551973581314087
# Epoch:  33%|███▎      | 1/3 [00:34<01:09, 34.89s/it]Validation Accuracy: 0.8044323698146514
# Train loss: 0.445564329624176
# Epoch:  67%|██████▋   | 2/3 [01:09<00:34, 34.93s/it]Validation Accuracy: 0.8064734112974404
# Train loss: 0.4258171617984772
# Epoch: 100%|██████████| 3/3 [01:44<00:00, 34.95s/it]Validation Accuracy: 0.8066664827890556

# Macro F1 Score: 0.46602204498269006
# Accuracy score: 0.8055107654367827 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Token indices sequence length is longer than the specified maximum  sequence length for this BERT model (552 > 512). Running this sequence through BERT will result in indexing errors
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: English
# Sample Size: 16
# Seed value: 2022
# Train loss: 0.3632422387599945
# Epoch:  33%|███▎      | 1/3 [00:34<01:09, 34.85s/it]Validation Accuracy: 0.8025706090026479
# Train loss: 0.28625187277793884
# Epoch:  67%|██████▋   | 2/3 [01:09<00:34, 34.84s/it]Validation Accuracy: 0.8045082193292145
# Train loss: 0.318490207195282
# Epoch: 100%|██████████| 3/3 [01:44<00:00, 34.86s/it]Validation Accuracy: 0.8056804390997353

# Macro F1 Score: 0.47638998683388184
# Accuracy score: 0.8058901640899174 

# ====================================================================================================

# The Average F1-Score of English for the sample size 16 is: 0.4685841361518667
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: English
# Sample Size: 32
# Seed value: 2018
# Train loss: 0.481960654258728
# Epoch:  33%|███▎      | 1/3 [00:35<01:10, 35.07s/it]Validation Accuracy: 0.8046185458958518
# Train loss: 0.43791867792606354
# Epoch:  67%|██████▋   | 2/3 [01:10<00:35, 35.08s/it]Validation Accuracy: 0.8056390666372462
# Train loss: 0.40904052555561066
# Epoch: 100%|██████████| 3/3 [01:45<00:00, 35.11s/it]Validation Accuracy: 0.8061769086496029

# Macro F1 Score: 0.454780586762388
# Accuracy score: 0.8065066869012615 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: English
# Sample Size: 32
# Seed value: 2019
# Train loss: 0.40320681035518646
# Epoch:  33%|███▎      | 1/3 [00:35<01:10, 35.10s/it]Validation Accuracy: 0.80408759929391
# Train loss: 0.3914676308631897
# Epoch:  67%|██████▋   | 2/3 [01:10<00:35, 35.12s/it]Validation Accuracy: 0.8044668468667255
# Train loss: 0.3631250932812691
# Epoch: 100%|██████████| 3/3 [01:45<00:00, 35.14s/it]Validation Accuracy: 0.8038600507502207

# Macro F1 Score: 0.4753402626949118
# Accuracy score: 0.8045148439723039 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: English
# Sample Size: 32
# Seed value: 2020
# Train loss: 0.47909659147262573
# Epoch:  33%|███▎      | 1/3 [00:35<01:10, 35.15s/it]Validation Accuracy: 0.8035566526919683
# Train loss: 0.4376790523529053
# Epoch:  67%|██████▋   | 2/3 [01:10<00:35, 35.17s/it]Validation Accuracy: 0.8060596866725508
# Train loss: 0.3964773118495941
# Epoch: 100%|██████████| 3/3 [01:45<00:00, 35.18s/it]Validation Accuracy: 0.8073491284201236

# Macro F1 Score: 0.4819843048810956
# Accuracy score: 0.8087830788200702 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: English
# Sample Size: 32
# Seed value: 2021
# Train loss: 0.5885375142097473
# Epoch:  33%|███▎      | 1/3 [00:35<01:10, 35.11s/it]Validation Accuracy: 0.8054115180935569
# Train loss: 0.534212738275528
# Epoch:  67%|██████▋   | 2/3 [01:10<00:35, 35.13s/it]Validation Accuracy: 0.8064389342453664
# Train loss: 0.47948476672172546
# Epoch: 100%|██████████| 3/3 [01:45<00:00, 35.16s/it]Validation Accuracy: 0.8067837047661077

# Macro F1 Score: 0.45028432361993304
# Accuracy score: 0.8072180593758892 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Token indices sequence length is longer than the specified maximum  sequence length for this BERT model (552 > 512). Running this sequence through BERT will result in indexing errors
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: English
# Sample Size: 32
# Seed value: 2022
# Train loss: 0.38540011644363403
# Epoch:  33%|███▎      | 1/3 [00:35<01:10, 35.14s/it]Validation Accuracy: 0.804128971756399
# Train loss: 0.34513017535209656
# Epoch:  67%|██████▋   | 2/3 [01:10<00:35, 35.15s/it]Validation Accuracy: 0.8054597859664607
# Train loss: 0.33124737441539764
# Epoch: 100%|██████████| 3/3 [01:45<00:00, 35.18s/it]Validation Accuracy: 0.806252758164166

# Macro F1 Score: 0.4706688844354512
# Accuracy score: 0.8071232097126055 

# ====================================================================================================

# The Average F1-Score of English for the sample size 32 is: 0.46661167247875585
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: English
# Sample Size: 64
# Seed value: 2018
# Train loss: 0.4707513526082039
# Epoch:  33%|███▎      | 1/3 [00:35<01:11, 35.70s/it]Validation Accuracy: 0.8052942961165048
# Train loss: 0.41815243661403656
# Epoch:  67%|██████▋   | 2/3 [01:11<00:35, 35.71s/it]Validation Accuracy: 0.8062872352162401
# Train loss: 0.3521386571228504
# Epoch: 100%|██████████| 3/3 [01:47<00:00, 35.70s/it]Validation Accuracy: 0.8038531553398058

# Macro F1 Score: 0.47875848186559716
# Accuracy score: 0.805083941952006 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: English
# Sample Size: 64
# Seed value: 2019
# Train loss: 0.4334219805896282
# Epoch:  33%|███▎      | 1/3 [00:35<01:11, 35.62s/it]Validation Accuracy: 0.8055976941747572
# Train loss: 0.38532813638448715
# Epoch:  67%|██████▋   | 2/3 [01:11<00:35, 35.65s/it]Validation Accuracy: 0.8056459620476611
# Train loss: 0.2856447622179985
# Epoch: 100%|██████████| 3/3 [01:47<00:00, 35.68s/it]Validation Accuracy: 0.7887728927625772

# Macro F1 Score: 0.5380946352831874
# Accuracy score: 0.7863985582851181 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: English
# Sample Size: 64
# Seed value: 2020
# Train loss: 0.4602235108613968
# Epoch:  33%|███▎      | 1/3 [00:35<01:11, 35.73s/it]Validation Accuracy: 0.8062803398058253
# Train loss: 0.43965379893779755
# Epoch:  67%|██████▋   | 2/3 [01:11<00:35, 35.74s/it]Validation Accuracy: 0.8070802074139453
# Train loss: 0.35857948660850525
# Epoch: 100%|██████████| 3/3 [01:47<00:00, 35.73s/it]Validation Accuracy: 0.8082179501323918

# Macro F1 Score: 0.49688905839199765
# Accuracy score: 0.8080242815138007 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: English
# Sample Size: 64
# Seed value: 2021
# Train loss: 0.5725987479090691
# Epoch:  33%|███▎      | 1/3 [00:35<01:11, 35.71s/it]Validation Accuracy: 0.8066389011473962
# Train loss: 0.4916360303759575
# Epoch:  67%|██████▋   | 2/3 [01:11<00:35, 35.70s/it]Validation Accuracy: 0.808452394086496
# Train loss: 0.4244931899011135
# Epoch: 100%|██████████| 3/3 [01:47<00:00, 35.71s/it]Validation Accuracy: 0.8101555604589585

# Macro F1 Score: 0.4966334904367956
# Accuracy score: 0.8097315754529072 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Token indices sequence length is longer than the specified maximum  sequence length for this BERT model (552 > 512). Running this sequence through BERT will result in indexing errors
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: English
# Sample Size: 64
# Seed value: 2022
# Train loss: 0.4237774834036827
# Epoch:  33%|███▎      | 1/3 [00:35<01:11, 35.66s/it]Validation Accuracy: 0.805370145631068
# Train loss: 0.3782000243663788
# Epoch:  67%|██████▋   | 2/3 [01:11<00:35, 35.66s/it]Validation Accuracy: 0.8067423323036188
# Train loss: 0.3042933028191328
# Epoch: 100%|██████████| 3/3 [01:47<00:00, 35.67s/it]Validation Accuracy: 0.8066664827890556

# Macro F1 Score: 0.5031211701341662
# Accuracy score: 0.8070757848809637 

# ====================================================================================================

# The Average F1-Score of English for the sample size 64 is: 0.5026993672223489
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: English
# Sample Size: 128
# Seed value: 2018
# Train loss: 0.4767194427549839
# Epoch:  33%|███▎      | 1/3 [00:36<01:13, 36.61s/it]Validation Accuracy: 0.8067423323036188
# Train loss: 0.38964541256427765
# Epoch:  67%|██████▋   | 2/3 [01:13<00:36, 36.62s/it]Validation Accuracy: 0.7970680714916152
# Train loss: 0.2591676265001297
# Epoch: 100%|██████████| 3/3 [01:49<00:00, 36.64s/it]Validation Accuracy: 0.7925584730803178

# Macro F1 Score: 0.5825569767206635
# Accuracy score: 0.7914730152707958 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: English
# Sample Size: 128
# Seed value: 2019
# Train loss: 0.4817820377647877
# Epoch:  33%|███▎      | 1/3 [00:36<01:13, 36.75s/it]Validation Accuracy: 0.805370145631068
# Train loss: 0.3923627510666847
# Epoch:  67%|██████▋   | 2/3 [01:13<00:36, 36.74s/it]Validation Accuracy: 0.802763680494263
# Train loss: 0.2881836351007223
# Epoch: 100%|██████████| 3/3 [01:50<00:00, 36.74s/it]Validation Accuracy: 0.7656663724624889

# Macro F1 Score: 0.5910614854290478
# Accuracy score: 0.7639666129185242 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: English
# Sample Size: 128
# Seed value: 2020
# Train loss: 0.5083153508603573
# Epoch:  33%|███▎      | 1/3 [00:36<01:13, 36.79s/it]Validation Accuracy: 0.806452725066196
# Train loss: 0.426469536498189
# Epoch:  67%|██████▋   | 2/3 [01:13<00:36, 36.77s/it]Validation Accuracy: 0.8087213150926743
# Train loss: 0.3406222891062498
# Epoch: 100%|██████████| 3/3 [01:50<00:00, 36.77s/it]Validation Accuracy: 0.8023085834068844

# Macro F1 Score: 0.5679909211623383
# Accuracy score: 0.8036611970027506 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: English
# Sample Size: 128
# Seed value: 2021
# Train loss: 0.5590024255216122
# Epoch:  33%|███▎      | 1/3 [00:36<01:13, 36.65s/it]Validation Accuracy: 0.8075835723742277
# Train loss: 0.4459077939391136
# Epoch:  67%|██████▋   | 2/3 [01:13<00:36, 36.68s/it]Validation Accuracy: 0.8105003309797
# Train loss: 0.38894867710769176
# Epoch: 100%|██████████| 3/3 [01:50<00:00, 36.74s/it]Validation Accuracy: 0.8051494924977936

# Macro F1 Score: 0.5368288583758943
# Accuracy score: 0.8042777198140947 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Token indices sequence length is longer than the specified maximum  sequence length for this BERT model (552 > 512). Running this sequence through BERT will result in indexing errors
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: English
# Sample Size: 128
# Seed value: 2022
# Train loss: 0.48631179705262184
# Epoch:  33%|███▎      | 1/3 [00:36<01:13, 36.78s/it]Validation Accuracy: 0.8069078221535746
# Train loss: 0.4277756381779909
# Epoch:  67%|██████▋   | 2/3 [01:13<00:36, 36.76s/it]Validation Accuracy: 0.804770244924978
# Train loss: 0.29662161506712437
# Epoch: 100%|██████████| 3/3 [01:50<00:00, 36.79s/it]Validation Accuracy: 0.8045495917917034

# Macro F1 Score: 0.5417470807349343
# Accuracy score: 0.8050365171203642 

# ====================================================================================================

# The Average F1-Score of English for the sample size 128 is: 0.5640370644845756
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: English
# Sample Size: 256
# Seed value: 2018
# Train loss: 0.46986593399196863
# Epoch:  33%|███▎      | 1/3 [00:38<01:17, 38.64s/it]Validation Accuracy: 0.8078800750220654
# Train loss: 0.39221590012311935
# Epoch:  67%|██████▋   | 2/3 [01:17<00:38, 38.68s/it]Validation Accuracy: 0.8121690203000883
# Train loss: 0.23609184147790074
# Epoch: 100%|██████████| 3/3 [01:56<00:00, 38.70s/it]Validation Accuracy: 0.7809121248896734

# Macro F1 Score: 0.6205074396979988
# Accuracy score: 0.7826519965854121 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: English
# Sample Size: 256
# Seed value: 2019
# Train loss: 0.49670007824897766
# Epoch:  33%|███▎      | 1/3 [00:38<01:17, 38.68s/it]Validation Accuracy: 0.8086730472197705
# Train loss: 0.41641145572066307
# Epoch:  67%|██████▋   | 2/3 [01:17<00:38, 38.68s/it]Validation Accuracy: 0.8114518976169461
# Train loss: 0.2459071557968855
# Epoch: 100%|██████████| 3/3 [01:56<00:00, 38.68s/it]Validation Accuracy: 0.8004123455428067

# Macro F1 Score: 0.6020433735007277
# Accuracy score: 0.8006734326093142 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: English
# Sample Size: 256
# Seed value: 2020
# Train loss: 0.5107050035148859
# Epoch:  33%|███▎      | 1/3 [00:38<01:17, 38.74s/it]Validation Accuracy: 0.8071560569285083
# Train loss: 0.4253020389005542
# Epoch:  67%|██████▋   | 2/3 [01:17<00:38, 38.77s/it]Validation Accuracy: 0.8109554280670785
# Train loss: 0.3137670950964093
# Epoch: 100%|██████████| 3/3 [01:56<00:00, 38.78s/it]Validation Accuracy: 0.765183693733451

# Macro F1 Score: 0.6088055333806448
# Accuracy score: 0.7617376458313573 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: English
# Sample Size: 256
# Seed value: 2021
# Train loss: 0.504175927489996
# Epoch:  33%|███▎      | 1/3 [00:38<01:17, 38.75s/it]Validation Accuracy: 0.8069698808473081
# Train loss: 0.43708391953259706
# Epoch:  67%|██████▋   | 2/3 [01:17<00:38, 38.75s/it]Validation Accuracy: 0.8086385701676965
# Train loss: 0.335963798686862
# Epoch: 100%|██████████| 3/3 [01:56<00:00, 38.75s/it]Validation Accuracy: 0.8065630516328332

# Macro F1 Score: 0.5577013120218601
# Accuracy score: 0.8073603338708147 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Token indices sequence length is longer than the specified maximum  sequence length for this BERT model (552 > 512). Running this sequence through BERT will result in indexing errors
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: English
# Sample Size: 256
# Seed value: 2022
# Train loss: 0.46283324621617794
# Epoch:  33%|███▎      | 1/3 [00:38<01:17, 38.65s/it]Validation Accuracy: 0.808493766548985
# Train loss: 0.38424935657531023
# Epoch:  67%|██████▋   | 2/3 [01:17<00:38, 38.67s/it]Validation Accuracy: 0.7937306928508384
# Train loss: 0.23871747497469187
# Epoch: 100%|██████████| 3/3 [01:56<00:00, 38.75s/it]Validation Accuracy: 0.7959027471315092

# Macro F1 Score: 0.597326540679203
# Accuracy score: 0.7958360997818458 

# ====================================================================================================

# The Average F1-Score of English for the sample size 256 is: 0.5972768398560869
# t_total value of -1 results in schedule not being applied
# Token indices sequence length is longer than the specified maximum  sequence length for this BERT model (552 > 512). Running this sequence through BERT will result in indexing errors
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: English
# Sample Size: 73797
# Seed value: 2018
# Train loss: 0.3506857882674746
# Epoch:  33%|███▎      | 1/3 [23:43<47:27, 1423.51s/it]Validation Accuracy: 0.8577476831421006
# Train loss: 0.31430662151696137
# Epoch:  67%|██████▋   | 2/3 [48:01<23:53, 1433.76s/it]Validation Accuracy: 0.8711730472197705
# Train loss: 0.29566357425726403
# Epoch: 100%|██████████| 3/3 [1:12:30<00:00, 1450.05s/it]Validation Accuracy: 0.8681804390997353

# Macro F1 Score: 0.7789891385811427
# Accuracy score: 0.883572038319264 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Token indices sequence length is longer than the specified maximum  sequence length for this BERT model (552 > 512). Running this sequence through BERT will result in indexing errors
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: English
# Sample Size: 73797
# Seed value: 2019
# Train loss: 0.3500457019882714
# Epoch:  33%|███▎      | 1/3 [24:13<48:26, 1453.41s/it]Validation Accuracy: 0.8587061451897616
# Train loss: 0.3097635749488438
# Epoch:  67%|██████▋   | 2/3 [47:47<24:01, 1441.57s/it]Validation Accuracy: 0.8668220432480142
# Train loss: 0.3308915351268758
# Epoch: 100%|██████████| 3/3 [1:12:10<00:00, 1443.44s/it]Validation Accuracy: 0.8646499889673434

# Macro F1 Score: 0.7669203142683729
# Accuracy score: 0.877975908185526 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Token indices sequence length is longer than the specified maximum  sequence length for this BERT model (552 > 512). Running this sequence through BERT will result in indexing errors
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: English
# Sample Size: 73797
# Seed value: 2020
# Train loss: 0.3477986186194908
# Epoch:  33%|███▎      | 1/3 [24:21<48:42, 1461.27s/it]Validation Accuracy: 0.8568443843777582
# Train loss: 0.32089542979752483
# Epoch:  67%|██████▋   | 2/3 [47:45<24:04, 1444.10s/it]Validation Accuracy: 0.867952890556046
# Train loss: 0.3035370297051586
# Epoch: 100%|██████████| 3/3 [1:11:50<00:00, 1436.69s/it]Validation Accuracy: 0.8659394307149161

# Macro F1 Score: 0.7877455263679541
# Accuracy score: 0.8842359859622498 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Token indices sequence length is longer than the specified maximum  sequence length for this BERT model (552 > 512). Running this sequence through BERT will result in indexing errors
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: English
# Sample Size: 73797
# Seed value: 2021
# Train loss: 0.3459371478615656
# Epoch:  33%|███▎      | 1/3 [24:23<48:46, 1463.01s/it]Validation Accuracy: 0.8578235326566637
# Train loss: 0.3082692668816198
# Epoch:  67%|██████▋   | 2/3 [48:43<24:22, 1462.37s/it]Validation Accuracy: 0.8630157766990292
# Train loss: 0.29249108955457914
# Epoch: 100%|██████████| 3/3 [1:12:19<00:00, 1446.60s/it]Validation Accuracy: 0.8674150485436893

# Macro F1 Score: 0.7982963821494751
# Accuracy score: 0.8866546523759841 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Token indices sequence length is longer than the specified maximum  sequence length for this BERT model (552 > 512). Running this sequence through BERT will result in indexing errors
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: English
# Sample Size: 73797
# Seed value: 2022
# Train loss: 0.34628187021562984
# Epoch:  33%|███▎      | 1/3 [23:17<46:35, 1397.87s/it]Validation Accuracy: 0.866704821270962
# Train loss: 0.30813515411791315
# Epoch:  67%|██████▋   | 2/3 [46:29<23:15, 1395.91s/it]Validation Accuracy: 0.8717453662842013
# Train loss: 0.29112401696250223
# Epoch: 100%|██████████| 3/3 [1:09:36<00:00, 1392.16s/it]Validation Accuracy: 0.8738622572815534

# Macro F1 Score: 0.8298590211537122
# Accuracy score: 0.8971829650004742 

# ====================================================================================================

# The Average F1-Score of English for the sample size 73797 is: 0.7923620765041315

In [None]:
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]Train loss: 0.3869105575777724
# Epoch:  33%|███▎      | 1/3 [25:32<51:05, 1532.85s/it]Validation Accuracy: 0.8497665229885057
# Train loss: 0.366231967078741
# Epoch:  67%|██████▋   | 2/3 [51:05<25:32, 1532.79s/it]Validation Accuracy: 0.852819683908046
# Train loss: 0.37040582375989184
# Epoch: 100%|██████████| 3/3 [1:16:38<00:00, 1532.77s/it]Validation Accuracy: 0.8590158045977011


# Zero Shot Model for test: French 

# Macro F1 Score: 0.5720060976816932
# Accuracy score: 0.6368852459016393 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: French
# Sample Size: 16
# Seed value: 2018
# Train loss: 0.5113570690155029
# Epoch:  33%|███▎      | 1/3 [00:00<00:01,  1.51it/s]Validation Accuracy: 0.5729166666666667
# Train loss: 0.5027264356613159
# Epoch:  67%|██████▋   | 2/3 [00:01<00:00,  1.52it/s]Validation Accuracy: 0.5826388888888889
# Train loss: 0.4878215789794922
# Epoch: 100%|██████████| 3/3 [00:01<00:00,  1.51it/s]Validation Accuracy: 0.5680555555555555

# Macro F1 Score: 0.5727288471530009
# Accuracy score: 0.6270491803278688 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: French
# Sample Size: 16
# Seed value: 2019
# Train loss: 0.6387056708335876
# Epoch:  33%|███▎      | 1/3 [00:00<00:01,  1.55it/s]Validation Accuracy: 0.6840277777777778
# Train loss: 0.6439587473869324
# Epoch:  67%|██████▋   | 2/3 [00:01<00:00,  1.54it/s]Validation Accuracy: 0.6743055555555555
# Train loss: 0.643578827381134
# Epoch: 100%|██████████| 3/3 [00:01<00:00,  1.54it/s]Validation Accuracy: 0.7027777777777777

# Macro F1 Score: 0.5742254000680966
# Accuracy score: 0.6639344262295082 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: French
# Sample Size: 16
# Seed value: 2020
# Train loss: 0.5029358863830566
# Epoch:  33%|███▎      | 1/3 [00:00<00:01,  1.56it/s]Validation Accuracy: 0.6826388888888889
# Train loss: 0.5442073941230774
# Epoch:  67%|██████▋   | 2/3 [00:01<00:00,  1.55it/s]Validation Accuracy: 0.6763888888888889
# Train loss: 0.5267144441604614
# Epoch: 100%|██████████| 3/3 [00:01<00:00,  1.54it/s]Validation Accuracy: 0.6666666666666667

# Macro F1 Score: 0.5350609756097561
# Accuracy score: 0.5901639344262295 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: French
# Sample Size: 16
# Seed value: 2021
# Train loss: 0.7695850729942322
# Epoch:  33%|███▎      | 1/3 [00:00<00:01,  1.52it/s]Validation Accuracy: 0.6680555555555555
# Train loss: 0.7642257213592529
# Epoch:  67%|██████▋   | 2/3 [00:01<00:00,  1.53it/s]Validation Accuracy: 0.6777777777777778
# Train loss: 0.8199949264526367
# Epoch: 100%|██████████| 3/3 [00:01<00:00,  1.54it/s]Validation Accuracy: 0.6652777777777777

# Macro F1 Score: 0.5858355282411472
# Accuracy score: 0.6434426229508197 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: French
# Sample Size: 16
# Seed value: 2022
# Train loss: 0.7150052189826965
# Epoch:  33%|███▎      | 1/3 [00:00<00:01,  1.56it/s]Validation Accuracy: 0.6791666666666667
# Train loss: 0.7061570286750793
# Epoch:  67%|██████▋   | 2/3 [00:01<00:00,  1.56it/s]Validation Accuracy: 0.6854166666666667
# Train loss: 0.7688431143760681
# Epoch: 100%|██████████| 3/3 [00:01<00:00,  1.55it/s]Validation Accuracy: 0.6965277777777777

# Macro F1 Score: 0.5891290726817042
# Accuracy score: 0.6475409836065574 

# ====================================================================================================

# The Average F1-Score of French for the sample size 16 is: 0.5713959647507411
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: French
# Sample Size: 32
# Seed value: 2018
# Train loss: 0.6156394183635712
# Epoch:  33%|███▎      | 1/3 [00:00<00:01,  1.12it/s]Validation Accuracy: 0.5743055555555555
# Train loss: 0.7526137232780457
# Epoch:  67%|██████▋   | 2/3 [00:01<00:00,  1.12it/s]Validation Accuracy: 0.5854166666666667
# Train loss: 0.6503881216049194
# Epoch: 100%|██████████| 3/3 [00:02<00:00,  1.12it/s]Validation Accuracy: 0.5868055555555556

# Macro F1 Score: 0.5960512600404684
# Accuracy score: 0.6680327868852459 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: French
# Sample Size: 32
# Seed value: 2019
# Train loss: 0.6705072224140167
# Epoch:  33%|███▎      | 1/3 [00:00<00:01,  1.13it/s]Validation Accuracy: 0.6840277777777778
# Train loss: 0.705536812543869
# Epoch:  67%|██████▋   | 2/3 [00:01<00:00,  1.13it/s]Validation Accuracy: 0.6840277777777778
# Train loss: 0.5848736017942429
# Epoch: 100%|██████████| 3/3 [00:02<00:00,  1.12it/s]Validation Accuracy: 0.7152777777777778

# Macro F1 Score: 0.577394316505228
# Accuracy score: 0.6680327868852459 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: French
# Sample Size: 32
# Seed value: 2020
# Train loss: 0.5383942425251007
# Epoch:  33%|███▎      | 1/3 [00:00<00:01,  1.13it/s]Validation Accuracy: 0.6652777777777777
# Train loss: 0.5659926980733871
# Epoch:  67%|██████▋   | 2/3 [00:01<00:00,  1.13it/s]Validation Accuracy: 0.6527777777777778
# Train loss: 0.5585153996944427
# Epoch: 100%|██████████| 3/3 [00:02<00:00,  1.13it/s]Validation Accuracy: 0.6541666666666667

# Macro F1 Score: 0.5247723099181985
# Accuracy score: 0.569672131147541 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: French
# Sample Size: 32
# Seed value: 2021
# Train loss: 0.9283453226089478
# Epoch:  33%|███▎      | 1/3 [00:00<00:01,  1.10it/s]Validation Accuracy: 0.6715277777777777
# Train loss: 0.8321980834007263
# Epoch:  67%|██████▋   | 2/3 [00:01<00:00,  1.11it/s]Validation Accuracy: 0.6729166666666667
# Train loss: 0.7613034248352051
# Epoch: 100%|██████████| 3/3 [00:02<00:00,  1.11it/s]Validation Accuracy: 0.6715277777777777

# Macro F1 Score: 0.5955093904102402
# Accuracy score: 0.6762295081967213 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: French
# Sample Size: 32
# Seed value: 2022
# Train loss: 0.6797988414764404
# Epoch:  33%|███▎      | 1/3 [00:00<00:01,  1.13it/s]Validation Accuracy: 0.6902777777777778
# Train loss: 0.6923722624778748
# Epoch:  67%|██████▋   | 2/3 [00:01<00:00,  1.13it/s]Validation Accuracy: 0.6854166666666667
# Train loss: 0.6235540956258774
# Epoch: 100%|██████████| 3/3 [00:02<00:00,  1.12it/s]Validation Accuracy: 0.70625

# Macro F1 Score: 0.5861143984220907
# Accuracy score: 0.6475409836065574 

# ====================================================================================================

# The Average F1-Score of French for the sample size 32 is: 0.575968335059245
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: French
# Sample Size: 64
# Seed value: 2018
# Train loss: 0.6459776014089584
# Epoch:  33%|███▎      | 1/3 [00:01<00:02,  1.38s/it]Validation Accuracy: 0.5819444444444445
# Train loss: 0.6358266174793243
# Epoch:  67%|██████▋   | 2/3 [00:02<00:01,  1.38s/it]Validation Accuracy: 0.5944444444444444
# Train loss: 0.5609139576554298
# Epoch: 100%|██████████| 3/3 [00:04<00:00,  1.38s/it]Validation Accuracy: 0.6152777777777778

# Macro F1 Score: 0.585939393939394
# Accuracy score: 0.6844262295081968 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: French
# Sample Size: 64
# Seed value: 2019
# Train loss: 0.6718596518039703
# Epoch:  33%|███▎      | 1/3 [00:01<00:02,  1.36s/it]Validation Accuracy: 0.7090277777777778
# Train loss: 0.6017668321728706
# Epoch:  67%|██████▋   | 2/3 [00:02<00:01,  1.36s/it]Validation Accuracy: 0.6881944444444444
# Train loss: 0.6152493357658386
# Epoch: 100%|██████████| 3/3 [00:04<00:00,  1.37s/it]Validation Accuracy: 0.7090277777777778

# Macro F1 Score: 0.5710685163613258
# Accuracy score: 0.6598360655737705 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: French
# Sample Size: 64
# Seed value: 2020
# Train loss: 0.6989657580852509
# Epoch:  33%|███▎      | 1/3 [00:01<00:02,  1.38s/it]Validation Accuracy: 0.6791666666666667
# Train loss: 0.6375323385000229
# Epoch:  67%|██████▋   | 2/3 [00:02<00:01,  1.38s/it]Validation Accuracy: 0.6840277777777778
# Train loss: 0.7017572075128555
# Epoch: 100%|██████████| 3/3 [00:04<00:00,  1.38s/it]Validation Accuracy: 0.7055555555555555

# Macro F1 Score: 0.5300546448087431
# Accuracy score: 0.6475409836065574 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: French
# Sample Size: 64
# Seed value: 2021
# Train loss: 0.6795783191919327
# Epoch:  33%|███▎      | 1/3 [00:01<00:02,  1.36s/it]Validation Accuracy: 0.6826388888888889
# Train loss: 0.6669764071702957
# Epoch:  67%|██████▋   | 2/3 [00:02<00:01,  1.37s/it]Validation Accuracy: 0.6666666666666667
# Train loss: 0.6441071331501007
# Epoch: 100%|██████████| 3/3 [00:04<00:00,  1.37s/it]Validation Accuracy: 0.7027777777777777

# Macro F1 Score: 0.602646822370184
# Accuracy score: 0.6762295081967213 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: French
# Sample Size: 64
# Seed value: 2022
# Train loss: 0.7263007014989853
# Epoch:  33%|███▎      | 1/3 [00:01<00:02,  1.37s/it]Validation Accuracy: 0.6868055555555556
# Train loss: 0.688442125916481
# Epoch:  67%|██████▋   | 2/3 [00:02<00:01,  1.37s/it]Validation Accuracy: 0.7138888888888889
# Train loss: 0.645633727312088
# Epoch: 100%|██████████| 3/3 [00:04<00:00,  1.37s/it]Validation Accuracy: 0.6965277777777777

# Macro F1 Score: 0.5737704918032787
# Accuracy score: 0.680327868852459 

# ====================================================================================================

# The Average F1-Score of French for the sample size 64 is: 0.5726959738565851
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: French
# Sample Size: 128
# Seed value: 2018
# Train loss: 0.655342735350132
# Epoch:  33%|███▎      | 1/3 [00:02<00:04,  2.38s/it]Validation Accuracy: 0.6513888888888889
# Train loss: 0.6174135468900204
# Epoch:  67%|██████▋   | 2/3 [00:04<00:02,  2.36s/it]Validation Accuracy: 0.6555555555555556
# Train loss: 0.5184967927634716
# Epoch: 100%|██████████| 3/3 [00:07<00:00,  2.33s/it]Validation Accuracy: 0.6027777777777777

# Macro F1 Score: 0.579269592172818
# Accuracy score: 0.6352459016393442 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: French
# Sample Size: 128
# Seed value: 2019
# Train loss: 0.8273091316223145
# Epoch:  33%|███▎      | 1/3 [00:02<00:04,  2.27s/it]Validation Accuracy: 0.6458333333333333
# Train loss: 0.6768480017781258
# Epoch:  67%|██████▋   | 2/3 [00:04<00:02,  2.28s/it]Validation Accuracy: 0.6541666666666667
# Train loss: 0.5910334140062332
# Epoch: 100%|██████████| 3/3 [00:06<00:00,  2.28s/it]Validation Accuracy: 0.6791666666666667

# Macro F1 Score: 0.5303609565744719
# Accuracy score: 0.7008196721311475 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: French
# Sample Size: 128
# Seed value: 2020
# Train loss: 0.8005371615290642
# Epoch:  33%|███▎      | 1/3 [00:02<00:04,  2.31s/it]Validation Accuracy: 0.6993055555555555
# Train loss: 0.6893901415169239
# Epoch:  67%|██████▋   | 2/3 [00:04<00:02,  2.31s/it]Validation Accuracy: 0.6680555555555555
# Train loss: 0.642815001308918
# Epoch: 100%|██████████| 3/3 [00:06<00:00,  2.29s/it]Validation Accuracy: 0.7118055555555556

# Macro F1 Score: 0.5449459157030958
# Accuracy score: 0.6926229508196722 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: French
# Sample Size: 128
# Seed value: 2021
# Train loss: 0.8689212277531624
# Epoch:  33%|███▎      | 1/3 [00:02<00:04,  2.29s/it]Validation Accuracy: 0.6916666666666667
# Train loss: 0.7155642993748188
# Epoch:  67%|██████▋   | 2/3 [00:04<00:02,  2.28s/it]Validation Accuracy: 0.6729166666666667
# Train loss: 0.5929057709872723
# Epoch: 100%|██████████| 3/3 [00:06<00:00,  2.27s/it]Validation Accuracy: 0.6965277777777777

# Macro F1 Score: 0.605474710209287
# Accuracy score: 0.7090163934426229 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: French
# Sample Size: 128
# Seed value: 2022
# Train loss: 0.8722350895404816
# Epoch:  33%|███▎      | 1/3 [00:02<00:04,  2.26s/it]Validation Accuracy: 0.6930555555555555
# Train loss: 0.7364926487207413
# Epoch:  67%|██████▋   | 2/3 [00:04<00:02,  2.26s/it]Validation Accuracy: 0.6680555555555555
# Train loss: 0.6496948450803757
# Epoch: 100%|██████████| 3/3 [00:06<00:00,  2.25s/it]Validation Accuracy: 0.6888888888888889

# Macro F1 Score: 0.489272632129775
# Accuracy score: 0.6721311475409836 

# ====================================================================================================

# The Average F1-Score of French for the sample size 128 is: 0.5498647613578895
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: French
# Sample Size: 256
# Seed value: 2018
# Train loss: 0.7019825987517834
# Epoch:  33%|███▎      | 1/3 [00:04<00:08,  4.16s/it]Validation Accuracy: 0.6604166666666667
# Train loss: 0.6009964402765036
# Epoch:  67%|██████▋   | 2/3 [00:08<00:04,  4.15s/it]Validation Accuracy: 0.6381944444444445
# Train loss: 0.5826615393161774
# Epoch: 100%|██████████| 3/3 [00:12<00:00,  4.14s/it]Validation Accuracy: 0.6715277777777777

# Macro F1 Score: 0.5796578171091447
# Accuracy score: 0.7008196721311475 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: French
# Sample Size: 256
# Seed value: 2019
# Train loss: 0.7484750133007765
# Epoch:  33%|███▎      | 1/3 [00:04<00:08,  4.27s/it]Validation Accuracy: 0.6680555555555555
# Train loss: 0.6422950867563486
# Epoch:  67%|██████▋   | 2/3 [00:08<00:04,  4.27s/it]Validation Accuracy: 0.6777777777777778
# Train loss: 0.6350728757679462
# Epoch: 100%|██████████| 3/3 [00:12<00:00,  4.20s/it]Validation Accuracy: 0.6888888888888889

# Macro F1 Score: 0.44949670253384244
# Accuracy score: 0.680327868852459 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: French
# Sample Size: 256
# Seed value: 2020
# Train loss: 0.770460058003664
# Epoch:  33%|███▎      | 1/3 [00:04<00:08,  4.19s/it]Validation Accuracy: 0.6666666666666667
# Train loss: 0.6244156938046217
# Epoch:  67%|██████▋   | 2/3 [00:08<00:04,  4.16s/it]Validation Accuracy: 0.7465277777777778
# Train loss: 0.5974505543708801
# Epoch: 100%|██████████| 3/3 [00:12<00:00,  4.13s/it]Validation Accuracy: 0.7368055555555555

# Macro F1 Score: 0.5536166619757951
# Accuracy score: 0.680327868852459 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: French
# Sample Size: 256
# Seed value: 2021
# Train loss: 0.7984667364507914
# Epoch:  33%|███▎      | 1/3 [00:04<00:08,  4.18s/it]Validation Accuracy: 0.6826388888888889
# Train loss: 0.6605656389147043
# Epoch:  67%|██████▋   | 2/3 [00:08<00:04,  4.21s/it]Validation Accuracy: 0.6694444444444445
# Train loss: 0.5984538272023201
# Epoch: 100%|██████████| 3/3 [00:12<00:00,  4.20s/it]Validation Accuracy: 0.7201388888888889

# Macro F1 Score: 0.646376811594203
# Accuracy score: 0.7377049180327869 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: French
# Sample Size: 256
# Seed value: 2022
# Train loss: 0.7581952400505543
# Epoch:  33%|███▎      | 1/3 [00:04<00:08,  4.17s/it]Validation Accuracy: 0.6777777777777778
# Train loss: 0.6467741914093494
# Epoch:  67%|██████▋   | 2/3 [00:08<00:04,  4.17s/it]Validation Accuracy: 0.6826388888888889
# Train loss: 0.616688396781683
# Epoch: 100%|██████████| 3/3 [00:12<00:00,  4.17s/it]Validation Accuracy: 0.695138888888889

# Macro F1 Score: 0.5788466737371847
# Accuracy score: 0.6598360655737705 

# ====================================================================================================

# The Average F1-Score of French for the sample size 256 is: 0.561598933390034
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: French
# Sample Size: 854
# Seed value: 2018
# Train loss: 0.6781195357664308
# Epoch:  33%|███▎      | 1/3 [00:16<00:32, 16.16s/it]Validation Accuracy: 0.6006944444444444
# Train loss: 0.6145565398593447
# Epoch:  67%|██████▋   | 2/3 [00:32<00:16, 16.19s/it]Validation Accuracy: 0.725
# Train loss: 0.5731254535824505
# Epoch: 100%|██████████| 3/3 [00:48<00:00, 16.20s/it]Validation Accuracy: 0.6993055555555555

# Macro F1 Score: 0.6576339559764974
# Accuracy score: 0.7377049180327869 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: French
# Sample Size: 854
# Seed value: 2019
# Train loss: 0.6918660767042815
# Epoch:  33%|███▎      | 1/3 [00:16<00:32, 16.09s/it]Validation Accuracy: 0.6729166666666667
# Train loss: 0.6204586558377565
# Epoch:  67%|██████▋   | 2/3 [00:32<00:16, 16.11s/it]Validation Accuracy: 0.6868055555555556
# Train loss: 0.6163012954726148
# Epoch: 100%|██████████| 3/3 [00:48<00:00, 16.09s/it]Validation Accuracy: 0.6694444444444445

# Macro F1 Score: 0.602291810728045
# Accuracy score: 0.7295081967213115 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: French
# Sample Size: 854
# Seed value: 2020
# Train loss: 0.6794394445953085
# Epoch:  33%|███▎      | 1/3 [00:16<00:32, 16.09s/it]Validation Accuracy: 0.7416666666666667
# Train loss: 0.6194255218577029
# Epoch:  67%|██████▋   | 2/3 [00:32<00:16, 16.06s/it]Validation Accuracy: 0.726388888888889
# Train loss: 0.6067674351272299
# Epoch: 100%|██████████| 3/3 [00:48<00:00, 16.08s/it]Validation Accuracy: 0.7229166666666667

# Macro F1 Score: 0.587744294540411
# Accuracy score: 0.7254098360655737 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: French
# Sample Size: 854
# Seed value: 2021
# Train loss: 0.7012832364929256
# Epoch:  33%|███▎      | 1/3 [00:15<00:31, 16.00s/it]Validation Accuracy: 0.6631944444444444
# Train loss: 0.6275758783319103
# Epoch:  67%|██████▋   | 2/3 [00:32<00:16, 16.05s/it]Validation Accuracy: 0.6743055555555555
# Train loss: 0.6097398542646152
# Epoch: 100%|██████████| 3/3 [00:48<00:00, 16.19s/it]Validation Accuracy: 0.6979166666666667

# Macro F1 Score: 0.6244348244348246
# Accuracy score: 0.7377049180327869 

# ====================================================================================================
# t_total value of -1 results in schedule not being applied
# Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
# Model Summary:
# Language: French
# Sample Size: 854
# Seed value: 2022
# Train loss: 0.6619469825901202
# Epoch:  33%|███▎      | 1/3 [00:16<00:32, 16.35s/it]Validation Accuracy: 0.6555555555555556
# Train loss: 0.609012288833732
# Epoch:  67%|██████▋   | 2/3 [00:32<00:16, 16.35s/it]Validation Accuracy: 0.7041666666666667
# Train loss: 0.5956740076862165
# Epoch: 100%|██████████| 3/3 [00:49<00:00, 16.36s/it]Validation Accuracy: 0.7027777777777777

# Macro F1 Score: 0.6397085522015928
# Accuracy score: 0.75 

# ====================================================================================================

# The Average F1-Score of French for the sample size 854 is: 0.6223626875762742