In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install transformers

Collecting transformers
[?25l  Downloading https://files.pythonhosted.org/packages/99/84/7bc03215279f603125d844bf81c3fb3f2d50fe8e511546eb4897e4be2067/transformers-4.0.0-py3-none-any.whl (1.4MB)
[K     |▎                               | 10kB 22.0MB/s eta 0:00:01[K     |▌                               | 20kB 29.6MB/s eta 0:00:01[K     |▊                               | 30kB 25.7MB/s eta 0:00:01[K     |█                               | 40kB 19.1MB/s eta 0:00:01[K     |█▏                              | 51kB 16.3MB/s eta 0:00:01[K     |█▌                              | 61kB 18.5MB/s eta 0:00:01[K     |█▊                              | 71kB 14.4MB/s eta 0:00:01[K     |██                              | 81kB 15.8MB/s eta 0:00:01[K     |██▏                             | 92kB 15.9MB/s eta 0:00:01[K     |██▍                             | 102kB 15.0MB/s eta 0:00:01[K     |██▋                             | 112kB 15.0MB/s eta 0:00:01[K     |███                             | 

In [3]:
!pip install emoji

Collecting emoji
[?25l  Downloading https://files.pythonhosted.org/packages/ff/1c/1f1457fe52d0b30cbeebfd578483cedb3e3619108d2d5a21380dfecf8ffd/emoji-0.6.0.tar.gz (51kB)
[K     |██████▍                         | 10kB 21.0MB/s eta 0:00:01[K     |████████████▉                   | 20kB 27.5MB/s eta 0:00:01[K     |███████████████████▎            | 30kB 26.6MB/s eta 0:00:01[K     |█████████████████████████▊      | 40kB 19.5MB/s eta 0:00:01[K     |████████████████████████████████| 51kB 5.4MB/s 
[?25hBuilding wheels for collected packages: emoji
  Building wheel for emoji (setup.py) ... [?25l[?25hdone
  Created wheel for emoji: filename=emoji-0.6.0-cp36-none-any.whl size=49716 sha256=2f8f094ca033ee68ca390e0bd151eb6180cf4ed1309c96a57326b5e04099bf72
  Stored in directory: /root/.cache/pip/wheels/46/2c/8b/9dcf5216ca68e14e0320e283692dce8ae321cdc01e73e17796
Successfully built emoji
Installing collected packages: emoji
Successfully installed emoji-0.6.0


In [4]:
import tensorflow as tf
import torch
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader, RandomSampler
from keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from transformers import BertForSequenceClassification, BertTokenizer
from tqdm import tqdm, trange
import pandas as pd
import io
import os
import re
import emoji
import random

import nltk
nltk.download('stopwords')
from nltk import word_tokenize, pos_tag
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import sent_tokenize, TweetTokenizer
from nltk.corpus import wordnet, stopwords

import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import f1_score, accuracy_score
from statistics import mode


# specify GPU device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
n_gpu = torch.cuda.device_count()
torch.cuda.get_device_name(0)

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


'Tesla V100-SXM2-16GB'

In [5]:
def preprocess(df):
    
    #removes URL
    pattern = r'https.?://[^\s]+[\s]?'
    df["tweet"] = df["tweet"].str.replace(pat=pattern, repl="", regex=True)
    
    #removes usernames/mentions
    pattern = r'@[^\s]+'
    df["tweet"] = df["tweet"].str.replace(pat=pattern, repl="", regex=True)
    
    #removes emoji and smiley
    pattern = re.compile("["
                         u"\U0001F600-\U0001F64F"
                         u"\U0001F300-\U0001F5FF"
                         u"\U0001F680-\U0001F6FF"
                         u"\U0001F1E0-\U0001F1FF"
                         u"\U00002500-\U00002BEF"
                         u"\U00002702-\U000027B0"
                         u"\U00002702-\U000027B0"
                         u"\U000024C2-\U0001F251"
                         u"\U0001f926-\U0001f937"
                         u"\U00010000-\U0010ffff"
                         u"\u2640-\u2642"
                         u"\u2600-\u2B55"
                         u"\u200d"
                         u"\u23cf"
                         u"\u23e9"
                         u"\u231a"
                         u"\ufe0f"
                         u"\u3030"
                         "]+", flags=re.UNICODE)
    df["tweet"] = df["tweet"].str.replace(pat=pattern, repl="", regex=True)
    
    #removes numbers
    pattern = r'\d+'
    df["tweet"] = df["tweet"].str.replace(pat=pattern, repl="", regex=True)
    
    #removes punctuation
    pattern = r"[^\w\s]"
    df["tweet"] = df["tweet"].str.replace(pat=pattern, repl=" ", regex=True)

    #removes stop words
    stop_words = stopwords.words("english")    
    remove_stop_words = lambda row: " ".join([token for token in row.split(" ")
                                              if token not in stop_words])
    df["tweet"] = df["tweet"].apply(remove_stop_words)
    
    #removes extra spaces
    pattern = r"[\s]+"
    df["tweet"] = df["tweet"].str.replace(pat=pattern, repl=" ", regex=True)
    
    return(df)

In [6]:
def train_validate_test_split(df,seed, train_percent=.8, validate_percent=.125):
  train, test = train_test_split(df, train_size=train_percent, stratify=df['label'], random_state=seed)
  train, validate = train_test_split(train, test_size=validate_percent, stratify=train['label'], random_state=seed)
  return train, validate, test

def sample_data(df,sample,seed):
    X_train, _, y_train, _ = train_test_split( df['tweet'], df['label'], train_size=sample, random_state=seed, stratify=df['label'])
    return pd.concat([X_train,y_train], axis = 1 )

def tokenize_data(df):
    sentences = ["[CLS] " + query + " [SEP]" for query in df['tweet']]
    # Tokenize with multilingual BERT tokenizer
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)
    #tokenizer = AutoTokenizer.from_pretrained("asafaya/bert-base-arabic")
    tokenized_texts = [tokenizer.tokenize(sent) for sent in sentences]
    
    MAX_LEN = 128

    # Use the BERT tokenizer to convert the tokens to their index numbers in the BERT vocabulary
    input_ids = pad_sequences([tokenizer.convert_tokens_to_ids(txt) for txt in tokenized_texts],
                          maxlen=MAX_LEN, dtype="long", truncating="post", padding="post")
    # Create attention masks
    attention_masks = []
    # Create a mask of 1s for each token followed by 0s for padding
    for seq in input_ids:
        seq_mask = [float(i>0) for i in seq]
        attention_masks.append(seq_mask)
    return input_ids, attention_masks

def Data_Loader(inputs_ids, attention_masks, df,batch_size=16): 
    data = TensorDataset(torch.LongTensor(inputs_ids), torch.LongTensor(attention_masks), torch.LongTensor(df['label'].values))
    sampler = RandomSampler(data)
    dataloader = DataLoader(data, sampler=sampler, batch_size=batch_size)
    return dataloader

In [7]:
def model_train(model, train_dataloader, validation_dataloader):
    # BERT training loop
    epochs = 3
    for _ in trange(epochs, desc="Epoch"):  
        # Set our model to training mode
        model.train()
        # Tracking variables
        tr_loss = 0
        nb_tr_examples, nb_tr_steps = 0, 0
        # Train the data for one epoch
        for step, batch in enumerate(train_dataloader):
            # Add batch to GPU
            batch = tuple(t.to(device) for t in batch)
            # Unpack the inputs from our dataloader
            b_input_ids, b_input_mask, b_labels = batch
            # Clear out the gradients (by default they accumulate)
            optimizer.zero_grad()
            # Forward pass
            outputs = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask, labels=b_labels)
            loss = outputs["loss"]
            # Backward pass
            loss.backward()
            # Update parameters and take a step using the computed gradient
            optimizer.step()
            # Update tracking variables
            tr_loss += loss.item()
            nb_tr_examples += b_input_ids.size(0)
            nb_tr_steps += 1
        print("Train loss: {}".format(tr_loss/nb_tr_steps))

        ## VALIDATION

        # Put model in evaluation mode
        model.eval()
        # Tracking variables 
        eval_loss, eval_accuracy = 0, 0
        nb_eval_steps, nb_eval_examples = 0, 0
        # Evaluate data for one epoch
        for batch in validation_dataloader:
            # Add batch to GPU
            batch = tuple(t.to(device) for t in batch)
            # Unpack the inputs from our dataloader
            b_input_ids, b_input_mask, b_labels = batch
            # Telling the model not to compute or store gradients, saving memory and speeding up validation
            with torch.no_grad():
              # Forward pass, calculate logit predictions
                logits = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask)[0]
            # Move logits and labels to CPU
            logits = logits.detach().cpu().numpy()
            label_ids = b_labels.to('cpu').numpy()
            tmp_eval_accuracy = flat_accuracy(logits, label_ids)    
            eval_accuracy += tmp_eval_accuracy
            nb_eval_steps += 1
        validation_accuracy = (eval_accuracy/nb_eval_steps)
        print("Validation Accuracy: {}".format(eval_accuracy/nb_eval_steps))
    return validation_accuracy

In [8]:
def flat_accuracy(preds, labels):
    pred_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()
    return np.sum(pred_flat == labels_flat) / len(labels_flat)

def model_test(model,prediction_dataloader):
    model.eval()
    # Tracking variables 
    predictions , true_labels = [], []
# Predict
    for batch in prediction_dataloader:
        # Add batch to GPU
        batch = tuple(t.to(device) for t in batch)
        # Unpack the inputs from our dataloader
        b_input_ids, b_input_mask, b_labels = batch
        # Telling the model not to compute or store gradients, saving memory and speeding up prediction
        with torch.no_grad():
            # Forward pass, calculate logit predictions
            logits = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask)[0]
        # Move logits and labels to CPU
        logits = logits.detach().cpu().numpy()
        label_ids = b_labels.to('cpu').numpy()
        predictions+=list(np.argmax(logits, axis=1).flatten())
        true_labels+=list(label_ids.flatten())
    test_f1_score = f1_score(true_labels, predictions, average= 'macro')
    print("Macro F1 Score:",test_f1_score)
    test_f1_wscore = f1_score(true_labels, predictions, average= 'weighted')
    print("Weighted F1 Score:",test_f1_wscore)
    test_accuracy_score = accuracy_score(true_labels, predictions)
    print("Accuracy score:", test_accuracy_score, "\n")
    print("="*100)
    return test_f1_score, test_f1_wscore

In [9]:
def model_initialise():
  # Load BertForSequenceClassification, the pretrained BERT model with a single linear classification layer on top. 
  model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2).cuda()
  optimizer = optim.AdamW(params = model.parameters(), lr=2e-5)
  return model, optimizer

In [10]:
print_stmts= []
languages = ['English']
directory = 'drive/My Drive/CS695'
for lang in languages:
  df = pd.read_csv(os.path.join(directory, lang+'.csv'))
  df = preprocess(df)
  sample_sizes = [16, 32, 64, 128, 256]
for sample in sample_sizes: 
  seeds = [2018,2019, 2020, 2021, 2022]
  weighted = []
  macro = []
  for seed in seeds:
    np.random.seed(seed)
    train_df, validation_df, test_df = train_validate_test_split(df, seed)
    train_len = len(train_df)
    if sample==256 and seed==2022:
        sample_sizes.append(train_len)
    if sample == train_len and seed == 2022:
        sample_sizes.remove(train_len)
    model, optimizer = model_initialise()
    if(sample != train_len):  
      train_df_sample = sample_data(train_df,sample,seed)
      train_input_ids, train_attention_masks = tokenize_data(train_df_sample)
      train_dataloader = Data_Loader(train_input_ids, train_attention_masks, train_df_sample)
    else:
      train_input_ids, train_attention_masks = tokenize_data(train_df)
      train_dataloader = Data_Loader(train_input_ids, train_attention_masks, train_df)

    validation_input_ids, validation_attention_masks = tokenize_data(validation_df)
    validation_dataloader = Data_Loader(validation_input_ids, validation_attention_masks, validation_df)
    print("\nModel Summary:")
    print('Language:', lang)
    print('Sample Size:', sample)
    print('Seed value:', seed)
    validation_accuracy = model_train(model, train_dataloader, validation_dataloader)
    test_input_ids, test_attention_masks = tokenize_data(test_df)
    test_dataloader = Data_Loader(test_input_ids, test_attention_masks, test_df)
    m, w = model_test(model, test_dataloader)
    weighted.append(w)
    macro.append(m)
  print("The Average  Weighted F1-Score of the Language ", lang, "is:",sum(weighted)/ len(weighted))
  print("The Average  Macro F1-Score of the Language ", lang, "is:",sum(macro)/ len(macro))
  print("="*200)
  print_stmts.append(" For Sample Size "+str(sample)+" Average Weighted F1-Score "+str(sum(weighted)/len(weighted))+" and Average Macro F1-Score "+str(sum(macro)/len(macro))+" of "+ str(lang))
for i in print_stmts:
  print(i,"\n")
print("="*100+str(lang)+"="*100)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=433.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=440473133.0, style=ProgressStyle(descri…




Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=231508.0, style=ProgressStyle(descripti…




Epoch:   0%|          | 0/3 [00:00<?, ?it/s]


Model Summary:
Language: English
Sample Size: 16
Seed value: 2018
Train loss: 0.7875593900680542


Epoch:  33%|███▎      | 1/3 [00:24<00:48, 24.19s/it]

Validation Accuracy: 0.445201062215478
Train loss: 0.6731244325637817


Epoch:  67%|██████▋   | 2/3 [00:48<00:24, 24.17s/it]

Validation Accuracy: 0.7660976226605969
Train loss: 0.6439403295516968


Epoch: 100%|██████████| 3/3 [01:12<00:00, 24.15s/it]

Validation Accuracy: 0.799772382397572





Macro F1 Score: 0.4482126969173601
Weighted F1 Score: 0.7187710759965317
Accuracy score: 0.8001517594612539 



Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Model Summary:
Language: English
Sample Size: 16
Seed value: 2019
Train loss: 0.5847092866897583


Epoch:  33%|███▎      | 1/3 [00:24<00:48, 24.13s/it]

Validation Accuracy: 0.8065123925139099
Train loss: 0.519483208656311


Epoch:  67%|██████▋   | 2/3 [00:48<00:24, 24.14s/it]

Validation Accuracy: 0.8066831057157309
Train loss: 0.5039225220680237


Epoch: 100%|██████████| 3/3 [01:12<00:00, 24.15s/it]

Validation Accuracy: 0.8069865958523015





Macro F1 Score: 0.4468043334899457
Weighted F1 Score: 0.7208237528542989
Accuracy score: 0.8068860855543963 



Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Model Summary:
Language: English
Sample Size: 16
Seed value: 2020
Train loss: 0.8405503034591675


Epoch:  33%|███▎      | 1/3 [00:24<00:48, 24.13s/it]

Validation Accuracy: 0.2877845220030349
Train loss: 0.753589928150177


Epoch:  67%|██████▋   | 2/3 [00:48<00:24, 24.13s/it]

Validation Accuracy: 0.7458965604451189
Train loss: 0.6672976613044739


Epoch: 100%|██████████| 3/3 [01:12<00:00, 24.14s/it]

Validation Accuracy: 0.8013783510369247





Macro F1 Score: 0.4489114586382593
Weighted F1 Score: 0.7198492862144559
Accuracy score: 0.8022858768851371 



Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Model Summary:
Language: English
Sample Size: 16
Seed value: 2021
Train loss: 0.7035276889801025


Epoch:  33%|███▎      | 1/3 [00:24<00:48, 24.18s/it]

Validation Accuracy: 0.786210166919575
Train loss: 0.6347970366477966


Epoch:  67%|██████▋   | 2/3 [00:48<00:24, 24.18s/it]

Validation Accuracy: 0.8032751643904906
Train loss: 0.5618079900741577


Epoch: 100%|██████████| 3/3 [01:12<00:00, 24.19s/it]

Validation Accuracy: 0.8067842690945878





Macro F1 Score: 0.4465616797900262
Weighted F1 Score: 0.720733523978667
Accuracy score: 0.8068860855543963 



Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Model Summary:
Language: English
Sample Size: 16
Seed value: 2022
Train loss: 0.92399001121521


Epoch:  33%|███▎      | 1/3 [00:24<00:48, 24.16s/it]

Validation Accuracy: 0.35701188669701567
Train loss: 0.7024259567260742


Epoch:  67%|██████▋   | 2/3 [00:48<00:24, 24.16s/it]

Validation Accuracy: 0.7229577642893272
Train loss: 0.5937894582748413


Epoch: 100%|██████████| 3/3 [01:12<00:00, 24.18s/it]

Validation Accuracy: 0.7968259989883663





Macro F1 Score: 0.45190427525506366
Weighted F1 Score: 0.7191683793573067
Accuracy score: 0.797638243384236 

The Average  Weighted F1-Score of the Language  English is: 0.7198692036802521
The Average  Macro F1-Score of the Language  English is: 0.44847888881813097


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Model Summary:
Language: English
Sample Size: 32
Seed value: 2018
Train loss: 0.5736183822154999


Epoch:  33%|███▎      | 1/3 [00:24<00:48, 24.36s/it]

Validation Accuracy: 0.8069865958523015
Train loss: 0.49169690907001495


Epoch:  67%|██████▋   | 2/3 [00:48<00:24, 24.37s/it]

Validation Accuracy: 0.8069613050075873
Train loss: 0.4356447160243988


Epoch: 100%|██████████| 3/3 [01:13<00:00, 24.39s/it]

Validation Accuracy: 0.8069676277187657





Macro F1 Score: 0.4465907301453992
Weighted F1 Score: 0.7207804101445615
Accuracy score: 0.80698093521768 



Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Model Summary:
Language: English
Sample Size: 32
Seed value: 2019
Train loss: 0.5648916661739349


Epoch:  33%|███▎      | 1/3 [00:24<00:48, 24.35s/it]

Validation Accuracy: 0.8069865958523015
Train loss: 0.5168718099594116


Epoch:  67%|██████▋   | 2/3 [00:48<00:24, 24.37s/it]

Validation Accuracy: 0.8069739504299444
Train loss: 0.4956187754869461


Epoch: 100%|██████████| 3/3 [01:13<00:00, 24.38s/it]

Validation Accuracy: 0.8069676277187657





Macro F1 Score: 0.4465907301453992
Weighted F1 Score: 0.7207804101445615
Accuracy score: 0.80698093521768 



Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Model Summary:
Language: English
Sample Size: 32
Seed value: 2020
Train loss: 0.769576907157898


Epoch:  33%|███▎      | 1/3 [00:24<00:48, 24.36s/it]

Validation Accuracy: 0.7131638846737481
Train loss: 0.6135363578796387


Epoch:  67%|██████▋   | 2/3 [00:48<00:24, 24.38s/it]

Validation Accuracy: 0.8049001011633788
Train loss: 0.5170315206050873


Epoch: 100%|██████████| 3/3 [01:13<00:00, 24.39s/it]

Validation Accuracy: 0.8068854324734446





Macro F1 Score: 0.446576205348941
Weighted F1 Score: 0.7207569676769022
Accuracy score: 0.8069335103860381 



Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Model Summary:
Language: English
Sample Size: 32
Seed value: 2021
Train loss: 0.8208950459957123


Epoch:  33%|███▎      | 1/3 [00:24<00:48, 24.26s/it]

Validation Accuracy: 0.689339908952959
Train loss: 0.6238411664962769


Epoch:  67%|██████▋   | 2/3 [00:48<00:24, 24.27s/it]

Validation Accuracy: 0.8024152756702073
Train loss: 0.49041788280010223


Epoch: 100%|██████████| 3/3 [01:12<00:00, 24.29s/it]

Validation Accuracy: 0.8068854324734446





Macro F1 Score: 0.446576205348941
Weighted F1 Score: 0.7207569676769022
Accuracy score: 0.8069335103860381 



Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Model Summary:
Language: English
Sample Size: 32
Seed value: 2022
Train loss: 0.619285374879837


Epoch:  33%|███▎      | 1/3 [00:24<00:48, 24.25s/it]

Validation Accuracy: 0.8069739504299444
Train loss: 0.5126817226409912


Epoch:  67%|██████▋   | 2/3 [00:48<00:24, 24.25s/it]

Validation Accuracy: 0.8069865958523015
Train loss: 0.4561086446046829


Epoch: 100%|██████████| 3/3 [01:12<00:00, 24.27s/it]

Validation Accuracy: 0.8069802731411229





Macro F1 Score: 0.446576205348941
Weighted F1 Score: 0.7207569676769022
Accuracy score: 0.8069335103860381 

The Average  Weighted F1-Score of the Language  English is: 0.7207663446639659
The Average  Macro F1-Score of the Language  English is: 0.4465820152675243


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Model Summary:
Language: English
Sample Size: 64
Seed value: 2018
Train loss: 0.7115506231784821


Epoch:  33%|███▎      | 1/3 [00:24<00:49, 24.57s/it]

Validation Accuracy: 0.8044258978249874
Train loss: 0.5111775249242783


Epoch:  67%|██████▋   | 2/3 [00:49<00:24, 24.57s/it]

Validation Accuracy: 0.80699291856348
Train loss: 0.44186533242464066


Epoch: 100%|██████████| 3/3 [01:13<00:00, 24.58s/it]

Validation Accuracy: 0.8069802731411229





Macro F1 Score: 0.4465907301453992
Weighted F1 Score: 0.7207804101445615
Accuracy score: 0.80698093521768 



Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Model Summary:
Language: English
Sample Size: 64
Seed value: 2019
Train loss: 0.6426061540842056


Epoch:  33%|███▎      | 1/3 [00:24<00:49, 24.55s/it]

Validation Accuracy: 0.8069739504299444
Train loss: 0.5351106598973274


Epoch:  67%|██████▋   | 2/3 [00:49<00:24, 24.56s/it]

Validation Accuracy: 0.8069739504299444
Train loss: 0.4450599402189255


Epoch: 100%|██████████| 3/3 [01:13<00:00, 24.57s/it]

Validation Accuracy: 0.8069613050075873





Macro F1 Score: 0.4465907301453992
Weighted F1 Score: 0.7207804101445615
Accuracy score: 0.80698093521768 



Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Model Summary:
Language: English
Sample Size: 64
Seed value: 2020
Train loss: 0.9408404678106308


Epoch:  33%|███▎      | 1/3 [00:24<00:49, 24.55s/it]

Validation Accuracy: 0.19339276681841175
Train loss: 0.7207417786121368


Epoch:  67%|██████▋   | 2/3 [00:49<00:24, 24.56s/it]

Validation Accuracy: 0.7951251896813353
Train loss: 0.609770193696022


Epoch: 100%|██████████| 3/3 [01:13<00:00, 24.57s/it]

Validation Accuracy: 0.8041350531107738





Macro F1 Score: 0.44579073251504714
Weighted F1 Score: 0.7194892444727347
Accuracy score: 0.8043725694773783 



Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Model Summary:
Language: English
Sample Size: 64
Seed value: 2021
Train loss: 0.6817275583744049


Epoch:  33%|███▎      | 1/3 [00:24<00:49, 24.53s/it]

Validation Accuracy: 0.8066009104704096
Train loss: 0.573964037001133


Epoch:  67%|██████▋   | 2/3 [00:49<00:24, 24.55s/it]

Validation Accuracy: 0.8069739504299444
Train loss: 0.47324303537607193


Epoch: 100%|██████████| 3/3 [01:13<00:00, 24.56s/it]

Validation Accuracy: 0.8069739504299444





Macro F1 Score: 0.4465907301453992
Weighted F1 Score: 0.7207804101445615
Accuracy score: 0.80698093521768 



Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Model Summary:
Language: English
Sample Size: 64
Seed value: 2022
Train loss: 0.5694792568683624


Epoch:  33%|███▎      | 1/3 [00:24<00:49, 24.56s/it]

Validation Accuracy: 0.8069865958523015
Train loss: 0.474591888487339


Epoch:  67%|██████▋   | 2/3 [00:49<00:24, 24.57s/it]

Validation Accuracy: 0.8069739504299444
Train loss: 0.46519477665424347


Epoch: 100%|██████████| 3/3 [01:13<00:00, 24.58s/it]

Validation Accuracy: 0.8069676277187657





Macro F1 Score: 0.4465907301453992
Weighted F1 Score: 0.7207804101445615
Accuracy score: 0.80698093521768 

The Average  Weighted F1-Score of the Language  English is: 0.720522177010196
The Average  Macro F1-Score of the Language  English is: 0.4464307306193288


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Model Summary:
Language: English
Sample Size: 128
Seed value: 2018
Train loss: 0.6359152272343636


Epoch:  33%|███▎      | 1/3 [00:25<00:50, 25.11s/it]

Validation Accuracy: 0.8069865958523015
Train loss: 0.5127941723912954


Epoch:  67%|██████▋   | 2/3 [00:50<00:25, 25.11s/it]

Validation Accuracy: 0.8069739504299444
Train loss: 0.49774531088769436


Epoch: 100%|██████████| 3/3 [01:15<00:00, 25.11s/it]

Validation Accuracy: 0.8069865958523015





Macro F1 Score: 0.4465907301453992
Weighted F1 Score: 0.7207804101445615
Accuracy score: 0.80698093521768 



Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Model Summary:
Language: English
Sample Size: 128
Seed value: 2019
Train loss: 0.684056006371975


Epoch:  33%|███▎      | 1/3 [00:25<00:50, 25.09s/it]

Validation Accuracy: 0.8069802731411229
Train loss: 0.49259624630212784


Epoch:  67%|██████▋   | 2/3 [00:50<00:25, 25.11s/it]

Validation Accuracy: 0.8069865958523015
Train loss: 0.46320628374814987


Epoch: 100%|██████████| 3/3 [01:15<00:00, 25.13s/it]

Validation Accuracy: 0.8069802731411229





Macro F1 Score: 0.4465907301453992
Weighted F1 Score: 0.7207804101445615
Accuracy score: 0.80698093521768 



Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Model Summary:
Language: English
Sample Size: 128
Seed value: 2020
Train loss: 0.8494908958673477


Epoch:  33%|███▎      | 1/3 [00:25<00:50, 25.12s/it]

Validation Accuracy: 0.26984066767830045
Train loss: 0.6163941696286201


Epoch:  67%|██████▋   | 2/3 [00:50<00:25, 25.13s/it]

Validation Accuracy: 0.8040402124430955
Train loss: 0.5028543435037136


Epoch: 100%|██████████| 3/3 [01:15<00:00, 25.13s/it]

Validation Accuracy: 0.8069549822964086





Macro F1 Score: 0.4465907301453992
Weighted F1 Score: 0.7207804101445615
Accuracy score: 0.80698093521768 



Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Model Summary:
Language: English
Sample Size: 128
Seed value: 2021
Train loss: 0.6199037060141563


Epoch:  33%|███▎      | 1/3 [00:25<00:50, 25.09s/it]

Validation Accuracy: 0.8069802731411229
Train loss: 0.4944848082959652


Epoch:  67%|██████▋   | 2/3 [00:50<00:25, 25.10s/it]

Validation Accuracy: 0.8069865958523015
Train loss: 0.4576500430703163


Epoch: 100%|██████████| 3/3 [01:15<00:00, 25.11s/it]

Validation Accuracy: 0.8069739504299444





Macro F1 Score: 0.4465907301453992
Weighted F1 Score: 0.7207804101445615
Accuracy score: 0.80698093521768 



Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Model Summary:
Language: English
Sample Size: 128
Seed value: 2022
Train loss: 0.5141502618789673


Epoch:  33%|███▎      | 1/3 [00:25<00:50, 25.11s/it]

Validation Accuracy: 0.8069613050075873
Train loss: 0.4454524926841259


Epoch:  67%|██████▋   | 2/3 [00:50<00:25, 25.12s/it]

Validation Accuracy: 0.807163631765301
Train loss: 0.3759736381471157


Epoch: 100%|██████████| 3/3 [01:15<00:00, 25.14s/it]

Validation Accuracy: 0.8091679312089024





Macro F1 Score: 0.47823961038388557
Weighted F1 Score: 0.7336494859052567
Accuracy score: 0.8099212747794745 

The Average  Weighted F1-Score of the Language  English is: 0.7233542252967006
The Average  Macro F1-Score of the Language  English is: 0.45292050619309643


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Model Summary:
Language: English
Sample Size: 256
Seed value: 2018
Train loss: 0.6375926714390516


Epoch:  33%|███▎      | 1/3 [00:26<00:52, 26.22s/it]

Validation Accuracy: 0.8069865958523015
Train loss: 0.48619673773646355


Epoch:  67%|██████▋   | 2/3 [00:52<00:26, 26.22s/it]

Validation Accuracy: 0.8069676277187657
Train loss: 0.44688101578503847


Epoch: 100%|██████████| 3/3 [01:18<00:00, 26.22s/it]

Validation Accuracy: 0.8076378351036925





Macro F1 Score: 0.44882733032694433
Weighted F1 Score: 0.7216841951897373
Accuracy score: 0.8071706345442474 



Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Model Summary:
Language: English
Sample Size: 256
Seed value: 2019
Train loss: 0.5096036065369844


Epoch:  33%|███▎      | 1/3 [00:26<00:52, 26.19s/it]

Validation Accuracy: 0.8069865958523015
Train loss: 0.4686730206012726


Epoch:  67%|██████▋   | 2/3 [00:52<00:26, 26.20s/it]

Validation Accuracy: 0.8069865958523015
Train loss: 0.44172619190067053


Epoch: 100%|██████████| 3/3 [01:18<00:00, 26.22s/it]

Validation Accuracy: 0.8067716236722307





Macro F1 Score: 0.45057027288727
Weighted F1 Score: 0.7224043200561124
Accuracy score: 0.8073603338708147 



Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Model Summary:
Language: English
Sample Size: 256
Seed value: 2020
Train loss: 0.5774361547082663


Epoch:  33%|███▎      | 1/3 [00:26<00:52, 26.21s/it]

Validation Accuracy: 0.8069676277187657
Train loss: 0.5036590602248907


Epoch:  67%|██████▋   | 2/3 [00:52<00:26, 26.22s/it]

Validation Accuracy: 0.8069992412746586
Train loss: 0.49005321599543095


Epoch: 100%|██████████| 3/3 [01:18<00:00, 26.22s/it]

Validation Accuracy: 0.8069865958523015





Macro F1 Score: 0.4465907301453992
Weighted F1 Score: 0.7207804101445615
Accuracy score: 0.80698093521768 



Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Model Summary:
Language: English
Sample Size: 256
Seed value: 2021
Train loss: 0.543737506493926


Epoch:  33%|███▎      | 1/3 [00:26<00:52, 26.21s/it]

Validation Accuracy: 0.8069865958523015
Train loss: 0.47732597403228283


Epoch:  67%|██████▋   | 2/3 [00:52<00:26, 26.21s/it]

Validation Accuracy: 0.8069739504299444
Train loss: 0.4509029798209667


Epoch: 100%|██████████| 3/3 [01:18<00:00, 26.21s/it]

Validation Accuracy: 0.8069739504299444





Macro F1 Score: 0.4465907301453992
Weighted F1 Score: 0.7207804101445615
Accuracy score: 0.80698093521768 



Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Model Summary:
Language: English
Sample Size: 256
Seed value: 2022
Train loss: 0.5944087002426386


Epoch:  33%|███▎      | 1/3 [00:26<00:52, 26.17s/it]

Validation Accuracy: 0.8069865958523015
Train loss: 0.48949686251580715


Epoch:  67%|██████▋   | 2/3 [00:52<00:26, 26.18s/it]

Validation Accuracy: 0.8069613050075873
Train loss: 0.4417425664141774


Epoch: 100%|██████████| 3/3 [01:18<00:00, 26.20s/it]

Validation Accuracy: 0.8069802731411229





Macro F1 Score: 0.4465907301453992
Weighted F1 Score: 0.7207804101445615
Accuracy score: 0.80698093521768 

The Average  Weighted F1-Score of the Language  English is: 0.7212859491359068
The Average  Macro F1-Score of the Language  English is: 0.4478339587300824


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Model Summary:
Language: English
Sample Size: 73797
Seed value: 2018
Train loss: 0.3373182622989191


Epoch:  33%|███▎      | 1/3 [10:55<21:50, 655.11s/it]

Validation Accuracy: 0.8591489630753667
Train loss: 0.27370127173834746


Epoch:  67%|██████▋   | 2/3 [21:50<10:55, 655.30s/it]

Validation Accuracy: 0.8624683864441073
Train loss: 0.2000536209880036


Epoch: 100%|██████████| 3/3 [32:46<00:00, 655.62s/it]

Validation Accuracy: 0.8624747091552857





Macro F1 Score: 0.7568188675349905
Weighted F1 Score: 0.8564799221171775
Accuracy score: 0.8651712036422271 



Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Model Summary:
Language: English
Sample Size: 73797
Seed value: 2019
Train loss: 0.3366703117036546


Epoch:  33%|███▎      | 1/3 [10:55<21:50, 655.11s/it]

Validation Accuracy: 0.8692842690945878
Train loss: 0.2697998459396188


Epoch:  67%|██████▋   | 2/3 [21:50<10:55, 655.14s/it]

Validation Accuracy: 0.8735710672736469
Train loss: 0.19448424941451714


Epoch: 100%|██████████| 3/3 [32:46<00:00, 655.39s/it]

Validation Accuracy: 0.8637898330804248





Macro F1 Score: 0.7680278460883665
Weighted F1 Score: 0.859515409822528
Accuracy score: 0.8637484586929717 



Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Model Summary:
Language: English
Sample Size: 73797
Seed value: 2020
Train loss: 0.3402275484941178


Epoch:  33%|███▎      | 1/3 [10:55<21:51, 655.62s/it]

Validation Accuracy: 0.866167172483561
Train loss: 0.27801150006419556


Epoch:  67%|██████▋   | 2/3 [21:51<10:55, 655.55s/it]

Validation Accuracy: 0.8694739504299444
Train loss: 0.20649719456656507


Epoch: 100%|██████████| 3/3 [32:46<00:00, 655.56s/it]

Validation Accuracy: 0.8621838644410723





Macro F1 Score: 0.7729272315691139
Weighted F1 Score: 0.8628064979092723
Accuracy score: 0.8673053210661102 



Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Model Summary:
Language: English
Sample Size: 73797
Seed value: 2021
Train loss: 0.3375951662367307


Epoch:  33%|███▎      | 1/3 [10:55<21:50, 655.01s/it]

Validation Accuracy: 0.8685318664643399
Train loss: 0.2728082306792892


Epoch:  67%|██████▋   | 2/3 [21:49<10:54, 654.80s/it]

Validation Accuracy: 0.869663631765301
Train loss: 0.19889350783565612


Epoch: 100%|██████████| 3/3 [32:40<00:00, 653.46s/it]

Validation Accuracy: 0.8624747091552857





Macro F1 Score: 0.7610622349783038
Weighted F1 Score: 0.8550534674563096
Accuracy score: 0.8591482500237124 



Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Model Summary:
Language: English
Sample Size: 73797
Seed value: 2022
Train loss: 0.3366412019162059


Epoch:  33%|███▎      | 1/3 [10:49<21:38, 649.02s/it]

Validation Accuracy: 0.8626390996459281
Train loss: 0.2707253077309502


Epoch:  67%|██████▋   | 2/3 [21:39<10:49, 649.33s/it]

Validation Accuracy: 0.865882650480526
Train loss: 0.19470781755417427


Epoch: 100%|██████████| 3/3 [32:26<00:00, 648.90s/it]

Validation Accuracy: 0.8582005563985837





Macro F1 Score: 0.7658835518923394
Weighted F1 Score: 0.8574328750006363
Accuracy score: 0.8608555439628189 

The Average  Weighted F1-Score of the Language  English is: 0.8582576344611847
The Average  Macro F1-Score of the Language  English is: 0.7649439464126229
 For Sample Size 16 Average Weighted F1-Score 0.7198692036802521 and Average Macro F1-Score 0.44847888881813097 of English 

 For Sample Size 32 Average Weighted F1-Score 0.7207663446639659 and Average Macro F1-Score 0.4465820152675243 of English 

 For Sample Size 64 Average Weighted F1-Score 0.720522177010196 and Average Macro F1-Score 0.4464307306193288 of English 

 For Sample Size 128 Average Weighted F1-Score 0.7233542252967006 and Average Macro F1-Score 0.45292050619309643 of English 

 For Sample Size 256 Average Weighted F1-Score 0.7212859491359068 and Average Macro F1-Score 0.4478339587300824 of English 

 For Sample Size 73797 Average Weighted F1-Score 0.8582576344611847 and Average Macro F1-Score 0.764943946412622