In [19]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [20]:
!pip install transformers



In [21]:
!pip install emoji



In [22]:
import tensorflow as tf
import torch
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader, RandomSampler
from keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from transformers import BertForSequenceClassification, BertTokenizer
from tqdm import tqdm, trange
import pandas as pd
import io
import os
import re
import emoji
import random

import nltk
nltk.download('stopwords')
from nltk import word_tokenize, pos_tag
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import sent_tokenize, TweetTokenizer
from nltk.corpus import wordnet, stopwords

import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import f1_score, accuracy_score
from statistics import mode


# specify GPU device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
n_gpu = torch.cuda.device_count()
torch.cuda.get_device_name(0)

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


'Tesla V100-SXM2-16GB'

In [23]:
def preprocess(df):
    
    #removes URL
    pattern = r'https.?://[^\s]+[\s]?'
    df["tweet"] = df["tweet"].str.replace(pat=pattern, repl="", regex=True)
    
    #removes usernames/mentions
    pattern = r'@[^\s]+'
    df["tweet"] = df["tweet"].str.replace(pat=pattern, repl="", regex=True)
    
    #removes emoji and smiley
    pattern = re.compile("["
                         u"\U0001F600-\U0001F64F"
                         u"\U0001F300-\U0001F5FF"
                         u"\U0001F680-\U0001F6FF"
                         u"\U0001F1E0-\U0001F1FF"
                         u"\U00002500-\U00002BEF"
                         u"\U00002702-\U000027B0"
                         u"\U00002702-\U000027B0"
                         u"\U000024C2-\U0001F251"
                         u"\U0001f926-\U0001f937"
                         u"\U00010000-\U0010ffff"
                         u"\u2640-\u2642"
                         u"\u2600-\u2B55"
                         u"\u200d"
                         u"\u23cf"
                         u"\u23e9"
                         u"\u231a"
                         u"\ufe0f"
                         u"\u3030"
                         "]+", flags=re.UNICODE)
    df["tweet"] = df["tweet"].str.replace(pat=pattern, repl="", regex=True)
    
    #removes numbers
    pattern = r'\d+'
    df["tweet"] = df["tweet"].str.replace(pat=pattern, repl="", regex=True)
    
    #removes punctuation
    pattern = r"[^\w\s]"
    df["tweet"] = df["tweet"].str.replace(pat=pattern, repl=" ", regex=True)

    #removes stop words
    stop_words = stopwords.words("english")    
    remove_stop_words = lambda row: " ".join([token for token in row.split(" ")
                                              if token not in stop_words])
    df["tweet"] = df["tweet"].apply(remove_stop_words)
    
    #removes extra spaces
    pattern = r"[\s]+"
    df["tweet"] = df["tweet"].str.replace(pat=pattern, repl=" ", regex=True)
    
    return(df)

In [24]:
def train_validate_test_split(df,seed, train_percent=.8, validate_percent=.125):
  train, test = train_test_split(df, train_size=train_percent, stratify=df['label'], random_state=seed)
  train, validate = train_test_split(train, test_size=validate_percent, stratify=train['label'], random_state=seed)
  return train, validate, test

def sample_data(df,sample,seed):
    X_train, _, y_train, _ = train_test_split( df['tweet'], df['label'], train_size=sample, random_state=seed, stratify=df['label'])
    return pd.concat([X_train,y_train], axis = 1 )

def tokenize_data(df):
    sentences = ["[CLS] " + query + " [SEP]" for query in df['tweet']]
    # Tokenize with multilingual BERT tokenizer
    tokenizer = BertTokenizer.from_pretrained('asafaya/bert-base-arabic', do_lower_case=True)
    #tokenizer = AutoTokenizer.from_pretrained("asafaya/bert-base-arabic")
    tokenized_texts = [tokenizer.tokenize(sent) for sent in sentences]
    
    MAX_LEN = 128

    # Use the BERT tokenizer to convert the tokens to their index numbers in the BERT vocabulary
    input_ids = pad_sequences([tokenizer.convert_tokens_to_ids(txt) for txt in tokenized_texts],
                          maxlen=MAX_LEN, dtype="long", truncating="post", padding="post")
    # Create attention masks
    attention_masks = []
    # Create a mask of 1s for each token followed by 0s for padding
    for seq in input_ids:
        seq_mask = [float(i>0) for i in seq]
        attention_masks.append(seq_mask)
    return input_ids, attention_masks

def Data_Loader(inputs_ids, attention_masks, df,batch_size=16): 
    data = TensorDataset(torch.LongTensor(inputs_ids), torch.LongTensor(attention_masks), torch.LongTensor(df['label'].values))
    sampler = RandomSampler(data)
    dataloader = DataLoader(data, sampler=sampler, batch_size=batch_size)
    return dataloader

In [25]:
def model_train(model, train_dataloader, validation_dataloader):
    # BERT training loop
    epochs = 3
    for _ in trange(epochs, desc="Epoch"):  
        # Set our model to training mode
        model.train()
        # Tracking variables
        tr_loss = 0
        nb_tr_examples, nb_tr_steps = 0, 0
        # Train the data for one epoch
        for step, batch in enumerate(train_dataloader):
            # Add batch to GPU
            batch = tuple(t.to(device) for t in batch)
            # Unpack the inputs from our dataloader
            b_input_ids, b_input_mask, b_labels = batch
            # Clear out the gradients (by default they accumulate)
            optimizer.zero_grad()
            # Forward pass
            outputs = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask, labels=b_labels)
            loss = outputs["loss"]
            # Backward pass
            loss.backward()
            # Update parameters and take a step using the computed gradient
            optimizer.step()
            # Update tracking variables
            tr_loss += loss.item()
            nb_tr_examples += b_input_ids.size(0)
            nb_tr_steps += 1
        print("Train loss: {}".format(tr_loss/nb_tr_steps))

        ## VALIDATION

        # Put model in evaluation mode
        model.eval()
        # Tracking variables 
        eval_loss, eval_accuracy = 0, 0
        nb_eval_steps, nb_eval_examples = 0, 0
        # Evaluate data for one epoch
        for batch in validation_dataloader:
            # Add batch to GPU
            batch = tuple(t.to(device) for t in batch)
            # Unpack the inputs from our dataloader
            b_input_ids, b_input_mask, b_labels = batch
            # Telling the model not to compute or store gradients, saving memory and speeding up validation
            with torch.no_grad():
              # Forward pass, calculate logit predictions
                logits = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask)[0]
            # Move logits and labels to CPU
            logits = logits.detach().cpu().numpy()
            label_ids = b_labels.to('cpu').numpy()
            tmp_eval_accuracy = flat_accuracy(logits, label_ids)    
            eval_accuracy += tmp_eval_accuracy
            nb_eval_steps += 1
        validation_accuracy = (eval_accuracy/nb_eval_steps)
        print("Validation Accuracy: {}".format(eval_accuracy/nb_eval_steps))
    return validation_accuracy

In [26]:
def flat_accuracy(preds, labels):
    pred_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()
    return np.sum(pred_flat == labels_flat) / len(labels_flat)

def model_test(model,prediction_dataloader):
    model.eval()
    # Tracking variables 
    predictions , true_labels = [], []
# Predict
    for batch in prediction_dataloader:
        # Add batch to GPU
        batch = tuple(t.to(device) for t in batch)
        # Unpack the inputs from our dataloader
        b_input_ids, b_input_mask, b_labels = batch
        # Telling the model not to compute or store gradients, saving memory and speeding up prediction
        with torch.no_grad():
            # Forward pass, calculate logit predictions
            logits = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask)[0]
        # Move logits and labels to CPU
        logits = logits.detach().cpu().numpy()
        label_ids = b_labels.to('cpu').numpy()
        predictions+=list(np.argmax(logits, axis=1).flatten())
        true_labels+=list(label_ids.flatten())
    test_f1_score = f1_score(true_labels, predictions, average= 'macro')
    print("Macro F1 Score:",test_f1_score)
    test_f1_wscore = f1_score(true_labels, predictions, average= 'weighted')
    print("Weighted F1 Score:",test_f1_wscore)
    test_accuracy_score = accuracy_score(true_labels, predictions)
    print("Accuracy score:", test_accuracy_score, "\n")
    print("="*100)
    return test_f1_score, test_f1_wscore

In [27]:
def model_initialise():
  # Load BertForSequenceClassification, the pretrained BERT model with a single linear classification layer on top. 
  model = BertForSequenceClassification.from_pretrained("asafaya/bert-base-arabic", num_labels=2).cuda()
  optimizer = optim.AdamW(params = model.parameters(), lr=2e-5)
  return model, optimizer

In [28]:
print_stmts= []
languages = ['Arabic']
directory = 'drive/My Drive/CS695'
for lang in languages:
  df = pd.read_csv(os.path.join(directory, lang+'.csv'))
  df = preprocess(df)
  sample_sizes = [16, 32, 64, 128, 256]
for sample in sample_sizes: 
  seeds = [2018,2019, 2020, 2021, 2022]
  weighted = []
  macro = []
  for seed in seeds:
    np.random.seed(seed)
    train_df, validation_df, test_df = train_validate_test_split(df, seed)
    train_len = len(train_df)
    if sample==256 and seed==2022:
        sample_sizes.append(train_len)
    if sample == train_len and seed == 2022:
        sample_sizes.remove(train_len)
    model, optimizer = model_initialise()
    if(sample != train_len):  
      train_df_sample = sample_data(train_df,sample,seed)
      train_input_ids, train_attention_masks = tokenize_data(train_df_sample)
      train_dataloader = Data_Loader(train_input_ids, train_attention_masks, train_df_sample)
    else:
      train_input_ids, train_attention_masks = tokenize_data(train_df)
      train_dataloader = Data_Loader(train_input_ids, train_attention_masks, train_df)

    validation_input_ids, validation_attention_masks = tokenize_data(validation_df)
    validation_dataloader = Data_Loader(validation_input_ids, validation_attention_masks, validation_df)
    print("\nModel Summary:")
    print('Language:', lang)
    print('Sample Size:', sample)
    print('Seed value:', seed)
    validation_accuracy = model_train(model, train_dataloader, validation_dataloader)
    test_input_ids, test_attention_masks = tokenize_data(test_df)
    test_dataloader = Data_Loader(test_input_ids, test_attention_masks, test_df)
    m, w = model_test(model, test_dataloader)
    weighted.append(w)
    macro.append(m)
  print("The Average  Weighted F1-Score of the Language ", lang, "is:",sum(weighted)/ len(weighted))
  print("The Average  Macro F1-Score of the Language ", lang, "is:",sum(macro)/ len(macro))
  print("="*200)
  print_stmts.append("For Sample Size "+str(sample)+" Average Weighted F1-Score "+str(sum(weighted)/len(weighted))+" and Average Macro F1-Score "+str(sum(macro)/len(macro))+" of "+ str(lang))
for i in print_stmts:
  print(i,"\n")
print("="*100+str(lang)+"="*100)

Some weights of the model checkpoint at asafaya/bert-base-arabic were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at asafaya/bert-base-ar


Model Summary:
Language: Arabic
Sample Size: 16
Seed value: 2018
Train loss: 0.761111319065094


Epoch:  33%|███▎      | 1/3 [00:01<00:02,  1.48s/it]

Validation Accuracy: 0.7381756756756757
Train loss: 0.6452572345733643


Epoch:  67%|██████▋   | 2/3 [00:02<00:01,  1.48s/it]

Validation Accuracy: 0.793918918918919
Train loss: 0.5573105812072754


Epoch: 100%|██████████| 3/3 [00:04<00:00,  1.47s/it]

Validation Accuracy: 0.7865990990990991





Macro F1 Score: 0.44084983099951713
Weighted F1 Score: 0.6951569873964752
Accuracy score: 0.7884283246977547 



Some weights of the model checkpoint at asafaya/bert-base-arabic were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at asafaya/bert-base-ar


Model Summary:
Language: Arabic
Sample Size: 16
Seed value: 2019
Train loss: 0.7817266583442688


Epoch:  33%|███▎      | 1/3 [00:01<00:02,  1.47s/it]

Validation Accuracy: 0.7331081081081081
Train loss: 0.6372336149215698


Epoch:  67%|██████▋   | 2/3 [00:02<00:01,  1.47s/it]

Validation Accuracy: 0.7775900900900901
Train loss: 0.5595855116844177


Epoch: 100%|██████████| 3/3 [00:04<00:00,  1.47s/it]

Validation Accuracy: 0.7792792792792792





Macro F1 Score: 0.44084983099951713
Weighted F1 Score: 0.6951569873964752
Accuracy score: 0.7884283246977547 



Some weights of the model checkpoint at asafaya/bert-base-arabic were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at asafaya/bert-base-ar


Model Summary:
Language: Arabic
Sample Size: 16
Seed value: 2020
Train loss: 0.7075396776199341


Epoch:  33%|███▎      | 1/3 [00:01<00:02,  1.47s/it]

Validation Accuracy: 0.7849099099099099
Train loss: 0.5729531049728394


Epoch:  67%|██████▋   | 2/3 [00:02<00:01,  1.47s/it]

Validation Accuracy: 0.7792792792792792
Train loss: 0.4702599346637726


Epoch: 100%|██████████| 3/3 [00:04<00:00,  1.47s/it]

Validation Accuracy: 0.7865990990990991





Macro F1 Score: 0.44084983099951713
Weighted F1 Score: 0.6951569873964752
Accuracy score: 0.7884283246977547 



Some weights of the model checkpoint at asafaya/bert-base-arabic were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at asafaya/bert-base-ar


Model Summary:
Language: Arabic
Sample Size: 16
Seed value: 2021
Train loss: 0.7206088304519653


Epoch:  33%|███▎      | 1/3 [00:01<00:02,  1.48s/it]

Validation Accuracy: 0.7680180180180181
Train loss: 0.5708395838737488


Epoch:  67%|██████▋   | 2/3 [00:02<00:01,  1.48s/it]

Validation Accuracy: 0.7865990990990991
Train loss: 0.5182000994682312


Epoch: 100%|██████████| 3/3 [00:04<00:00,  1.48s/it]

Validation Accuracy: 0.7865990990990991





Macro F1 Score: 0.44084983099951713
Weighted F1 Score: 0.6951569873964752
Accuracy score: 0.7884283246977547 



Some weights of the model checkpoint at asafaya/bert-base-arabic were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at asafaya/bert-base-ar


Model Summary:
Language: Arabic
Sample Size: 16
Seed value: 2022
Train loss: 0.6974090933799744


Epoch:  33%|███▎      | 1/3 [00:01<00:02,  1.49s/it]

Validation Accuracy: 0.7612612612612613
Train loss: 0.6282309889793396


Epoch:  67%|██████▋   | 2/3 [00:02<00:01,  1.49s/it]

Validation Accuracy: 0.793918918918919
Train loss: 0.5451418161392212


Epoch: 100%|██████████| 3/3 [00:04<00:00,  1.49s/it]

Validation Accuracy: 0.7865990990990991





Macro F1 Score: 0.44084983099951713
Weighted F1 Score: 0.6951569873964752
Accuracy score: 0.7884283246977547 

The Average  Weighted F1-Score of the Language  Arabic is: 0.6951569873964752
The Average  Macro F1-Score of the Language  Arabic is: 0.44084983099951713


Some weights of the model checkpoint at asafaya/bert-base-arabic were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at asafaya/bert-base-ar


Model Summary:
Language: Arabic
Sample Size: 32
Seed value: 2018
Train loss: 0.8136326372623444


Epoch:  33%|███▎      | 1/3 [00:01<00:03,  1.61s/it]

Validation Accuracy: 0.7719594594594594
Train loss: 0.5435575544834137


Epoch:  67%|██████▋   | 2/3 [00:03<00:01,  1.61s/it]

Validation Accuracy: 0.7865990990990991
Train loss: 0.47860686480998993


Epoch: 100%|██████████| 3/3 [00:04<00:00,  1.61s/it]

Validation Accuracy: 0.7865990990990991





Macro F1 Score: 0.44084983099951713
Weighted F1 Score: 0.6951569873964752
Accuracy score: 0.7884283246977547 



Some weights of the model checkpoint at asafaya/bert-base-arabic were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at asafaya/bert-base-ar


Model Summary:
Language: Arabic
Sample Size: 32
Seed value: 2019
Train loss: 0.5473939180374146


Epoch:  33%|███▎      | 1/3 [00:01<00:03,  1.63s/it]

Validation Accuracy: 0.7792792792792792
Train loss: 0.48574428260326385


Epoch:  67%|██████▋   | 2/3 [00:03<00:01,  1.62s/it]

Validation Accuracy: 0.7865990990990991
Train loss: 0.4294455647468567


Epoch: 100%|██████████| 3/3 [00:04<00:00,  1.62s/it]

Validation Accuracy: 0.7792792792792792





Macro F1 Score: 0.44084983099951713
Weighted F1 Score: 0.6951569873964752
Accuracy score: 0.7884283246977547 



Some weights of the model checkpoint at asafaya/bert-base-arabic were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at asafaya/bert-base-ar


Model Summary:
Language: Arabic
Sample Size: 32
Seed value: 2020
Train loss: 0.6566843092441559


Epoch:  33%|███▎      | 1/3 [00:01<00:03,  1.65s/it]

Validation Accuracy: 0.793918918918919
Train loss: 0.536289632320404


Epoch:  67%|██████▋   | 2/3 [00:03<00:01,  1.64s/it]

Validation Accuracy: 0.793918918918919
Train loss: 0.500442236661911


Epoch: 100%|██████████| 3/3 [00:04<00:00,  1.64s/it]

Validation Accuracy: 0.793918918918919





Macro F1 Score: 0.44084983099951713
Weighted F1 Score: 0.6951569873964752
Accuracy score: 0.7884283246977547 



Some weights of the model checkpoint at asafaya/bert-base-arabic were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at asafaya/bert-base-ar


Model Summary:
Language: Arabic
Sample Size: 32
Seed value: 2021
Train loss: 0.6693314909934998


Epoch:  33%|███▎      | 1/3 [00:01<00:03,  1.62s/it]

Validation Accuracy: 0.793918918918919
Train loss: 0.5254843533039093


Epoch:  67%|██████▋   | 2/3 [00:03<00:01,  1.62s/it]

Validation Accuracy: 0.793918918918919
Train loss: 0.5027865469455719


Epoch: 100%|██████████| 3/3 [00:04<00:00,  1.62s/it]

Validation Accuracy: 0.7865990990990991





Macro F1 Score: 0.44084983099951713
Weighted F1 Score: 0.6951569873964752
Accuracy score: 0.7884283246977547 



Some weights of the model checkpoint at asafaya/bert-base-arabic were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at asafaya/bert-base-ar


Model Summary:
Language: Arabic
Sample Size: 32
Seed value: 2022
Train loss: 0.6926472783088684


Epoch:  33%|███▎      | 1/3 [00:01<00:03,  1.61s/it]

Validation Accuracy: 0.7865990990990991
Train loss: 0.5568088293075562


Epoch:  67%|██████▋   | 2/3 [00:03<00:01,  1.61s/it]

Validation Accuracy: 0.7865990990990991
Train loss: 0.4690292477607727


Epoch: 100%|██████████| 3/3 [00:04<00:00,  1.62s/it]

Validation Accuracy: 0.793918918918919





Macro F1 Score: 0.44084983099951713
Weighted F1 Score: 0.6951569873964752
Accuracy score: 0.7884283246977547 

The Average  Weighted F1-Score of the Language  Arabic is: 0.6951569873964752
The Average  Macro F1-Score of the Language  Arabic is: 0.44084983099951713


Some weights of the model checkpoint at asafaya/bert-base-arabic were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at asafaya/bert-base-ar


Model Summary:
Language: Arabic
Sample Size: 64
Seed value: 2018
Train loss: 0.6064631268382072


Epoch:  33%|███▎      | 1/3 [00:01<00:03,  1.92s/it]

Validation Accuracy: 0.7792792792792792
Train loss: 0.44822418689727783


Epoch:  67%|██████▋   | 2/3 [00:03<00:01,  1.91s/it]

Validation Accuracy: 0.7882882882882883
Train loss: 0.40673868730664253


Epoch: 100%|██████████| 3/3 [00:05<00:00,  1.90s/it]

Validation Accuracy: 0.8040540540540541





Macro F1 Score: 0.5192837735512894
Weighted F1 Score: 0.730734328430774
Accuracy score: 0.7987910189982729 



Some weights of the model checkpoint at asafaya/bert-base-arabic were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at asafaya/bert-base-ar


Model Summary:
Language: Arabic
Sample Size: 64
Seed value: 2019
Train loss: 0.6704947203397751


Epoch:  33%|███▎      | 1/3 [00:01<00:03,  1.90s/it]

Validation Accuracy: 0.793918918918919
Train loss: 0.5244077816605568


Epoch:  67%|██████▋   | 2/3 [00:03<00:01,  1.89s/it]

Validation Accuracy: 0.793918918918919
Train loss: 0.5055199563503265


Epoch: 100%|██████████| 3/3 [00:05<00:00,  1.89s/it]

Validation Accuracy: 0.7865990990990991





Macro F1 Score: 0.44084983099951713
Weighted F1 Score: 0.6951569873964752
Accuracy score: 0.7884283246977547 



Some weights of the model checkpoint at asafaya/bert-base-arabic were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at asafaya/bert-base-ar


Model Summary:
Language: Arabic
Sample Size: 64
Seed value: 2020
Train loss: 0.5313308015465736


Epoch:  33%|███▎      | 1/3 [00:01<00:03,  1.91s/it]

Validation Accuracy: 0.793918918918919
Train loss: 0.4785248041152954


Epoch:  67%|██████▋   | 2/3 [00:03<00:01,  1.91s/it]

Validation Accuracy: 0.793918918918919
Train loss: 0.4411855638027191


Epoch: 100%|██████████| 3/3 [00:05<00:00,  1.90s/it]

Validation Accuracy: 0.7972972972972973





Macro F1 Score: 0.47820707668573437
Weighted F1 Score: 0.7131844035846163
Accuracy score: 0.7962003454231433 



Some weights of the model checkpoint at asafaya/bert-base-arabic were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at asafaya/bert-base-ar


Model Summary:
Language: Arabic
Sample Size: 64
Seed value: 2021
Train loss: 0.6452061533927917


Epoch:  33%|███▎      | 1/3 [00:01<00:03,  1.92s/it]

Validation Accuracy: 0.7865990990990991
Train loss: 0.4854462221264839


Epoch:  67%|██████▋   | 2/3 [00:03<00:01,  1.91s/it]

Validation Accuracy: 0.7865990990990991
Train loss: 0.4699380025267601


Epoch: 100%|██████████| 3/3 [00:05<00:00,  1.90s/it]

Validation Accuracy: 0.793918918918919





Macro F1 Score: 0.44084983099951713
Weighted F1 Score: 0.6951569873964752
Accuracy score: 0.7884283246977547 



Some weights of the model checkpoint at asafaya/bert-base-arabic were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at asafaya/bert-base-ar


Model Summary:
Language: Arabic
Sample Size: 64
Seed value: 2022
Train loss: 0.5587659105658531


Epoch:  33%|███▎      | 1/3 [00:01<00:03,  1.89s/it]

Validation Accuracy: 0.7865990990990991
Train loss: 0.47818339616060257


Epoch:  67%|██████▋   | 2/3 [00:03<00:01,  1.89s/it]

Validation Accuracy: 0.793918918918919
Train loss: 0.4531751722097397


Epoch: 100%|██████████| 3/3 [00:05<00:00,  1.89s/it]

Validation Accuracy: 0.7792792792792792





Macro F1 Score: 0.44084983099951713
Weighted F1 Score: 0.6951569873964752
Accuracy score: 0.7884283246977547 

The Average  Weighted F1-Score of the Language  Arabic is: 0.7058779388409632
The Average  Macro F1-Score of the Language  Arabic is: 0.464008068647115


Some weights of the model checkpoint at asafaya/bert-base-arabic were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at asafaya/bert-base-ar


Model Summary:
Language: Arabic
Sample Size: 128
Seed value: 2018
Train loss: 0.5625194013118744


Epoch:  33%|███▎      | 1/3 [00:02<00:04,  2.46s/it]

Validation Accuracy: 0.7865990990990991
Train loss: 0.4856507331132889


Epoch:  67%|██████▋   | 2/3 [00:04<00:02,  2.46s/it]

Validation Accuracy: 0.793918918918919
Train loss: 0.4004993662238121


Epoch: 100%|██████████| 3/3 [00:07<00:00,  2.46s/it]

Validation Accuracy: 0.7849099099099099





Macro F1 Score: 0.44084983099951713
Weighted F1 Score: 0.6951569873964752
Accuracy score: 0.7884283246977547 



Some weights of the model checkpoint at asafaya/bert-base-arabic were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at asafaya/bert-base-ar


Model Summary:
Language: Arabic
Sample Size: 128
Seed value: 2019
Train loss: 0.5302252992987633


Epoch:  33%|███▎      | 1/3 [00:02<00:04,  2.45s/it]

Validation Accuracy: 0.7865990990990991
Train loss: 0.4433499239385128


Epoch:  67%|██████▋   | 2/3 [00:04<00:02,  2.47s/it]

Validation Accuracy: 0.7916666666666667
Train loss: 0.36276644468307495


Epoch: 100%|██████████| 3/3 [00:07<00:00,  2.47s/it]

Validation Accuracy: 0.8429054054054054





Macro F1 Score: 0.7087949176096884
Weighted F1 Score: 0.8267315818041485
Accuracy score: 0.8523316062176166 



Some weights of the model checkpoint at asafaya/bert-base-arabic were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at asafaya/bert-base-ar


Model Summary:
Language: Arabic
Sample Size: 128
Seed value: 2020
Train loss: 0.5565903149545193


Epoch:  33%|███▎      | 1/3 [00:02<00:04,  2.44s/it]

Validation Accuracy: 0.7865990990990991
Train loss: 0.4552023224532604


Epoch:  67%|██████▋   | 2/3 [00:04<00:02,  2.45s/it]

Validation Accuracy: 0.7792792792792792
Train loss: 0.36934826150536537


Epoch: 100%|██████████| 3/3 [00:07<00:00,  2.45s/it]

Validation Accuracy: 0.838963963963964





Macro F1 Score: 0.7252939270505461
Weighted F1 Score: 0.8355906593450518
Accuracy score: 0.8583765112262521 



Some weights of the model checkpoint at asafaya/bert-base-arabic were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at asafaya/bert-base-ar


Model Summary:
Language: Arabic
Sample Size: 128
Seed value: 2021
Train loss: 0.5865986682474613


Epoch:  33%|███▎      | 1/3 [00:02<00:04,  2.48s/it]

Validation Accuracy: 0.7865990990990991
Train loss: 0.450510211288929


Epoch:  67%|██████▋   | 2/3 [00:04<00:02,  2.47s/it]

Validation Accuracy: 0.793918918918919
Train loss: 0.37044027261435986


Epoch: 100%|██████████| 3/3 [00:07<00:00,  2.46s/it]

Validation Accuracy: 0.7961711711711711





Macro F1 Score: 0.5063039295708955
Weighted F1 Score: 0.7251854116923478
Accuracy score: 0.7979274611398963 



Some weights of the model checkpoint at asafaya/bert-base-arabic were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at asafaya/bert-base-ar


Model Summary:
Language: Arabic
Sample Size: 128
Seed value: 2022
Train loss: 0.6174258254468441


Epoch:  33%|███▎      | 1/3 [00:02<00:05,  2.51s/it]

Validation Accuracy: 0.7865990990990991
Train loss: 0.48135869577527046


Epoch:  67%|██████▋   | 2/3 [00:04<00:02,  2.49s/it]

Validation Accuracy: 0.793918918918919
Train loss: 0.44173618219792843


Epoch: 100%|██████████| 3/3 [00:07<00:00,  2.47s/it]

Validation Accuracy: 0.7905405405405406





Macro F1 Score: 0.45476529482224787
Weighted F1 Score: 0.699212651166538
Accuracy score: 0.7841105354058722 

The Average  Weighted F1-Score of the Language  Arabic is: 0.7563754582809123
The Average  Macro F1-Score of the Language  Arabic is: 0.5672015800105791


Some weights of the model checkpoint at asafaya/bert-base-arabic were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at asafaya/bert-base-ar


Model Summary:
Language: Arabic
Sample Size: 256
Seed value: 2018
Train loss: 0.5275416560471058


Epoch:  33%|███▎      | 1/3 [00:03<00:07,  3.56s/it]

Validation Accuracy: 0.793918918918919
Train loss: 0.3832092210650444


Epoch:  67%|██████▋   | 2/3 [00:07<00:03,  3.55s/it]

Validation Accuracy: 0.8710585585585585
Train loss: 0.2100411329884082


Epoch: 100%|██████████| 3/3 [00:10<00:00,  3.54s/it]

Validation Accuracy: 0.8699324324324325





Macro F1 Score: 0.7990940258763402
Weighted F1 Score: 0.8660483010008556
Accuracy score: 0.8661485319516408 



Some weights of the model checkpoint at asafaya/bert-base-arabic were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at asafaya/bert-base-ar


Model Summary:
Language: Arabic
Sample Size: 256
Seed value: 2019
Train loss: 0.4964395686984062


Epoch:  33%|███▎      | 1/3 [00:03<00:07,  3.60s/it]

Validation Accuracy: 0.7792792792792792
Train loss: 0.3910885863006115


Epoch:  67%|██████▋   | 2/3 [00:07<00:03,  3.59s/it]

Validation Accuracy: 0.8260135135135135
Train loss: 0.2494367053732276


Epoch: 100%|██████████| 3/3 [00:10<00:00,  3.57s/it]

Validation Accuracy: 0.8507882882882883





Macro F1 Score: 0.7211878009630819
Weighted F1 Score: 0.83257761624764
Accuracy score: 0.8549222797927462 



Some weights of the model checkpoint at asafaya/bert-base-arabic were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at asafaya/bert-base-ar


Model Summary:
Language: Arabic
Sample Size: 256
Seed value: 2020
Train loss: 0.5055221300572157


Epoch:  33%|███▎      | 1/3 [00:03<00:07,  3.54s/it]

Validation Accuracy: 0.7865990990990991
Train loss: 0.34422988072037697


Epoch:  67%|██████▋   | 2/3 [00:07<00:03,  3.54s/it]

Validation Accuracy: 0.8513513513513513
Train loss: 0.20258108410052955


Epoch: 100%|██████████| 3/3 [00:10<00:00,  3.54s/it]

Validation Accuracy: 0.8665540540540541





Macro F1 Score: 0.7920313548175811
Weighted F1 Score: 0.8711812902417521
Accuracy score: 0.8825561312607945 



Some weights of the model checkpoint at asafaya/bert-base-arabic were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at asafaya/bert-base-ar


Model Summary:
Language: Arabic
Sample Size: 256
Seed value: 2021
Train loss: 0.523699801415205


Epoch:  33%|███▎      | 1/3 [00:03<00:07,  3.55s/it]

Validation Accuracy: 0.7865990990990991
Train loss: 0.414869237691164


Epoch:  67%|██████▋   | 2/3 [00:07<00:03,  3.55s/it]

Validation Accuracy: 0.8733108108108109
Train loss: 0.22233695397153497


Epoch: 100%|██████████| 3/3 [00:10<00:00,  3.55s/it]

Validation Accuracy: 0.8614864864864865





Macro F1 Score: 0.7897845948820462
Weighted F1 Score: 0.8563856329662546
Accuracy score: 0.853195164075993 



Some weights of the model checkpoint at asafaya/bert-base-arabic were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at asafaya/bert-base-ar


Model Summary:
Language: Arabic
Sample Size: 256
Seed value: 2022
Train loss: 0.5293125659227371


Epoch:  33%|███▎      | 1/3 [00:03<00:07,  3.56s/it]

Validation Accuracy: 0.793918918918919
Train loss: 0.4649667739868164


Epoch:  67%|██████▋   | 2/3 [00:07<00:03,  3.57s/it]

Validation Accuracy: 0.8141891891891891
Train loss: 0.37047802563756704


Epoch: 100%|██████████| 3/3 [00:10<00:00,  3.57s/it]

Validation Accuracy: 0.8175675675675675





Macro F1 Score: 0.7959571704290743
Weighted F1 Score: 0.8573499956895887
Accuracy score: 0.8514680483592401 

The Average  Weighted F1-Score of the Language  Arabic is: 0.8567085672292182
The Average  Macro F1-Score of the Language  Arabic is: 0.7796109893936248


Some weights of the model checkpoint at asafaya/bert-base-arabic were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at asafaya/bert-base-ar


Model Summary:
Language: Arabic
Sample Size: 4051
Seed value: 2018
Train loss: 0.32277468149000266


Epoch:  33%|███▎      | 1/3 [00:36<01:12, 36.34s/it]

Validation Accuracy: 0.9037162162162162
Train loss: 0.1990095108498152


Epoch:  67%|██████▋   | 2/3 [01:12<00:36, 36.30s/it]

Validation Accuracy: 0.9155405405405406
Train loss: 0.10049040190046873


Epoch: 100%|██████████| 3/3 [01:48<00:00, 36.32s/it]

Validation Accuracy: 0.8952702702702703





Macro F1 Score: 0.8543518363239979
Weighted F1 Score: 0.8994807356939469
Accuracy score: 0.8963730569948186 



Some weights of the model checkpoint at asafaya/bert-base-arabic were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at asafaya/bert-base-ar


Model Summary:
Language: Arabic
Sample Size: 4051
Seed value: 2019
Train loss: 0.30568578316441436


Epoch:  33%|███▎      | 1/3 [00:36<01:12, 36.43s/it]

Validation Accuracy: 0.9003378378378378
Train loss: 0.1684904418863708


Epoch:  67%|██████▋   | 2/3 [01:12<00:36, 36.45s/it]

Validation Accuracy: 0.9121621621621622
Train loss: 0.08720334098623024


Epoch: 100%|██████████| 3/3 [01:49<00:00, 36.50s/it]

Validation Accuracy: 0.9087837837837838





Macro F1 Score: 0.8677484109867037
Weighted F1 Score: 0.910966352943638
Accuracy score: 0.9101899827288429 



Some weights of the model checkpoint at asafaya/bert-base-arabic were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at asafaya/bert-base-ar


Model Summary:
Language: Arabic
Sample Size: 4051
Seed value: 2020
Train loss: 0.3241280182374744


Epoch:  33%|███▎      | 1/3 [00:36<01:13, 36.53s/it]

Validation Accuracy: 0.9014639639639639
Train loss: 0.18614191902933394


Epoch:  67%|██████▋   | 2/3 [01:12<00:36, 36.50s/it]

Validation Accuracy: 0.918918918918919
Train loss: 0.08756070838018327


Epoch: 100%|██████████| 3/3 [01:49<00:00, 36.48s/it]

Validation Accuracy: 0.9054054054054054





Macro F1 Score: 0.8829184896503548
Weighted F1 Score: 0.9229287866864455
Accuracy score: 0.924006908462867 



Some weights of the model checkpoint at asafaya/bert-base-arabic were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at asafaya/bert-base-ar


Model Summary:
Language: Arabic
Sample Size: 4051
Seed value: 2021
Train loss: 0.3184416338684052


Epoch:  33%|███▎      | 1/3 [00:36<01:13, 36.71s/it]

Validation Accuracy: 0.8817567567567568
Train loss: 0.181548639501291


Epoch:  67%|██████▋   | 2/3 [01:13<00:36, 36.68s/it]

Validation Accuracy: 0.8918918918918919
Train loss: 0.10963626680840687


Epoch: 100%|██████████| 3/3 [01:50<00:00, 36.69s/it]

Validation Accuracy: 0.8913288288288288





Macro F1 Score: 0.8505152025080105
Weighted F1 Score: 0.9017476152875917
Accuracy score: 0.9032815198618307 



Some weights of the model checkpoint at asafaya/bert-base-arabic were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at asafaya/bert-base-ar


Model Summary:
Language: Arabic
Sample Size: 4051
Seed value: 2022
Train loss: 0.3104409219433121


Epoch:  33%|███▎      | 1/3 [00:36<01:12, 36.41s/it]

Validation Accuracy: 0.8941441441441442
Train loss: 0.17226482951998945


Epoch:  67%|██████▋   | 2/3 [01:12<00:36, 36.40s/it]

Validation Accuracy: 0.8811936936936936
Train loss: 0.09850293807740375


Epoch: 100%|██████████| 3/3 [01:49<00:00, 36.41s/it]

Validation Accuracy: 0.893581081081081





Macro F1 Score: 0.8471050486930947
Weighted F1 Score: 0.8988968959624088
Accuracy score: 0.8998272884283247 

The Average  Weighted F1-Score of the Language  Arabic is: 0.9068040773148063
The Average  Macro F1-Score of the Language  Arabic is: 0.8605277976324324
For Sample Size 16 Average Weighted F1-Score 0.6951569873964752 and Average Macro F1-Score 0.44084983099951713 of Arabic 

For Sample Size 32 Average Weighted F1-Score 0.6951569873964752 and Average Macro F1-Score 0.44084983099951713 of Arabic 

For Sample Size 64 Average Weighted F1-Score 0.7058779388409632 and Average Macro F1-Score 0.464008068647115 of Arabic 

For Sample Size 128 Average Weighted F1-Score 0.7563754582809123 and Average Macro F1-Score 0.5672015800105791 of Arabic 

For Sample Size 256 Average Weighted F1-Score 0.8567085672292182 and Average Macro F1-Score 0.7796109893936248 of Arabic 

For Sample Size 4051 Average Weighted F1-Score 0.9068040773148063 and Average Macro F1-Score 0.8605277976324324 of Arabic 

