In [None]:
pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
pip install torchmetrics

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
pip install pytorch_lightning

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


# Importing Libraries


In [None]:

# Add environment Packages paths to conda
import os, sys, warnings
import pandas as pd
import numpy as np
warnings.simplefilter("ignore")

# Text preprocessing packages
import nltk # Text libarary
# nltk.download('stopwords')
import string # Removing special characters {#, @, ...}

# Modelling
from sklearn.metrics import accuracy_score,confusion_matrix, classification_report
from sklearn.pipeline import Pipeline
from sklearn.metrics import f1_score
from sklearn.svm import SVC
# Saving Model
import pickle

# Visualization Packages
import seaborn as sns
import matplotlib.pyplot as plt
sns.set(font_scale=1.3)
%matplotlib inline


from pylab import rcParams
import joblib

from transformers import AutoTokenizer, AutoModel
import torch
from torch import nn,optim
from torch.utils.data import Dataset, DataLoader
from torchmetrics.functional import f1_score
import pytorch_lightning as pl
from pytorch_lightning.callbacks.early_stopping import EarlyStopping

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import roc_auc_score, accuracy_score, confusion_matrix, classification_report

from tqdm.auto import tqdm

torch.cuda.empty_cache()


In [None]:
# Release unoccupied cached GPU memory held by PyTorch's allocator
torch.cuda.empty_cache()

In [None]:
# Mount Google Drive (Colab only) so the dataset CSV under /content/drive is readable
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Load the cleaned dataset from Google Drive and discard rows with missing values
path_input = '/content/drive/MyDrive/Colab Notebooks/cleaned_data_IRIS.csv'
df = pd.read_csv(path_input).dropna()
df.head()

Unnamed: 0,text,label
0,اوليمبياد الجايه هكون لسه الكليه,none
1,عجز الموازنه وصل ل الناتج المحلي يعني لسه اقل ...,anger
2,كتنا نيله حظنا الهباب يضحك,sadness
3,جميعنا نريد تحقيق اهدافنا تونس تالقت حراسه المرمي,joy
4,اوليمبياد نظامها مختلف ومواعيد المونديال مكانت...,none


In [None]:
# Show the dataset dimensions as (rows, columns)
df.shape

(9679, 2)

In [None]:
# Count missing values per column (expected to be zero after the dropna at load time)
df.isnull().sum()

text     0
label    0
dtype: int64

In [None]:
# Count rows that are exact duplicates of an earlier row (all columns compared)
df.duplicated().sum()

0

In [None]:
# Split into train / held-out sets, integer-encode the labels and save both as CSV files.
# A fixed seed makes the split identical on every run, so results from different
# training cells are measured on the same held-out rows; stratifying on the label keeps
# the class proportions the same in both parts.
train, val = train_test_split(
    df[['label', 'text']],
    test_size=0.2,
    random_state=42,
    stratify=df['label'],
)
# Work on owned copies: the split results are derived from `df`, and assigning into
# them directly can trigger pandas' SettingWithCopyWarning.
train, val = train.copy(), val.copy()

# Fit the encoder on the training labels only, then reuse it for the held-out labels
lbl_enc = LabelEncoder()
train["label"] = lbl_enc.fit_transform(train["label"])
val["label"] = lbl_enc.transform(val["label"])

# Persist the encoder so predictions can be mapped back to class names later
joblib.dump(lbl_enc, "label_encoder.pkl")

train.to_csv("train.csv", index=False)
val.to_csv("test.csv", index=False)

In [None]:
# Show which integer code the encoder assigned to each emotion class
dict(enumerate(lbl_enc.classes_))

{0: 'anger',
 1: 'fear',
 2: 'joy',
 3: 'love',
 4: 'none',
 5: 'sadness',
 6: 'surprise',
 7: 'sympathy'}

In [None]:
# Build a tiny sample set: the first two dataset rows plus three hand-written examples.
# It is saved as new_data.csv, which ArabicDataModule serves from val_dataloader().
# NOTE(review): the two rows taken from `df` may also end up in train.csv — confirm
# this overlap is acceptable for a set that drives validation.
extra_rows = pd.DataFrame(
    [
        {'text': 'حب', 'label': 'love'},
        {'text': 'مستغرب', 'label': 'surprise'},
        {'text': 'حزن', 'label': 'sadness'},
    ]
)

# DataFrame.append was removed in pandas 2.0; a single concat builds the same frame
# and returns a new object, so the label assignment below never touches `df`.
new_data = pd.concat([df.iloc[:2], extra_rows], ignore_index=True)
new_data["label"] = lbl_enc.transform(new_data["label"])
new_data.to_csv("new_data.csv", index=False)

new_data


Unnamed: 0,text,label
0,اوليمبياد الجايه هكون لسه الكليه,4
1,عجز الموازنه وصل ل الناتج المحلي يعني لسه اقل ...,0
2,حب,3
3,مستغرب,6
4,حزن,5


In [None]:
class ArabicDataset(Dataset):
    """Map-style dataset yielding tokenized text and its integer label.

    Args:
        data: Table exposing a "text" and a "label" column.
        max_len: Length every sequence is padded / truncated to.
        model_type: Key selecting the tokenizer checkpoint
            ("Twitter", "ARBERT", "Base" or "MARBERT").
    """

    def __init__(self,data,max_len,model_type="Twitter"):
        super().__init__()
        self.labels = data["label"].values
        self.texts = data["text"].values
        self.max_len = max_len
        # Checkpoint names on the Hugging Face hub, keyed by the short model alias
        checkpoints = {
            "Twitter": "aubmindlab/bert-base-arabertv02-twitter",
            "ARBERT": "UBC-NLP/ARBERT",
            "Base": "aubmindlab/bert-base-arabertv02",
            "MARBERT": "UBC-NLP/MARBERT",
        }
        self.tokenizer = AutoTokenizer.from_pretrained(checkpoints[model_type])

    def __len__(self):
        """Number of samples."""
        return len(self.texts)

    def __getitem__(self,idx):
        """Return {"inputs": {...token tensors...}, "labels": long tensor} for one sample."""
        # Collapse every run of whitespace into a single space
        normalized = " ".join(self.texts[idx].split())
        target = self.labels[idx]
        encoded = self.tokenizer(
            normalized,
            padding='max_length',
            max_length=self.max_len,
            truncation=True,
            return_tensors="pt",
        )

        # The tokenizer returns a leading batch axis of size one; drop it per field
        features = {
            field: encoded[field][0]
            for field in ("input_ids", "token_type_ids", "attention_mask")
        }
        return {"inputs": features, "labels": torch.tensor(target, dtype=torch.long)}

In [None]:
class ArabicDataModule(pl.LightningDataModule):
    """Lightning data module that wraps three CSV files in `ArabicDataset` loaders.

    Args:
        train_path: CSV backing the training loader.
        val_path: CSV served by `test_dataloader`.
        new_data_path: CSV served by `val_dataloader`.
        batch_size: Batch size shared by every loader.
        max_len: Sequence length forwarded to `ArabicDataset`.
        model_type: Checkpoint key forwarded to `ArabicDataset`.

    NOTE(review): the validation loop runs on the `new_data_path` samples while the
    `val_path` split is exposed as the test loader — confirm this is intended.
    """

    def __init__(self,train_path,val_path,new_data_path,batch_size=12,max_len=100,model_type="Twitter"):
        super().__init__()
        self.train_path = train_path
        self.val_path = val_path
        self.new_data_path = new_data_path
        self.batch_size = batch_size
        self.max_len = max_len
        self.model_type = model_type

    def setup(self,stage=None):
        """Read the three CSVs, then build one `ArabicDataset` per file."""
        train_frame, val_frame, new_frame = (
            pd.read_csv(csv_path)
            for csv_path in (self.train_path, self.val_path, self.new_data_path)
        )
        shared = dict(max_len=self.max_len, model_type=self.model_type)
        self.train_dataset = ArabicDataset(data=train_frame, **shared)
        self.val_dataset = ArabicDataset(data=val_frame, **shared)
        self.new_dataset = ArabicDataset(data=new_frame, **shared)

    def _make_loader(self, dataset, shuffle):
        """Create a DataLoader over `dataset` with the module's batch size."""
        return DataLoader(dataset, batch_size=self.batch_size, shuffle=shuffle)

    def train_dataloader(self):
        """Shuffled loader over the training CSV."""
        return self._make_loader(self.train_dataset, True)

    def val_dataloader(self):
        """Ordered loader over the `new_data_path` samples."""
        return self._make_loader(self.new_dataset, False)

    def test_dataloader(self):
        """Ordered loader over the `val_path` split."""
        return self._make_loader(self.val_dataset, False)

In [None]:
n_classes = 8
class ArabicBertModel(pl.LightningModule):
    """Pretrained BERT encoder followed by one linear layer producing `n_classes` logits.

    Args:
        model_type: Key selecting the pretrained checkpoint
            ("Twitter", "ARBERT", "Base" or "MARBERT").
        lr: Learning rate passed to AdamW. Defaults to 1e-6, the value that was
            previously hard-coded, so existing calls behave the same; pass another
            value to run a different learning rate without redefining the class.
    """

    def __init__(self,model_type="Twitter",lr=1e-6):
        super().__init__()
        # alias -> (hub checkpoint name, size of the vector fed to the classifier)
        model = {"Twitter": ("aubmindlab/bert-base-arabertv02-twitter",768),
                "ARBERT": ("UBC-NLP/ARBERT",768),
                 "Base": ("aubmindlab/bert-base-arabertv02",768),
                "MARBERT": ("UBC-NLP/MARBERT",768)}
        checkpoint, hidden_size = model[model_type]
        self.lr = lr
        self.bert_model = AutoModel.from_pretrained(checkpoint)
        self.fc = nn.Linear(hidden_size,n_classes)

    def forward(self,inputs):
        """Return class logits for a dict of tokenizer tensors."""
        out = self.bert_model(**inputs)
        # Element 1 of the BERT output is the pooled sequence representation
        # (pooler_output), not the per-token last hidden states.
        pooled_output = out[1]
        return self.fc(pooled_output)

    def configure_optimizers(self):
        """AdamW over all parameters (encoder and head) at the configured learning rate."""
        return optim.AdamW(self.parameters(), lr=self.lr)

    def criterion(self,output,target):
        """Cross-entropy between logits and integer class targets."""
        return nn.CrossEntropyLoss()(output,target)

    #TODO: adding metrics
    def training_step(self,batch,batch_idx):
        """Compute and log the training loss and F1 for one batch; return the loss."""
        x,y = batch["inputs"],batch["labels"]
        out = self(x)
        loss = self.criterion(out,y)
        f1_result = f1_score(out, y, num_classes=n_classes)
        metrics = {"train_f1": f1_result, "train_loss": loss}
        self.log_dict(metrics)
        return loss

    def validation_step(self,batch,batch_idx):
        """Compute and log the validation loss and F1 for one batch; return both."""
        x, y = batch["inputs"],batch["labels"]
        out = self(x)
        loss = self.criterion(out,y)
        f1_result = f1_score(out, y, num_classes=n_classes)
        metrics = {"val_f1": f1_result, "val_loss": loss}
        self.log_dict(metrics)
        return metrics

# Training On MARBERT

## LR = 1e-6

#### 10 epoch

In [None]:
# Fine-tune MARBERT for up to 10 epochs with lr = 1e-6
MODEL_TYPE = "MARBERT"
dm = ArabicDataModule(train_path="./train.csv",
                val_path = "./test.csv",
                new_data_path = "./new_data.csv",
                batch_size=128, max_len=70, model_type=MODEL_TYPE)

model = ArabicBertModel(model_type=MODEL_TYPE)
# `Trainer(gpus=...)` is deprecated since Lightning 1.7; accelerator/devices is the
# supported way to request all visible GPUs.
trainer = pl.Trainer(accelerator="gpu", devices=-1, max_epochs=10, default_root_dir='.', callbacks=[EarlyStopping(monitor="val_loss")])
trainer.fit(model,dm)

Downloading config.json:   0%|          | 0.00/701 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/624M [00:00<?, ?B/s]

Some weights of the model checkpoint at UBC-NLP/MARBERT were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  f"Setting `Trainer(gpus={gpus!r})` is deprecated in v1.7 and will be removed"
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: 

Downloading tokenizer_config.json:   0%|          | 0.00/376 [00:00<?, ?B/s]

Downloading vocab.txt:   0%|          | 0.00/1.05M [00:00<?, ?B/s]

Downloading special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name       | Type      | Params
-----------------------------------------
0 | bert_model | BertModel | 162 M 
1 | fc         | Linear    | 6.2 K 
-----------------------------------------
162 M     Trainable params
0         Non-trainable params
162 M     Total params
651.390   Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=10` reached.


##### Training Results

In [None]:
# Evaluate the fine-tuned model on the held-out split and print per-class metrics
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()

preds = []
real_values = []

test_dataloader = dm.test_dataloader()

# Iterating through tqdm lets the bar advance and close on its own
for batch in tqdm(test_dataloader):
    x,y = batch["inputs"],batch["labels"]
    inp = {k: v.to(device) for k, v in x.items()}

    with torch.no_grad():
        outputs = model(inp)

    # Keep one CPU tensor per batch instead of one GPU scalar per sample
    preds.append(torch.argmax(outputs, dim=1).cpu())
    real_values.append(y)

preds = torch.cat(preds)
real_values = torch.cat(real_values)
print(classification_report(real_values, preds, target_names=lbl_enc.classes_))

  0%|          | 0/16 [00:00<?, ?it/s]

              precision    recall  f1-score   support

       anger       0.74      0.87      0.80       293
        fear       0.94      0.90      0.92       227
         joy       0.68      0.64      0.66       226
        love       0.87      0.79      0.83       256
        none       0.68      0.92      0.78       310
     sadness       0.70      0.55      0.61       246
    surprise       0.63      0.36      0.46       194
    sympathy       0.80      0.91      0.85       184

    accuracy                           0.76      1936
   macro avg       0.76      0.74      0.74      1936
weighted avg       0.75      0.76      0.75      1936



#### 20 epoch

In [None]:
# Fine-tune MARBERT for up to 20 epochs with lr = 1e-6
MODEL_TYPE = "MARBERT"
dm = ArabicDataModule(train_path="./train.csv",
                val_path = "./test.csv",
                new_data_path = "./new_data.csv",
                batch_size=128, max_len=70, model_type=MODEL_TYPE)

model = ArabicBertModel(model_type=MODEL_TYPE)
# `Trainer(gpus=...)` is deprecated since Lightning 1.7; accelerator/devices is the
# supported way to request all visible GPUs.
trainer = pl.Trainer(accelerator="gpu", devices=-1, max_epochs=20, default_root_dir='.', callbacks=[EarlyStopping(monitor="val_loss")])
trainer.fit(model,dm)

Some weights of the model checkpoint at UBC-NLP/MARBERT were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  f"Setting `Trainer(gpus={gpus!r})` is deprecated in v1.7 and will be removed"
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: 

Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=20` reached.


##### Training Results

In [None]:
# Evaluate the fine-tuned model on the held-out split and print per-class metrics
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()

preds = []
real_values = []

test_dataloader = dm.test_dataloader()

# Iterating through tqdm lets the bar advance and close on its own
for batch in tqdm(test_dataloader):
    x,y = batch["inputs"],batch["labels"]
    inp = {k: v.to(device) for k, v in x.items()}

    with torch.no_grad():
        outputs = model(inp)

    # Keep one CPU tensor per batch instead of one GPU scalar per sample
    preds.append(torch.argmax(outputs, dim=1).cpu())
    real_values.append(y)

preds = torch.cat(preds)
real_values = torch.cat(real_values)
print(classification_report(real_values, preds, target_names=lbl_enc.classes_))

  0%|          | 0/16 [00:00<?, ?it/s]

              precision    recall  f1-score   support

       anger       0.77      0.84      0.81       293
        fear       0.95      0.92      0.93       227
         joy       0.68      0.70      0.69       226
        love       0.89      0.75      0.82       256
        none       0.72      0.87      0.79       310
     sadness       0.69      0.60      0.64       246
    surprise       0.60      0.52      0.55       194
    sympathy       0.85      0.90      0.87       184

    accuracy                           0.77      1936
   macro avg       0.77      0.76      0.76      1936
weighted avg       0.77      0.77      0.77      1936



#### 30 epochs, batch size = 50

In [None]:
# Fine-tune MARBERT for up to 30 epochs with a batch size of 50
MODEL_TYPE = "MARBERT"
dm = ArabicDataModule(train_path="./train.csv",
                val_path = "./test.csv",
                new_data_path = "./new_data.csv",
                batch_size=50, max_len=70, model_type=MODEL_TYPE)

model = ArabicBertModel(model_type=MODEL_TYPE)
# `Trainer(gpus=...)` is deprecated since Lightning 1.7; accelerator/devices is the
# supported way to request all visible GPUs.
trainer = pl.Trainer(accelerator="gpu", devices=-1, max_epochs=30, default_root_dir='.', callbacks=[EarlyStopping(monitor="val_loss")])
trainer.fit(model,dm)

Some weights of the model checkpoint at UBC-NLP/MARBERT were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  f"Setting `Trainer(gpus={gpus!r})` is deprecated in v1.7 and will be removed"
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: 

Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=30` reached.


##### Training Results

In [None]:
# Evaluate the fine-tuned model on the held-out split and print per-class metrics
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()

preds = []
real_values = []

test_dataloader = dm.test_dataloader()

# Iterating through tqdm lets the bar advance and close on its own
for batch in tqdm(test_dataloader):
    x,y = batch["inputs"],batch["labels"]
    inp = {k: v.to(device) for k, v in x.items()}

    with torch.no_grad():
        outputs = model(inp)

    # Keep one CPU tensor per batch instead of one GPU scalar per sample
    preds.append(torch.argmax(outputs, dim=1).cpu())
    real_values.append(y)

preds = torch.cat(preds)
real_values = torch.cat(real_values)
print(classification_report(real_values, preds, target_names=lbl_enc.classes_))

  0%|          | 0/39 [00:00<?, ?it/s]

              precision    recall  f1-score   support

       anger       0.76      0.82      0.79       293
        fear       0.93      0.92      0.92       227
         joy       0.68      0.67      0.67       226
        love       0.84      0.78      0.81       256
        none       0.76      0.72      0.74       310
     sadness       0.62      0.65      0.63       246
    surprise       0.58      0.57      0.58       194
    sympathy       0.87      0.91      0.89       184

    accuracy                           0.75      1936
   macro avg       0.75      0.75      0.75      1936
weighted avg       0.75      0.75      0.75      1936



## LR = 2e-6

In [None]:
n_classes = 8
class ArabicBertModel(pl.LightningModule):
    """Pretrained BERT encoder followed by one linear layer producing `n_classes` logits.

    Args:
        model_type: Key selecting the pretrained checkpoint
            ("Twitter", "ARBERT", "Base" or "MARBERT").
        lr: Learning rate passed to AdamW. Defaults to 2e-06, the value that was
            previously hard-coded in this definition, so existing calls behave the
            same; pass another value to try a different learning rate.
    """

    def __init__(self,model_type="Twitter",lr=2e-06):
        super().__init__()
        # alias -> (hub checkpoint name, size of the vector fed to the classifier)
        model = {"Twitter": ("aubmindlab/bert-base-arabertv02-twitter",768),
                "ARBERT": ("UBC-NLP/ARBERT",768),
                 "Base": ("aubmindlab/bert-base-arabertv02",768),
                "MARBERT": ("UBC-NLP/MARBERT",768)}
        checkpoint, hidden_size = model[model_type]
        self.lr = lr
        self.bert_model = AutoModel.from_pretrained(checkpoint)
        self.fc = nn.Linear(hidden_size,n_classes)

    def forward(self,inputs):
        """Return class logits for a dict of tokenizer tensors."""
        out = self.bert_model(**inputs)
        # Element 1 of the BERT output is the pooled sequence representation
        # (pooler_output), not the per-token last hidden states.
        pooled_output = out[1]
        return self.fc(pooled_output)

    def configure_optimizers(self):
        """AdamW over all parameters (encoder and head) at the configured learning rate."""
        return optim.AdamW(self.parameters(), lr=self.lr)

    def criterion(self,output,target):
        """Cross-entropy between logits and integer class targets."""
        return nn.CrossEntropyLoss()(output,target)

    #TODO: adding metrics
    def training_step(self,batch,batch_idx):
        """Compute and log the training loss and F1 for one batch; return the loss."""
        x,y = batch["inputs"],batch["labels"]
        out = self(x)
        loss = self.criterion(out,y)
        f1_result = f1_score(out, y, num_classes=n_classes)
        metrics = {"train_f1": f1_result, "train_loss": loss}
        self.log_dict(metrics)
        return loss

    def validation_step(self,batch,batch_idx):
        """Compute and log the validation loss and F1 for one batch; return both."""
        x, y = batch["inputs"],batch["labels"]
        out = self(x)
        loss = self.criterion(out,y)
        f1_result = f1_score(out, y, num_classes=n_classes)
        metrics = {"val_f1": f1_result, "val_loss": loss}
        self.log_dict(metrics)
        return metrics

#### 10 epoch 

In [None]:
# TODO: collect results for the other model checkpoints / sizes
# Fine-tune MARBERT for up to 10 epochs (learning rate comes from the model class above)
MODEL_TYPE = "MARBERT"
dm = ArabicDataModule(train_path="./train.csv",
                val_path = "./test.csv",
                new_data_path = "./new_data.csv",
                batch_size=128, max_len=70, model_type=MODEL_TYPE)

model = ArabicBertModel(model_type=MODEL_TYPE)
# `Trainer(gpus=...)` is deprecated since Lightning 1.7; accelerator/devices is the
# supported way to request all visible GPUs.
trainer = pl.Trainer(accelerator="gpu", devices=-1, max_epochs=10, default_root_dir='.', callbacks=[EarlyStopping(monitor="val_loss")])
trainer.fit(model,dm)

Some weights of the model checkpoint at UBC-NLP/MARBERT were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  f"Setting `Trainer(gpus={gpus!r})` is deprecated in v1.7 and will be removed"
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: 

Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=10` reached.


##### Training Results

In [None]:
# Evaluate the fine-tuned model on the held-out split and print per-class metrics
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()

preds = []
real_values = []

test_dataloader = dm.test_dataloader()

# Iterating through tqdm lets the bar advance and close on its own
for batch in tqdm(test_dataloader):
    x,y = batch["inputs"],batch["labels"]
    inp = {k: v.to(device) for k, v in x.items()}

    with torch.no_grad():
        outputs = model(inp)

    # Keep one CPU tensor per batch instead of one GPU scalar per sample
    preds.append(torch.argmax(outputs, dim=1).cpu())
    real_values.append(y)

preds = torch.cat(preds)
real_values = torch.cat(real_values)
print(classification_report(real_values, preds, target_names=lbl_enc.classes_))



  0%|          | 0/16 [00:00<?, ?it/s]

              precision    recall  f1-score   support

       anger       0.83      0.79      0.81       326
        fear       0.93      0.91      0.92       223
         joy       0.66      0.60      0.63       264
        love       0.79      0.76      0.78       239
        none       0.73      0.87      0.79       320
     sadness       0.60      0.57      0.58       206
    surprise       0.61      0.53      0.57       165
    sympathy       0.82      0.92      0.87       193

    accuracy                           0.75      1936
   macro avg       0.75      0.74      0.74      1936
weighted avg       0.75      0.75      0.75      1936



#### 20 epoch 

In [None]:
# TODO: collect results for the other model checkpoints / sizes
# Fine-tune MARBERT for up to 20 epochs (learning rate comes from the model class above)
MODEL_TYPE = "MARBERT"
dm = ArabicDataModule(train_path="./train.csv",
                val_path = "./test.csv",
                new_data_path = "./new_data.csv",
                batch_size=128, max_len=70, model_type=MODEL_TYPE)

model = ArabicBertModel(model_type=MODEL_TYPE)
# `Trainer(gpus=...)` is deprecated since Lightning 1.7; accelerator/devices is the
# supported way to request all visible GPUs.
trainer = pl.Trainer(accelerator="gpu", devices=-1, max_epochs=20, default_root_dir='.', callbacks=[EarlyStopping(monitor="val_loss")])
trainer.fit(model,dm)

Some weights of the model checkpoint at UBC-NLP/MARBERT were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  f"Setting `Trainer(gpus={gpus!r})` is deprecated in v1.7 and will be removed"
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: 

Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=20` reached.


##### Training Results

In [None]:
# Evaluate the fine-tuned model on the held-out split and print per-class metrics
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()

preds = []
real_values = []

test_dataloader = dm.test_dataloader()

# Iterating through tqdm lets the bar advance and close on its own
for batch in tqdm(test_dataloader):
    x,y = batch["inputs"],batch["labels"]
    inp = {k: v.to(device) for k, v in x.items()}

    with torch.no_grad():
        outputs = model(inp)

    # Keep one CPU tensor per batch instead of one GPU scalar per sample
    preds.append(torch.argmax(outputs, dim=1).cpu())
    real_values.append(y)

preds = torch.cat(preds)
real_values = torch.cat(real_values)
print(classification_report(real_values, preds, target_names=lbl_enc.classes_))



  0%|          | 0/16 [00:00<?, ?it/s]

              precision    recall  f1-score   support

       anger       0.80      0.79      0.79       326
        fear       0.93      0.91      0.92       223
         joy       0.63      0.64      0.64       264
        love       0.78      0.76      0.77       239
        none       0.75      0.78      0.77       320
     sadness       0.57      0.54      0.56       206
    surprise       0.59      0.55      0.57       165
    sympathy       0.84      0.92      0.88       193

    accuracy                           0.74      1936
   macro avg       0.74      0.74      0.74      1936
weighted avg       0.74      0.74      0.74      1936

