# CA 4 - Part 2 

In [4]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!pip install transformers

In [6]:
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForMaskedLM, AutoModel, BertTokenizer
from transformers import BertForSequenceClassification, TrainingArguments, Trainer
import numpy as np
from sklearn.metrics import accuracy_score, f1_score


## Loading the data


In [7]:
train_df = pd.read_excel('/content/drive/MyDrive/Data_Colab/NLP_CA4_Data/Part1/train.xlsx')
test_df = pd.read_excel('/content/drive/MyDrive/Data_Colab/NLP_CA4_Data/Part1/test.xlsx')
valid_df = pd.read_excel('/content/drive/MyDrive/Data_Colab/NLP_CA4_Data/Part1/valid.xlsx')

In [8]:
train_df.head()

Unnamed: 0,source,targets,category
0,"When news is brought to one of them, of (the b...",و چون یکی از آنان را به [ولادت] دختر مژده دهند...,quran
1,After them repaired Zadok the son of Immer ove...,و چون دشمنان ما شنیدند که ما آگاه شده‌ایم و خد...,bible
2,And establish regular prayers at the two ends ...,و نماز را در دو طرف روز و ساعات نخستین شب برپا...,quran
3,"And it came to pass, that, when I was come aga...",و فرمود تا مدعیانش نزد تو حاضر شوند؛ و از او ب...,bible
4,"Ah woe, that Day, to the Rejecters of Truth!",وای در آن روز بر تکذیب کنندگان!,quran


In [9]:
#Missing values in training set
train_df.isnull().sum()
#Missing values in test set
test_df.isnull().sum()

source      0
targets     0
category    0
dtype: int64

In [10]:
train_df.category.value_counts()

quran    4200
bible    4200
mizan    4200
Name: category, dtype: int64

In [11]:
print("Quran example :",train_df[train_df['category']=='quran']['source'].values[0], train_df[train_df['category']=='quran']['targets'].values[0])
print("Bible example :",train_df[train_df['category']=='bible']['source'].values[0], train_df[train_df['category']=='bible']['targets'].values[0])
print("Mizan example :",train_df[train_df['category']=='mizan']['source'].values[0], train_df[train_df['category']=='mizan']['targets'].values[0])

Quran example : When news is brought to one of them, of (the birth of) a female (child), his face darkens, and he is filled with inward grief! و چون یکی از آنان را به [ولادت] دختر مژده دهند [از شدت خشم] چهره‌اش سیاه گردد، ودرونش از غصه واندوه لبریز و آکنده شود!!
Bible example : After them repaired Zadok the son of Immer over against his house. After him repaired also Shemaiah the son of Shechaniah, the keeper of the east gate. و چون دشمنان ما شنیدند که ما آگاه شده‌ایم و خدا مشورت ایشان را باطل کرده است، آنگاه جمیع ما هر کس به‌کار خود به حصاربرگشتیم.
Mizan example : This man had become a just man in the full force of the term. این مرد، بت مام معنی کلمه، یک مرد درستکار شده بود.


In [12]:
train_df["category"] = train_df["category"].map({'mizan': 0, 'bible':1 , 'quran': 2})
test_df["category"] = test_df["category"].map({'mizan': 0, 'bible':1 , 'quran': 2})
valid_df["category"] = valid_df["category"].map({'mizan': 0, 'bible':1 , 'quran': 2})

## PART 1

In [13]:
# general config
MAX_LEN = 128
TRAIN_BATCH_SIZE = 32
VALID_BATCH_SIZE = 32
TEST_BATCH_SIZE = 32

EPOCHS = 10
EVERY_EPOCH = 500
LEARNING_RATE = 3e-5

In [14]:
X_train_e = train_df['source']
y_train_e = train_df['category']

X_test_e = test_df['source']
y_test_e = test_df['category']

X_eval_e = valid_df['source']
y_eval_e = valid_df['category']

In [15]:
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

Downloading:   0%|          | 0.00/226k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [16]:
X_train_e = X_train_e.values.tolist()
X_test_e = X_test_e.values.tolist()

y_train_e = y_train_e.values.tolist()
y_test_e = y_test_e.values.tolist()

X_eval_e = X_eval_e.values.tolist()
y_eval_e = y_eval_e.values.tolist()

In [17]:
train_encoding_e = tokenizer(X_train_e, padding=True, truncation=True, max_length=MAX_LEN, return_tensors = 'pt', return_attention_mask = True,)
test_encoding_e = tokenizer(X_test_e, padding=True, truncation=True, max_length=MAX_LEN, return_tensors = 'pt', return_attention_mask = True,)
eval_encoding_e = tokenizer(X_eval_e, padding=True, truncation=True, max_length=MAX_LEN, return_tensors = 'pt', return_attention_mask = True,)

In [18]:
class SourcedetectionDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)
    
## Test Dataset
# class SourcedetectionTestDataset(torch.utils.data.Dataset):
#     def __init__(self, encodings):
#         self.encodings = encodings

#     def __getitem__(self, idx):
#         item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
#         return item
#     def __len__(self):
#         return len(self.encodings)

## Generate Dataloaders

In [102]:
train_dataset = SourcedetectionDataset(train_encoding_e, y_train_e)
test_dataset = SourcedetectionDataset(test_encoding_e,y_test_e)
val_dataset = SourcedetectionDataset(eval_encoding_e, y_eval_e)

In [65]:
test_dataset.encodings

{'input_ids': tensor([[  101,  1996,  3189,  ...,     0,     0,     0],
        [  101,  1998,  1996,  ...,     0,     0,     0],
        [  101,  2073, 29278,  ...,     0,     0,     0],
        ...,
        [  101,  1998,  1996,  ...,     0,     0,     0],
        [  101,  2054,  2001,  ...,     0,     0,     0],
        [  101,  1998,  2043,  ...,     0,     0,     0]]), 'token_type_ids': tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        ...,
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0]])}

## Define a Simple Metrics Function

In [20]:
def compute_metrics(p):
    pred, labels = p
    pred = np.argmax(pred, axis=1)

    accuracy = accuracy_score(y_true=labels, y_pred=pred)
    #AUC = auc_score(y_true=labels, y_pred=pred)
    #precision = precision_score(y_true=labels, y_pred=pred)
    f1 = f1_score(labels, pred, average='weighted')

    return {"accuracy": accuracy,"f1_score":f1}

In [None]:
training_args = TrainingArguments(
    output_dir='output',
    save_strategy='epoch',
    evaluation_strategy='epoch',
    eval_steps=EVERY_EPOCH,
    metric_for_best_model='accuracy',
    per_device_train_batch_size=TRAIN_BATCH_SIZE,
    per_device_eval_batch_size=VALID_BATCH_SIZE,
    num_train_epochs=EPOCHS,
    learning_rate=LEARNING_RATE,
    load_best_model_at_end=True,
    save_total_limit = 1,
    logging_steps = 20

)
training_args

Part1 - English Model

In [22]:
model = BertForSequenceClassification.from_pretrained(
    'bert-base-uncased', 
    num_labels = 3
)

Downloading:   0%|          | 0.00/420M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

In [23]:
trainer = Trainer(
    model=model, 
    args=training_args, 
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
)

In [24]:
trainer.train()

***** Running training *****
  Num examples = 12600
  Num Epochs = 10
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 3940
  import sys


Epoch,Training Loss,Validation Loss,Accuracy,F1 Score
1,0.1316,0.090378,0.973704,0.973708
2,0.0176,0.103379,0.978889,0.978871
3,0.015,0.136787,0.976667,0.976678
4,0.0002,0.139732,0.978148,0.978153
5,0.0024,0.164205,0.974815,0.974816
6,0.0001,0.141653,0.97963,0.979611
7,0.0001,0.15427,0.978519,0.978494
8,0.0,0.1449,0.981852,0.981847
9,0.0001,0.155107,0.98,0.979985
10,0.0,0.155177,0.98037,0.980363


***** Running Evaluation *****
  Num examples = 2700
  Batch size = 32
Saving model checkpoint to output/checkpoint-394
Configuration saved in output/checkpoint-394/config.json
Model weights saved in output/checkpoint-394/pytorch_model.bin
  import sys
***** Running Evaluation *****
  Num examples = 2700
  Batch size = 32
Saving model checkpoint to output/checkpoint-788
Configuration saved in output/checkpoint-788/config.json
Model weights saved in output/checkpoint-788/pytorch_model.bin
Deleting older checkpoint [output/checkpoint-394] due to args.save_total_limit
  import sys
***** Running Evaluation *****
  Num examples = 2700
  Batch size = 32
Saving model checkpoint to output/checkpoint-1182
Configuration saved in output/checkpoint-1182/config.json
Model weights saved in output/checkpoint-1182/pytorch_model.bin
  import sys
***** Running Evaluation *****
  Num examples = 2700
  Batch size = 32
Saving model checkpoint to output/checkpoint-1576
Configuration saved in output/checkpoi

TrainOutput(global_step=3940, training_loss=0.026713981847651065, metrics={'train_runtime': 2878.8067, 'train_samples_per_second': 43.768, 'train_steps_per_second': 1.369, 'total_flos': 8288072658432000.0, 'train_loss': 0.026713981847651065, 'epoch': 10.0})

In [103]:
trainer.evaluate()

***** Running Evaluation *****
  Num examples = 2700
  Batch size = 32
  import sys


Epoch,Training Loss,Validation Loss,Accuracy,F1 Score
0,No log,0.643579,0.722222,0.720527
0,No log,0.643579,0.722222,0.720527
0,No log,0.643579,0.722222,0.720527
0,No log,0.643579,0.722222,0.720527
0,No log,0.643579,0.722222,0.720527
0,No log,0.643579,0.722222,0.720527


{'eval_accuracy': 0.7222222222222222,
 'eval_f1_score': 0.7205268824533867,
 'eval_loss': 0.6435790061950684}

In [105]:
trainer.predict(test_dataset)

***** Running Prediction *****
  Num examples = 2700
  Batch size = 32
  import sys


Epoch,Training Loss,Validation Loss,Accuracy,F1 Score
0,No log,0.643579,0.722222,0.720527
0,No log,0.643579,0.722222,0.720527
0,No log,0.643579,0.722222,0.720527
0,No log,0.643579,0.722222,0.720527
0,No log,0.643579,0.722222,0.720527
0,No log,0.643579,0.722222,0.720527


PredictionOutput(predictions=array([[ 2.5573246 , -1.6872283 , -0.62084085],
       [-1.9858407 ,  2.1569655 , -0.09090541],
       [-1.8794104 ,  1.1986779 ,  0.5789565 ],
       ...,
       [-0.8117331 ,  1.2396013 , -0.15607154],
       [ 1.7356747 , -1.3906094 , -0.2912125 ],
       [-1.6275647 ,  1.6962999 , -0.15223016]], dtype=float32), label_ids=array([0, 1, 1, ..., 2, 0, 1]), metrics={'test_loss': 0.6494939923286438, 'test_accuracy': 0.7311111111111112, 'test_f1_score': 0.7291833809731869, 'test_runtime': 18.5031, 'test_samples_per_second': 145.921, 'test_steps_per_second': 4.594})

In [106]:
pin_memory=False
preds = trainer.predict(test_dataset=test_dataset)
preds

***** Running Prediction *****
  Num examples = 2700
  Batch size = 32
  import sys


Epoch,Training Loss,Validation Loss,Accuracy,F1 Score
0,No log,0.643579,0.722222,0.720527
0,No log,0.643579,0.722222,0.720527
0,No log,0.643579,0.722222,0.720527
0,No log,0.643579,0.722222,0.720527
0,No log,0.643579,0.722222,0.720527
0,No log,0.643579,0.722222,0.720527


PredictionOutput(predictions=array([[ 2.5573246 , -1.6872283 , -0.62084085],
       [-1.9858407 ,  2.1569655 , -0.09090541],
       [-1.8794104 ,  1.1986779 ,  0.5789565 ],
       ...,
       [-0.8117331 ,  1.2396013 , -0.15607154],
       [ 1.7356747 , -1.3906094 , -0.2912125 ],
       [-1.6275647 ,  1.6962999 , -0.15223016]], dtype=float32), label_ids=array([0, 1, 1, ..., 2, 0, 1]), metrics={'test_loss': 0.6494939923286438, 'test_accuracy': 0.7311111111111112, 'test_f1_score': 0.7291833809731869, 'test_runtime': 18.3283, 'test_samples_per_second': 147.313, 'test_steps_per_second': 4.638})

In [107]:
preds.metrics

{'test_accuracy': 0.7311111111111112,
 'test_f1_score': 0.7291833809731869,
 'test_loss': 0.6494939923286438,
 'test_runtime': 18.3283,
 'test_samples_per_second': 147.313,
 'test_steps_per_second': 4.638}

In [108]:
probs = torch.from_numpy(preds[0]).softmax(1)

# convert tensors to numpy array
predictions = probs.numpy()

In [123]:
predictions[0][0]/(predictions[0][0]+(0.5*(predictions[0][1] + predictions[1][0])))

0.9855648948881299

In [110]:
preds

PredictionOutput(predictions=array([[ 2.5573246 , -1.6872283 , -0.62084085],
       [-1.9858407 ,  2.1569655 , -0.09090541],
       [-1.8794104 ,  1.1986779 ,  0.5789565 ],
       ...,
       [-0.8117331 ,  1.2396013 , -0.15607154],
       [ 1.7356747 , -1.3906094 , -0.2912125 ],
       [-1.6275647 ,  1.6962999 , -0.15223016]], dtype=float32), label_ids=array([0, 1, 1, ..., 2, 0, 1]), metrics={'test_loss': 0.6494939923286438, 'test_accuracy': 0.7311111111111112, 'test_f1_score': 0.7291833809731869, 'test_runtime': 18.3283, 'test_samples_per_second': 147.313, 'test_steps_per_second': 4.638})

In [119]:
newdf = pd.DataFrame(predictions,columns=['0','1','2'])

In [122]:
results = np.argmax(predictions,axis=1)
newdf['labels'] = results
newdf

Unnamed: 0,0,1,2,labels
0,0.946966,0.013582,0.039453,0
1,0.014158,0.891661,0.094181,1
2,0.029068,0.631257,0.339676,1
3,0.050279,0.614773,0.334948,1
4,0.970722,0.009920,0.019358,0
...,...,...,...,...
2695,0.021379,0.908985,0.069636,1
2696,0.048761,0.434537,0.516702,2
2697,0.093417,0.726623,0.179960,1
2698,0.850611,0.037325,0.112064,0


In [124]:
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score
report = classification_report(y_test_e, newdf['labels'], output_dict=True, zero_division=0)
report['auc_score'] = roc_auc_score(y_test_e, probs, multi_class='ovr')

In [127]:
report

{'0': {'f1-score': 0.8045112781954886,
  'precision': 0.7785862785862786,
  'recall': 0.8322222222222222,
  'support': 900},
 '1': {'f1-score': 0.6759036144578313,
  'precision': 0.7381578947368421,
  'recall': 0.6233333333333333,
  'support': 900},
 '2': {'f1-score': 0.7071352502662407,
  'precision': 0.6789366053169734,
  'recall': 0.7377777777777778,
  'support': 900},
 'accuracy': 0.7311111111111112,
 'auc_score': 0.8881551440329218,
 'macro avg': {'f1-score': 0.7291833809731868,
  'precision': 0.7318935928800313,
  'recall': 0.7311111111111112,
  'support': 2700},
 'weighted avg': {'f1-score': 0.7291833809731869,
  'precision': 0.7318935928800314,
  'recall': 0.7311111111111112,
  'support': 2700}}

Part2 - Persian Model

In [128]:
X_train_fa = train_df['targets']
y_train_fa = train_df['category']

X_test_fa = test_df['targets']
y_test_fa = test_df['category']

X_eval_fa = valid_df['targets']
y_eval_fa = valid_df['category']

In [30]:
tokenizer = AutoTokenizer.from_pretrained("HooshvareLab/bert-fa-zwnj-base")

https://huggingface.co/HooshvareLab/bert-fa-zwnj-base/resolve/main/tokenizer_config.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpcavdh551


Downloading:   0%|          | 0.00/292 [00:00<?, ?B/s]

storing https://huggingface.co/HooshvareLab/bert-fa-zwnj-base/resolve/main/tokenizer_config.json in cache at /root/.cache/huggingface/transformers/2cb1f80ada8cc565308126c396b8fbbadf4680b3f85524ff482483bad82028bd.abf572b0d3e0408e681fd99e81aca6441495ec08464c3ff2706cba4d3d618254
creating metadata file for /root/.cache/huggingface/transformers/2cb1f80ada8cc565308126c396b8fbbadf4680b3f85524ff482483bad82028bd.abf572b0d3e0408e681fd99e81aca6441495ec08464c3ff2706cba4d3d618254
https://huggingface.co/HooshvareLab/bert-fa-zwnj-base/resolve/main/config.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmp6gkwqqku


Downloading:   0%|          | 0.00/565 [00:00<?, ?B/s]

storing https://huggingface.co/HooshvareLab/bert-fa-zwnj-base/resolve/main/config.json in cache at /root/.cache/huggingface/transformers/ba5343a1268e870ceacba2041556e22a4480d6af08e330b0650f96c96a5e50b4.967063c40b5c987b6e0d2a6d1944963280b9a53e5821fcb5379494a07dc9408d
creating metadata file for /root/.cache/huggingface/transformers/ba5343a1268e870ceacba2041556e22a4480d6af08e330b0650f96c96a5e50b4.967063c40b5c987b6e0d2a6d1944963280b9a53e5821fcb5379494a07dc9408d
loading configuration file https://huggingface.co/HooshvareLab/bert-fa-zwnj-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/ba5343a1268e870ceacba2041556e22a4480d6af08e330b0650f96c96a5e50b4.967063c40b5c987b6e0d2a6d1944963280b9a53e5821fcb5379494a07dc9408d
Model config BertConfig {
  "_name_or_path": "HooshvareLab/bert-fa-zwnj-base",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "ge

Downloading:   0%|          | 0.00/416k [00:00<?, ?B/s]

storing https://huggingface.co/HooshvareLab/bert-fa-zwnj-base/resolve/main/vocab.txt in cache at /root/.cache/huggingface/transformers/69e6fc93aba72adbdbe587dd870aa6e40074b2852ae7f827430f79554d1b474d.75ba9b0b397e5ff811df7979fa501544a6dfde55eb643e16f670d7aa16f81c1d
creating metadata file for /root/.cache/huggingface/transformers/69e6fc93aba72adbdbe587dd870aa6e40074b2852ae7f827430f79554d1b474d.75ba9b0b397e5ff811df7979fa501544a6dfde55eb643e16f670d7aa16f81c1d
https://huggingface.co/HooshvareLab/bert-fa-zwnj-base/resolve/main/tokenizer.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpbyxj7kw9


Downloading:   0%|          | 0.00/1.06M [00:00<?, ?B/s]

storing https://huggingface.co/HooshvareLab/bert-fa-zwnj-base/resolve/main/tokenizer.json in cache at /root/.cache/huggingface/transformers/27cf38fbd3f491fa577a35ed855e45e2f176b4c39a8377b4845e1c91550b31d7.473194e90cfe872b63370ba003df2403b89fffe014fa356ea877b47f35f125fe
creating metadata file for /root/.cache/huggingface/transformers/27cf38fbd3f491fa577a35ed855e45e2f176b4c39a8377b4845e1c91550b31d7.473194e90cfe872b63370ba003df2403b89fffe014fa356ea877b47f35f125fe
https://huggingface.co/HooshvareLab/bert-fa-zwnj-base/resolve/main/special_tokens_map.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpps1arm9m


Downloading:   0%|          | 0.00/134 [00:00<?, ?B/s]

storing https://huggingface.co/HooshvareLab/bert-fa-zwnj-base/resolve/main/special_tokens_map.json in cache at /root/.cache/huggingface/transformers/66ef075eb41504b8392a7755677a74bc54bdc0400035e67cc5f55c0af9a1b0a7.f982506b52498d4adb4bd491f593dc92b2ef6be61bfdbe9d30f53f963f9f5b66
creating metadata file for /root/.cache/huggingface/transformers/66ef075eb41504b8392a7755677a74bc54bdc0400035e67cc5f55c0af9a1b0a7.f982506b52498d4adb4bd491f593dc92b2ef6be61bfdbe9d30f53f963f9f5b66
loading file https://huggingface.co/HooshvareLab/bert-fa-zwnj-base/resolve/main/vocab.txt from cache at /root/.cache/huggingface/transformers/69e6fc93aba72adbdbe587dd870aa6e40074b2852ae7f827430f79554d1b474d.75ba9b0b397e5ff811df7979fa501544a6dfde55eb643e16f670d7aa16f81c1d
loading file https://huggingface.co/HooshvareLab/bert-fa-zwnj-base/resolve/main/tokenizer.json from cache at /root/.cache/huggingface/transformers/27cf38fbd3f491fa577a35ed855e45e2f176b4c39a8377b4845e1c91550b31d7.473194e90cfe872b63370ba003df2403b89fffe014

In [31]:
model = BertForSequenceClassification.from_pretrained('HooshvareLab/bert-fa-zwnj-base', num_labels=3)

loading configuration file https://huggingface.co/HooshvareLab/bert-fa-zwnj-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/ba5343a1268e870ceacba2041556e22a4480d6af08e330b0650f96c96a5e50b4.967063c40b5c987b6e0d2a6d1944963280b9a53e5821fcb5379494a07dc9408d
Model config BertConfig {
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.19.2",
  "type

Downloading:   0%|          | 0.00/452M [00:00<?, ?B/s]

storing https://huggingface.co/HooshvareLab/bert-fa-zwnj-base/resolve/main/pytorch_model.bin in cache at /root/.cache/huggingface/transformers/9244c1308d29835498aa18596303b01c3320a16809c5cb9cf99a48aba7852316.1ac4856b66eb038d8dca0407abe430cdf73258c2c5332110a94879e67fb041bb
creating metadata file for /root/.cache/huggingface/transformers/9244c1308d29835498aa18596303b01c3320a16809c5cb9cf99a48aba7852316.1ac4856b66eb038d8dca0407abe430cdf73258c2c5332110a94879e67fb041bb
loading weights file https://huggingface.co/HooshvareLab/bert-fa-zwnj-base/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/9244c1308d29835498aa18596303b01c3320a16809c5cb9cf99a48aba7852316.1ac4856b66eb038d8dca0407abe430cdf73258c2c5332110a94879e67fb041bb
Some weights of the model checkpoint at HooshvareLab/bert-fa-zwnj-base were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transf

In [32]:
X_train_fa = X_train_fa.values.tolist()
X_test_fa = X_test_fa.values.tolist()

y_train_fa = y_train_fa.values.tolist()
y_test_fa = y_test_fa.values.tolist()

X_eval_fa = X_eval_fa.values.tolist()
y_eval_fa = y_eval_fa.values.tolist()

In [33]:
train_encoding_fa = tokenizer(X_train_fa, padding=True, truncation=True, max_length=MAX_LEN, return_tensors = 'pt', return_attention_mask = True,)
test_encoding_fa = tokenizer(X_test_fa, padding=True, truncation=True, max_length=MAX_LEN, return_tensors = 'pt', return_attention_mask = True,)
eval_encoding_fa = tokenizer(X_eval_fa, padding=True, truncation=True, max_length=MAX_LEN, return_tensors = 'pt', return_attention_mask = True,)

## Generate Dataloaders

In [140]:
train_dataset_fa = SourcedetectionDataset(train_encoding_fa, y_train_fa)
test_dataset_fa = SourcedetectionDataset(test_encoding_fa, y_test_fa)
val_dataset_fa = SourcedetectionDataset(eval_encoding_fa, y_eval_fa)

In [35]:
trainer = Trainer(
    model=model, 
    args=training_args, 
    train_dataset=train_dataset_fa,
    eval_dataset=val_dataset_fa,
    compute_metrics=compute_metrics,
)

In [129]:
trainer.train()

***** Running training *****
  Num examples = 12600
  Num Epochs = 10
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 3940
  import sys


Epoch,Training Loss,Validation Loss,Accuracy,F1 Score
1,0.1968,0.217648,0.920741,0.921043
2,0.1298,0.172511,0.94,0.939963
3,0.0596,0.196941,0.945556,0.945439
4,0.0343,0.239571,0.950741,0.950805
5,0.0042,0.346841,0.947778,0.947893
6,0.0001,0.292571,0.954074,0.954101
7,0.0134,0.315748,0.954444,0.954419
8,0.0001,0.317859,0.955926,0.955931
9,0.0002,0.331597,0.955556,0.955581
10,0.0001,0.331659,0.957037,0.957051


***** Running Evaluation *****
  Num examples = 2700
  Batch size = 32


Epoch,Training Loss,Validation Loss,Accuracy,F1 Score
0,No log,0.643579,0.722222,0.720527
0,No log,0.643579,0.722222,0.720527
0,No log,0.643579,0.722222,0.720527
0,No log,0.643579,0.722222,0.720527
0,No log,0.643579,0.722222,0.720527
0,No log,0.643579,0.722222,0.720527


Saving model checkpoint to output/checkpoint-394
Configuration saved in output/checkpoint-394/config.json
Model weights saved in output/checkpoint-394/pytorch_model.bin
Deleting older checkpoint [output/checkpoint-3152] due to args.save_total_limit
Deleting older checkpoint [output/checkpoint-3940] due to args.save_total_limit
  import sys
***** Running Evaluation *****
  Num examples = 2700
  Batch size = 32
Saving model checkpoint to output/checkpoint-788
Configuration saved in output/checkpoint-788/config.json
Model weights saved in output/checkpoint-788/pytorch_model.bin
Deleting older checkpoint [output/checkpoint-394] due to args.save_total_limit
  import sys
***** Running Evaluation *****
  Num examples = 2700
  Batch size = 32
Saving model checkpoint to output/checkpoint-1182
Configuration saved in output/checkpoint-1182/config.json
Model weights saved in output/checkpoint-1182/pytorch_model.bin
Deleting older checkpoint [output/checkpoint-788] due to args.save_total_limit
  im

TrainOutput(global_step=3940, training_loss=0.05797318781967314, metrics={'train_runtime': 2895.2802, 'train_samples_per_second': 43.519, 'train_steps_per_second': 1.361, 'total_flos': 8347010064003072.0, 'train_loss': 0.05797318781967314, 'epoch': 10.0})

In [130]:
trainer.evaluate()

***** Running Evaluation *****
  Num examples = 2700
  Batch size = 32
  import sys


{'epoch': 10.0,
 'eval_accuracy': 0.957037037037037,
 'eval_f1_score': 0.9570514586184313,
 'eval_loss': 0.33165913820266724,
 'eval_runtime': 20.6474,
 'eval_samples_per_second': 130.767,
 'eval_steps_per_second': 4.117}

In [142]:
trainer.predict(test_dataset_fa)

***** Running Prediction *****
  Num examples = 2700
  Batch size = 32
  import sys


PredictionOutput(predictions=array([[ 6.316997 , -2.2099993, -4.1114054],
       [ 6.6119394, -3.2139454, -2.930717 ],
       [ 6.20278  , -2.2130094, -3.702181 ],
       ...,
       [ 7.6432095, -3.7733893, -3.5714967],
       [ 7.7149916, -3.760648 , -3.8966696],
       [ 5.295959 , -1.7636366, -2.8491943]], dtype=float32), label_ids=array([0, 1, 1, ..., 2, 0, 1]), metrics={'test_loss': 5.779371738433838, 'test_accuracy': 0.33925925925925926, 'test_f1_score': 0.18363754418783274, 'test_runtime': 21.0096, 'test_samples_per_second': 128.513, 'test_steps_per_second': 4.046})

In [143]:
pin_memory=False
preds = trainer.predict(test_dataset=test_dataset_fa)
preds

***** Running Prediction *****
  Num examples = 2700
  Batch size = 32
  import sys


PredictionOutput(predictions=array([[ 6.316997 , -2.2099993, -4.1114054],
       [ 6.6119394, -3.2139454, -2.930717 ],
       [ 6.20278  , -2.2130094, -3.702181 ],
       ...,
       [ 7.6432095, -3.7733893, -3.5714967],
       [ 7.7149916, -3.760648 , -3.8966696],
       [ 5.295959 , -1.7636366, -2.8491943]], dtype=float32), label_ids=array([0, 1, 1, ..., 2, 0, 1]), metrics={'test_loss': 5.779371738433838, 'test_accuracy': 0.33925925925925926, 'test_f1_score': 0.18363754418783274, 'test_runtime': 20.4436, 'test_samples_per_second': 132.071, 'test_steps_per_second': 4.158})

In [144]:
preds.metrics

{'test_accuracy': 0.33925925925925926,
 'test_f1_score': 0.18363754418783274,
 'test_loss': 5.779371738433838,
 'test_runtime': 20.4436,
 'test_samples_per_second': 132.071,
 'test_steps_per_second': 4.158}

In [145]:
probs = torch.from_numpy(preds[0]).softmax(1)

# convert tensors to numpy array
predictions = probs.numpy()

In [146]:
predictions[0][0]/(predictions[0][0]+(0.5*(predictions[0][1] + predictions[1][0])))

0.66660004666447

In [147]:
preds

PredictionOutput(predictions=array([[ 6.316997 , -2.2099993, -4.1114054],
       [ 6.6119394, -3.2139454, -2.930717 ],
       [ 6.20278  , -2.2130094, -3.702181 ],
       ...,
       [ 7.6432095, -3.7733893, -3.5714967],
       [ 7.7149916, -3.760648 , -3.8966696],
       [ 5.295959 , -1.7636366, -2.8491943]], dtype=float32), label_ids=array([0, 1, 1, ..., 2, 0, 1]), metrics={'test_loss': 5.779371738433838, 'test_accuracy': 0.33925925925925926, 'test_f1_score': 0.18363754418783274, 'test_runtime': 20.4436, 'test_samples_per_second': 132.071, 'test_steps_per_second': 4.158})

In [148]:
newdf = pd.DataFrame(predictions,columns=['0','1','2'])

In [149]:
results = np.argmax(predictions,axis=1)
newdf['labels'] = results
newdf

Unnamed: 0,0,1,2,labels
0,0.999772,0.000198,0.000030,0
1,0.999874,0.000054,0.000072,0
2,0.999729,0.000221,0.000050,0
3,0.999908,0.000046,0.000046,0
4,0.999968,0.000009,0.000023,0
...,...,...,...,...
2695,0.999848,0.000114,0.000038,0
2696,0.999892,0.000046,0.000062,0
2697,0.999976,0.000011,0.000013,0
2698,0.999981,0.000010,0.000009,0


In [150]:
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score
report = classification_report(y_test_fa, newdf['labels'], output_dict=True, zero_division=0)
report['auc_score'] = roc_auc_score(y_test_fa, probs, multi_class='ovr')

In [151]:
report

{'0': {'f1-score': 0.5008412787436904,
  'precision': 0.33495873968492124,
  'recall': 0.9922222222222222,
  'support': 900},
 '1': {'f1-score': 0.0391304347826087,
  'precision': 0.9,
  'recall': 0.02,
  'support': 900},
 '2': {'f1-score': 0.010940919037199126,
  'precision': 0.35714285714285715,
  'recall': 0.005555555555555556,
  'support': 900},
 'accuracy': 0.33925925925925926,
 'auc_score': 0.5951082304526749,
 'macro avg': {'f1-score': 0.18363754418783276,
  'precision': 0.5307005322759262,
  'recall': 0.3392592592592592,
  'support': 2700},
 'weighted avg': {'f1-score': 0.18363754418783274,
  'precision': 0.5307005322759261,
  'recall': 0.33925925925925926,
  'support': 2700}}

Part 3 - Multilingual

In [152]:
train_labels = train_df['category'].values
train_sent1 = train_df['source'].values
train_sent2 = train_df['targets'].values

valid_labels = valid_df['category'].values
valid_sent1 = valid_df['source'].values
valid_sent2 = valid_df['targets'].values

test_labels = test_df['category'].values
test_sent1 = test_df['source'].values
test_sent2 = test_df['targets'].values

In [153]:
from transformers import AutoTokenizer, AutoModelForMaskedLM, AutoModel
from transformers import BertForSequenceClassification
tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-base")
model = BertForSequenceClassification.from_pretrained(
    'xlm-roberta-base', 
    num_labels = 3
)

Could not locate the tokenizer configuration file, will try to use the model config instead.
https://huggingface.co/xlm-roberta-base/resolve/main/config.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmps4chcvpn


Downloading:   0%|          | 0.00/615 [00:00<?, ?B/s]

storing https://huggingface.co/xlm-roberta-base/resolve/main/config.json in cache at /root/.cache/huggingface/transformers/87683eb92ea383b0475fecf99970e950a03c9ff5e51648d6eee56fb754612465.dfaaaedc7c1c475302398f09706cbb21e23951b73c6e2b3162c1c8a99bb3b62a
creating metadata file for /root/.cache/huggingface/transformers/87683eb92ea383b0475fecf99970e950a03c9ff5e51648d6eee56fb754612465.dfaaaedc7c1c475302398f09706cbb21e23951b73c6e2b3162c1c8a99bb3b62a
loading configuration file https://huggingface.co/xlm-roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/87683eb92ea383b0475fecf99970e950a03c9ff5e51648d6eee56fb754612465.dfaaaedc7c1c475302398f09706cbb21e23951b73c6e2b3162c1c8a99bb3b62a
Model config XLMRobertaConfig {
  "_name_or_path": "xlm-roberta-base",
  "architectures": [
    "XLMRobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_

Downloading:   0%|          | 0.00/4.83M [00:00<?, ?B/s]

storing https://huggingface.co/xlm-roberta-base/resolve/main/sentencepiece.bpe.model in cache at /root/.cache/huggingface/transformers/9df9ae4442348b73950203b63d1b8ed2d18eba68921872aee0c3a9d05b9673c6.00628a9eeb8baf4080d44a0abe9fe8057893de20c7cb6e6423cddbf452f7d4d8
creating metadata file for /root/.cache/huggingface/transformers/9df9ae4442348b73950203b63d1b8ed2d18eba68921872aee0c3a9d05b9673c6.00628a9eeb8baf4080d44a0abe9fe8057893de20c7cb6e6423cddbf452f7d4d8
https://huggingface.co/xlm-roberta-base/resolve/main/tokenizer.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpydqcvy5s


Downloading:   0%|          | 0.00/8.68M [00:00<?, ?B/s]

storing https://huggingface.co/xlm-roberta-base/resolve/main/tokenizer.json in cache at /root/.cache/huggingface/transformers/daeda8d936162ca65fe6dd158ecce1d8cb56c17d89b78ab86be1558eaef1d76a.a984cf52fc87644bd4a2165f1e07e0ac880272c1e82d648b4674907056912bd7
creating metadata file for /root/.cache/huggingface/transformers/daeda8d936162ca65fe6dd158ecce1d8cb56c17d89b78ab86be1558eaef1d76a.a984cf52fc87644bd4a2165f1e07e0ac880272c1e82d648b4674907056912bd7
loading file https://huggingface.co/xlm-roberta-base/resolve/main/sentencepiece.bpe.model from cache at /root/.cache/huggingface/transformers/9df9ae4442348b73950203b63d1b8ed2d18eba68921872aee0c3a9d05b9673c6.00628a9eeb8baf4080d44a0abe9fe8057893de20c7cb6e6423cddbf452f7d4d8
loading file https://huggingface.co/xlm-roberta-base/resolve/main/tokenizer.json from cache at /root/.cache/huggingface/transformers/daeda8d936162ca65fe6dd158ecce1d8cb56c17d89b78ab86be1558eaef1d76a.a984cf52fc87644bd4a2165f1e07e0ac880272c1e82d648b4674907056912bd7
loading file h

Downloading:   0%|          | 0.00/1.04G [00:00<?, ?B/s]

storing https://huggingface.co/xlm-roberta-base/resolve/main/pytorch_model.bin in cache at /root/.cache/huggingface/transformers/97d0ea09f8074264957d062ec20ccb79af7b917d091add8261b26874daf51b5d.f42212747c1c27fcebaa0a89e2a83c38c6d3d4340f21922f892b88d882146ac2
creating metadata file for /root/.cache/huggingface/transformers/97d0ea09f8074264957d062ec20ccb79af7b917d091add8261b26874daf51b5d.f42212747c1c27fcebaa0a89e2a83c38c6d3d4340f21922f892b88d882146ac2
loading weights file https://huggingface.co/xlm-roberta-base/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/97d0ea09f8074264957d062ec20ccb79af7b917d091add8261b26874daf51b5d.f42212747c1c27fcebaa0a89e2a83c38c6d3d4340f21922f892b88d882146ac2
Some weights of the model checkpoint at xlm-roberta-base were not used when initializing BertForSequenceClassification: ['roberta.encoder.layer.5.intermediate.dense.bias', 'roberta.encoder.layer.8.intermediate.dense.bias', 'roberta.encoder.layer.1.attention.self.key.bias'

In [154]:
tokens_train = tokenizer.batch_encode_plus(
    list(zip(train_sent1.tolist(), train_sent2.tolist())),
    add_special_tokens = True,
    max_length = 128,
    pad_to_max_length=True,
    return_attention_mask = True,
    truncation=True,
    return_tensors = 'pt'
)

tokens_val = tokenizer.batch_encode_plus(
    list(zip(valid_sent1.tolist(), valid_sent2.tolist())),
    add_special_tokens = True,
    max_length = 128,
    pad_to_max_length=True,
    return_attention_mask = True,
    truncation=True,
    return_tensors = 'pt'
)


tokens_test = tokenizer.batch_encode_plus(
    list(zip(test_sent1.tolist(), test_sent2.tolist())),
    add_special_tokens = True,
    max_length = 128,
    pad_to_max_length=True,
    return_attention_mask = True,
    truncation=True,
    return_tensors = 'pt'
)



In [155]:
train_dataset_mul = SourcedetectionDataset(tokens_train, train_labels)
test_dataset_mul = SourcedetectionDataset(tokens_test, test_labels)
val_dataset_mul = SourcedetectionDataset(tokens_val, valid_labels)

In [156]:
trainer = Trainer(
    model=model, 
    args=training_args, 
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
)

In [157]:
trainer.train()

***** Running training *****
  Num examples = 12600
  Num Epochs = 10
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 3940
  import sys


Epoch,Training Loss,Validation Loss,Accuracy,F1 Score
1,0.2662,0.284266,0.908519,0.908156
2,0.1363,0.21023,0.932222,0.932313
3,0.0911,0.177087,0.955556,0.955612
4,0.0892,0.18879,0.95,0.949959
5,0.1075,0.203336,0.952593,0.952645
6,0.0709,0.248968,0.953333,0.953292
7,0.0131,0.272241,0.956296,0.956314
8,0.0554,0.305359,0.954444,0.954366
9,0.0298,0.283047,0.958889,0.958901
10,0.0007,0.285543,0.958519,0.958524


***** Running Evaluation *****
  Num examples = 2700
  Batch size = 32
Saving model checkpoint to output/checkpoint-394
Configuration saved in output/checkpoint-394/config.json
Model weights saved in output/checkpoint-394/pytorch_model.bin
Deleting older checkpoint [output/checkpoint-3940] due to args.save_total_limit
  import sys
***** Running Evaluation *****
  Num examples = 2700
  Batch size = 32
Saving model checkpoint to output/checkpoint-788
Configuration saved in output/checkpoint-788/config.json
Model weights saved in output/checkpoint-788/pytorch_model.bin
Deleting older checkpoint [output/checkpoint-394] due to args.save_total_limit
  import sys
***** Running Evaluation *****
  Num examples = 2700
  Batch size = 32
Saving model checkpoint to output/checkpoint-1182
Configuration saved in output/checkpoint-1182/config.json
Model weights saved in output/checkpoint-1182/pytorch_model.bin
Deleting older checkpoint [output/checkpoint-788] due to args.save_total_limit
  import sys


TrainOutput(global_step=3940, training_loss=0.10592991226591492, metrics={'train_runtime': 3263.0016, 'train_samples_per_second': 38.615, 'train_steps_per_second': 1.207, 'total_flos': 8288072658432000.0, 'train_loss': 0.10592991226591492, 'epoch': 10.0})

In [158]:
trainer.evaluate()

***** Running Evaluation *****
  Num examples = 2700
  Batch size = 32
  import sys


{'epoch': 10.0,
 'eval_accuracy': 0.9588888888888889,
 'eval_f1_score': 0.9589005854265577,
 'eval_loss': 0.2830471098423004,
 'eval_runtime': 20.4003,
 'eval_samples_per_second': 132.351,
 'eval_steps_per_second': 4.167}

In [159]:
trainer.predict(test_dataset_mul)

***** Running Prediction *****
  Num examples = 2700
  Batch size = 32
  import sys


PredictionOutput(predictions=array([[ 6.6585526, -3.34853  , -3.3223636],
       [ 6.5233293, -3.9912715, -2.5814216],
       [ 6.586031 , -3.7137024, -2.8898609],
       ...,
       [ 6.5944824, -3.2234876, -3.3776712],
       [ 6.57023  , -3.5022123, -3.0767767],
       [ 6.61148  , -3.7232764, -2.9460506]], dtype=float32), label_ids=array([0, 1, 1, ..., 2, 0, 1]), metrics={'test_loss': 6.648030757904053, 'test_accuracy': 0.3296296296296296, 'test_f1_score': 0.1660073793446809, 'test_runtime': 21.2836, 'test_samples_per_second': 126.858, 'test_steps_per_second': 3.994})

In [160]:
pin_memory=False
preds = trainer.predict(test_dataset=test_dataset_mul)
preds

***** Running Prediction *****
  Num examples = 2700
  Batch size = 32
  import sys


PredictionOutput(predictions=array([[ 6.6585526, -3.34853  , -3.3223636],
       [ 6.5233293, -3.9912715, -2.5814216],
       [ 6.586031 , -3.7137024, -2.8898609],
       ...,
       [ 6.5944824, -3.2234876, -3.3776712],
       [ 6.57023  , -3.5022123, -3.0767767],
       [ 6.61148  , -3.7232764, -2.9460506]], dtype=float32), label_ids=array([0, 1, 1, ..., 2, 0, 1]), metrics={'test_loss': 6.648030757904053, 'test_accuracy': 0.3296296296296296, 'test_f1_score': 0.1660073793446809, 'test_runtime': 20.6041, 'test_samples_per_second': 131.042, 'test_steps_per_second': 4.125})

In [161]:
preds.metrics

{'test_accuracy': 0.3296296296296296,
 'test_f1_score': 0.1660073793446809,
 'test_loss': 6.648030757904053,
 'test_runtime': 20.6041,
 'test_samples_per_second': 131.042,
 'test_steps_per_second': 4.125}

In [162]:
probs = torch.from_numpy(preds[0]).softmax(1)

# convert tensors to numpy array
predictions = probs.numpy()

In [163]:
predictions[0][0]/(predictions[0][0]+(0.5*(predictions[0][1] + predictions[1][0])))

0.6666670905608956

In [164]:
preds

PredictionOutput(predictions=array([[ 6.6585526, -3.34853  , -3.3223636],
       [ 6.5233293, -3.9912715, -2.5814216],
       [ 6.586031 , -3.7137024, -2.8898609],
       ...,
       [ 6.5944824, -3.2234876, -3.3776712],
       [ 6.57023  , -3.5022123, -3.0767767],
       [ 6.61148  , -3.7232764, -2.9460506]], dtype=float32), label_ids=array([0, 1, 1, ..., 2, 0, 1]), metrics={'test_loss': 6.648030757904053, 'test_accuracy': 0.3296296296296296, 'test_f1_score': 0.1660073793446809, 'test_runtime': 20.6041, 'test_samples_per_second': 131.042, 'test_steps_per_second': 4.125})

In [165]:
newdf = pd.DataFrame(predictions,columns=['0','1','2'])

In [166]:
results = np.argmax(predictions,axis=1)
newdf['labels'] = results
newdf

Unnamed: 0,0,1,2,labels
0,0.999909,0.000045,0.000046,0
1,0.999862,0.000027,0.000111,0
2,0.999890,0.000034,0.000077,0
3,0.999884,0.000031,0.000085,0
4,0.999899,0.000039,0.000062,0
...,...,...,...,...
2695,0.999909,0.000041,0.000050,0
2696,0.999887,0.000030,0.000083,0
2697,0.999899,0.000054,0.000047,0
2698,0.999893,0.000042,0.000065,0


In [167]:
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score
report = classification_report(test_labels, newdf['labels'], output_dict=True, zero_division=0)
report['auc_score'] = roc_auc_score(test_labels, probs, multi_class='ovr')

In [168]:
report

{'0': {'f1-score': 0.49581706636921363,
  'precision': 0.33097542814594194,
  'recall': 0.9877777777777778,
  'support': 900},
 '1': {'f1-score': 0.002205071664829107,
  'precision': 0.14285714285714285,
  'recall': 0.0011111111111111111,
  'support': 900},
 '2': {'f1-score': 0.0, 'precision': 0.0, 'recall': 0.0, 'support': 900},
 'accuracy': 0.3296296296296296,
 'auc_score': 0.49116141975308647,
 'macro avg': {'f1-score': 0.1660073793446809,
  'precision': 0.1579441903343616,
  'recall': 0.3296296296296296,
  'support': 2700},
 'weighted avg': {'f1-score': 0.1660073793446809,
  'precision': 0.15794419033436158,
  'recall': 0.3296296296296296,
  'support': 2700}}