In [1]:
from datasets import load_dataset, load_metric, Dataset, DatasetDict
from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification
from transformers import TrainingArguments, Trainer
from transformers import DefaultDataCollator
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from tqdm.auto import tqdm
from sklearn.metrics import classification_report
from sklearn import metrics
from scipy.special import softmax

# Load Data

In [2]:
df = pd.read_excel("Question2_Data/train.xlsx")
df

Unnamed: 0,source,targets,category
0,"When news is brought to one of them, of (the b...",و چون یکی از آنان را به [ولادت] دختر مژده دهند...,quran
1,After them repaired Zadok the son of Immer ove...,و چون دشمنان ما شنیدند که ما آگاه شده‌ایم و خد...,bible
2,And establish regular prayers at the two ends ...,و نماز را در دو طرف روز و ساعات نخستین شب برپا...,quran
3,"And it came to pass, that, when I was come aga...",و فرمود تا مدعیانش نزد تو حاضر شوند؛ و از او ب...,bible
4,"Ah woe, that Day, to the Rejecters of Truth!",وای در آن روز بر تکذیب کنندگان!,quran
...,...,...,...
12595,"Women impure are for men impure, and men impur...",زنان پلید برای مردان پلید و مردان پلید برای زن...,quran
12596,I don't want any silly dance given in my honour.',بنابراین حالا هم میل ندارم جشنی به افتخار من د...,mizan
12597,And the Earth will shine with the Glory of its...,و زمین به نور پروردگارش روشن می‌شود، و کتاب [ا...,quran
12598,"Then lifted I up mine eyes, and saw, and behol...",گفتم: «این چیست؟» او جواب داد: «این است آن ایف...,bible


In [3]:
dataset = DatasetDict()
for data_set in ["train", "valid", "test"]:
    dataset[data_set] = Dataset.from_pandas(pd.read_excel(f"Question2_Data/{data_set}.xlsx"))
dataset

DatasetDict({
    train: Dataset({
        features: ['source', 'targets', 'category'],
        num_rows: 12600
    })
    valid: Dataset({
        features: ['source', 'targets', 'category'],
        num_rows: 2700
    })
    test: Dataset({
        features: ['source', 'targets', 'category'],
        num_rows: 2700
    })
})

# 1)

In [4]:
model_checkpoint = "google/electra-base-discriminator"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=3)
model

Some weights of the model checkpoint at google/electra-base-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense.weight', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.bias']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.o

ElectraForSequenceClassification(
  (electra): ElectraModel(
    (embeddings): ElectraEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): ElectraEncoder(
      (layer): ModuleList(
        (0): ElectraLayer(
          (attention): ElectraAttention(
            (self): ElectraSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): ElectraSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm

In [5]:
metric = load_metric("accuracy")
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

str_to_int = {"quran": 0, "bible": 1, "mizan": 2}
def tokenize_function(examples):
    tokenized_batch = tokenizer(examples["source"], truncation=True, max_length=128)
    tokenized_batch["label"] = [str_to_int[label] for label in examples["category"]]
    return tokenized_batch

tokenized_datasets = dataset.map(tokenize_function, batched=True)
print(tokenized_datasets["train"][0])
tokenized_datasets

  0%|          | 0/13 [00:00<?, ?ba/s]

  0%|          | 0/3 [00:00<?, ?ba/s]

  0%|          | 0/3 [00:00<?, ?ba/s]

{'source': 'When news is brought to one of them, of (the birth of) a female (child), his face darkens, and he is filled with inward grief!', 'targets': 'و چون یکی از آنان را به [ولادت] دختر مژده دهند [از شدت خشم] چهره\u200cاش سیاه گردد، ودرونش از غصه واندوه لبریز و آکنده شود!!', 'category': 'quran', 'input_ids': [101, 2043, 2739, 2003, 2716, 2000, 2028, 1997, 2068, 1010, 1997, 1006, 1996, 4182, 1997, 1007, 1037, 2931, 1006, 2775, 1007, 1010, 2010, 2227, 2601, 6132, 1010, 1998, 2002, 2003, 3561, 2007, 20546, 9940, 999, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'label': 0}


DatasetDict({
    train: Dataset({
        features: ['source', 'targets', 'category', 'input_ids', 'token_type_ids', 'attention_mask', 'label'],
        num_rows: 12600
    })
    valid: Dataset({
        features: ['source', 'targets', 'category', 'input_ids', 'token_type_ids', 'attention_mask', 'label'],
        num_rows: 2700
    })
    test: Dataset({
        features: ['source', 'targets', 'category', 'input_ids', 'token_type_ids', 'attention_mask', 'label'],
        num_rows: 2700
    })
})

In [6]:
training_args = TrainingArguments(
    output_dir="q2_electra", 
    evaluation_strategy="epoch",
    logging_steps = 20,
    learning_rate=3e-5,
    num_train_epochs=10,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    save_total_limit = 1,
    group_by_length = True,
    seed=0,
#     save_strategy = "epoch",
#     load_best_model_at_end=True,
)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["valid"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)
trainer.train()

The following columns in the training set  don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: targets, category, source. If targets, category, source are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 12600
  Num Epochs = 10
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 3940


Epoch,Training Loss,Validation Loss,Accuracy
1,0.1517,0.522628,0.881111
2,0.0859,0.156232,0.961481
3,0.0714,0.16249,0.962593
4,0.0405,0.128865,0.972963
5,0.0012,0.164756,0.972593
6,0.0237,0.136521,0.978889
7,0.0002,0.170193,0.974074
8,0.0001,0.173018,0.974074
9,0.0001,0.154239,0.976667
10,0.0001,0.191684,0.974815


The following columns in the evaluation set  don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: targets, category, source. If targets, category, source are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 2700
  Batch size = 32
Saving model checkpoint to q2_electra\checkpoint-500
Configuration saved in q2_electra\checkpoint-500\config.json
Model weights saved in q2_electra\checkpoint-500\pytorch_model.bin
tokenizer config file saved in q2_electra\checkpoint-500\tokenizer_config.json
Special tokens file saved in q2_electra\checkpoint-500\special_tokens_map.json
Deleting older checkpoint [q2_electra\checkpoint-788] due to args.save_total_limit
Deleting older checkpoint [q2_electra\checkpoint-3940] due to args.save_total_limit
The following columns in the evaluation set  don't have a corresponding argument in `ElectraForSequenceClassificatio

TrainOutput(global_step=3940, training_loss=0.046163889352317314, metrics={'train_runtime': 979.7426, 'train_samples_per_second': 128.605, 'train_steps_per_second': 4.021, 'total_flos': 1992673024574400.0, 'train_loss': 0.046163889352317314, 'epoch': 10.0})

In [26]:
pred = trainer.predict(tokenized_datasets["test"])
print(trainer.evaluate(tokenized_datasets["test"]))
y_pred = pred.predictions.argmax(axis=-1)
print(classification_report(tokenized_datasets["test"]["label"], y_pred, target_names=str_to_int.keys()))
print("AUC-ovr", metrics.roc_auc_score(tokenized_datasets["test"]["label"], 
                                       softmax(pred.predictions, axis=-1), multi_class="ovr"))
print("AUC-ovo", metrics.roc_auc_score(tokenized_datasets["test"]["label"], 
                                       softmax(pred.predictions, axis=-1), multi_class="ovo"))

The following columns in the test set  don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: targets, category, source. If targets, category, source are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 2700
  Batch size = 32
The following columns in the evaluation set  don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: targets, category, source. If targets, category, source are not expected by `ElectraForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 2700
  Batch size = 32


{'eval_loss': 0.13852816820144653, 'eval_accuracy': 0.9814814814814815, 'eval_runtime': 11.1409, 'eval_samples_per_second': 242.35, 'eval_steps_per_second': 7.63, 'epoch': 10.0}
              precision    recall  f1-score   support

       quran       0.99      0.97      0.98       900
       bible       0.97      0.99      0.98       900
       mizan       0.99      0.98      0.98       900

    accuracy                           0.98      2700
   macro avg       0.98      0.98      0.98      2700
weighted avg       0.98      0.98      0.98      2700

AUC-ovr 0.9987802469135802
AUC-ovo 0.9987802469135802


# 2)

In [28]:
model_checkpoint = "HooshvareLab/bert-base-parsbert-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=3)

metric = load_metric("accuracy")
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

str_to_int = {"quran": 0, "bible": 1, "mizan": 2}
def tokenize_function(examples):
    tokenized_batch = tokenizer(examples["targets"], truncation=True, max_length=128)
    tokenized_batch["label"] = [str_to_int[label] for label in examples["category"]]
    return tokenized_batch

tokenized_datasets = dataset.map(tokenize_function, batched=True)
print(tokenized_datasets["train"][0])
tokenized_datasets

training_args = TrainingArguments(
    output_dir="q2_parsbert", 
    evaluation_strategy="epoch",
    logging_steps = 20,
    learning_rate=3e-5,
    num_train_epochs=10,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    save_total_limit = 1,
#     load_best_model_at_end=True,
#     save_strategy = "epoch",
#     metric_for_best_model="accuracy",
    group_by_length = True,
    seed=0,
)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["valid"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)
trainer.train()

Could not locate the tokenizer configuration file, will try to use the model config instead.
loading configuration file https://huggingface.co/HooshvareLab/bert-base-parsbert-uncased/resolve/main/config.json from cache at C:\Users\/.cache\huggingface\transformers\d3b7c3283a6a4ad4471f59269c9de8adadfab0b05eebf49a64e046fca56cdab2.58cfea678e7bd2c1de3bfd4a5357101526b9fbc32a994b9456047e55b0afbebe
Model config BertConfig {
  "_name_or_path": "HooshvareLab/bert-base-parsbert-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.18.0",
  "type_vocab_size": 2,
  "u

  0%|          | 0/13 [00:00<?, ?ba/s]

  0%|          | 0/3 [00:00<?, ?ba/s]

  0%|          | 0/3 [00:00<?, ?ba/s]

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


{'source': 'When news is brought to one of them, of (the birth of) a female (child), his face darkens, and he is filled with inward grief!', 'targets': 'و چون یکی از آنان را به [ولادت] دختر مژده دهند [از شدت خشم] چهره\u200cاش سیاه گردد، ودرونش از غصه واندوه لبریز و آکنده شود!!', 'category': 'quran', 'input_ids': [2, 331, 3052, 2375, 2036, 3609, 2049, 2031, 24, 14790, 26, 4589, 26768, 3619, 24, 2036, 4306, 7594, 26, 25511, 4910, 6358, 300, 13893, 2061, 1176, 2036, 20934, 41316, 2328, 30036, 331, 31499, 2268, 5, 5, 4], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'label': 0}


The following columns in the training set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: targets, category, source. If targets, category, source are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 12600
  Num Epochs = 10
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 3940


Epoch,Training Loss,Validation Loss,Accuracy
1,0.1036,0.127444,0.954074
2,0.077,0.122845,0.969259
3,0.0215,0.179542,0.963333
4,0.0004,0.161452,0.974074
5,0.0001,0.165337,0.973704
6,0.0056,0.173197,0.974074
7,0.0,0.174485,0.975556
8,0.0008,0.174245,0.975926
9,0.0,0.180969,0.975926
10,0.0,0.176583,0.975556


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: targets, category, source. If targets, category, source are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 2700
  Batch size = 32
Saving model checkpoint to q2_parsbert\checkpoint-500
Configuration saved in q2_parsbert\checkpoint-500\config.json
Model weights saved in q2_parsbert\checkpoint-500\pytorch_model.bin
tokenizer config file saved in q2_parsbert\checkpoint-500\tokenizer_config.json
Special tokens file saved in q2_parsbert\checkpoint-500\special_tokens_map.json
Deleting older checkpoint [q2_parsbert\checkpoint-3940] due to args.save_total_limit
The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: targets, category, source. If targets, category, sou

TrainOutput(global_step=3940, training_loss=0.02945325893386436, metrics={'train_runtime': 1098.5401, 'train_samples_per_second': 114.698, 'train_steps_per_second': 3.587, 'total_flos': 1876697563416480.0, 'train_loss': 0.02945325893386436, 'epoch': 10.0})

In [29]:
pred = trainer.predict(tokenized_datasets["test"])
print(trainer.evaluate(tokenized_datasets["test"]))
y_pred = pred.predictions.argmax(axis=-1)
print(classification_report(tokenized_datasets["test"]["label"], y_pred, target_names=str_to_int.keys()))
print("AUC-ovr", metrics.roc_auc_score(tokenized_datasets["test"]["label"], 
                                       softmax(pred.predictions, axis=-1), multi_class="ovr"))
print("AUC-ovo", metrics.roc_auc_score(tokenized_datasets["test"]["label"], 
                                       softmax(pred.predictions, axis=-1), multi_class="ovo"))

The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: targets, category, source. If targets, category, source are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 2700
  Batch size = 32


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: targets, category, source. If targets, category, source are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 2700
  Batch size = 32


{'eval_loss': 0.1851060390472412, 'eval_accuracy': 0.9740740740740741, 'eval_runtime': 12.23, 'eval_samples_per_second': 220.769, 'eval_steps_per_second': 6.95, 'epoch': 10.0}
              precision    recall  f1-score   support

       quran       0.98      0.98      0.98       900
       bible       0.97      0.97      0.97       900
       mizan       0.97      0.97      0.97       900

    accuracy                           0.97      2700
   macro avg       0.97      0.97      0.97      2700
weighted avg       0.97      0.97      0.97      2700

AUC-ovr 0.9979840534979423
AUC-ovo 0.9979840534979424


# 3)

In [4]:
model_checkpoint = "xlm-roberta-base"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=3)

metric = load_metric("accuracy")
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

str_to_int = {"quran": 0, "bible": 1, "mizan": 2}
def tokenize_function(examples):
    tokenized_batch = tokenizer(examples["source"], examples["targets"], truncation=True, max_length=128)
    tokenized_batch["label"] = [str_to_int[label] for label in examples["category"]]
    return tokenized_batch

tokenized_datasets = dataset.map(tokenize_function, batched=True)
print(tokenized_datasets["train"][0])
tokenized_datasets

training_args = TrainingArguments(
    output_dir="q2_parsbert", 
    evaluation_strategy="epoch",
    logging_steps = 20,
    learning_rate=3e-5,
    num_train_epochs=5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    save_total_limit = 1,
#     load_best_model_at_end=True,
#     save_strategy = "epoch",
#     metric_for_best_model="accuracy",
    group_by_length = True,
    seed=0,
)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["valid"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)
trainer.train()

Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.bias', 'lm_head.bias', 'roberta.pooler.dense.weight', 'lm_head.decoder.weight']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.out_p

  0%|          | 0/13 [00:00<?, ?ba/s]

  0%|          | 0/3 [00:00<?, ?ba/s]

  0%|          | 0/3 [00:00<?, ?ba/s]

{'source': 'When news is brought to one of them, of (the birth of) a female (child), his face darkens, and he is filled with inward grief!', 'targets': 'و چون یکی از آنان را به [ولادت] دختر مژده دهند [از شدت خشم] چهره\u200cاش سیاه گردد، ودرونش از غصه واندوه لبریز و آکنده شود!!', 'category': 'quran', 'input_ids': [0, 14847, 7123, 83, 91048, 47, 1632, 111, 2856, 4, 111, 15, 2347, 127319, 111, 16, 10, 117776, 15, 206, 38472, 247, 1919, 2577, 43334, 1755, 4, 136, 764, 83, 152382, 678, 23, 19364, 10314, 4240, 38, 2, 2, 65, 17980, 8583, 270, 45903, 406, 178, 378, 3606, 159045, 268, 33514, 665, 20125, 6974, 29936, 378, 8428, 46687, 76127, 376, 268, 94699, 14524, 91080, 38803, 50, 141682, 900, 870, 270, 9475, 3092, 176, 3138, 13370, 176, 43720, 103347, 65, 4573, 1901, 12157, 1994, 1146, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1

The following columns in the training set  don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: targets, source, category. If targets, source, category are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 12600
  Num Epochs = 5
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 1
  Total optimization steps = 3940


Epoch,Training Loss,Validation Loss,Accuracy
1,0.1715,0.1167,0.975926
2,0.0039,0.109969,0.982963
3,0.0947,0.103452,0.984074
4,0.0007,0.064465,0.98963
5,0.0001,0.052393,0.991852


Saving model checkpoint to q2_parsbert\checkpoint-500
Configuration saved in q2_parsbert\checkpoint-500\config.json
Model weights saved in q2_parsbert\checkpoint-500\pytorch_model.bin
tokenizer config file saved in q2_parsbert\checkpoint-500\tokenizer_config.json
Special tokens file saved in q2_parsbert\checkpoint-500\special_tokens_map.json
Deleting older checkpoint [q2_parsbert\checkpoint-3500] due to args.save_total_limit
The following columns in the evaluation set  don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: targets, source, category. If targets, source, category are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 2700
  Batch size = 16
Saving model checkpoint to q2_parsbert\checkpoint-1000
Configuration saved in q2_parsbert\checkpoint-1000\config.json
Model weights saved in q2_parsbert\checkpoint-1000\pytorch_model.bin


TrainOutput(global_step=3940, training_loss=0.08316359371286675, metrics={'train_runtime': 1408.4441, 'train_samples_per_second': 44.73, 'train_steps_per_second': 2.797, 'total_flos': 2067076565731296.0, 'train_loss': 0.08316359371286675, 'epoch': 5.0})

In [5]:
pred = trainer.predict(tokenized_datasets["test"])
print(trainer.evaluate(tokenized_datasets["test"]))
y_pred = pred.predictions.argmax(axis=-1)
print(classification_report(tokenized_datasets["test"]["label"], y_pred, target_names=str_to_int.keys()))
print("AUC-ovr", metrics.roc_auc_score(tokenized_datasets["test"]["label"], 
                                       softmax(pred.predictions, axis=-1), multi_class="ovr"))
print("AUC-ovo", metrics.roc_auc_score(tokenized_datasets["test"]["label"], 
                                       softmax(pred.predictions, axis=-1), multi_class="ovo"))

The following columns in the test set  don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: targets, source, category. If targets, source, category are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 2700
  Batch size = 16


The following columns in the evaluation set  don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: targets, source, category. If targets, source, category are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 2700
  Batch size = 16


{'eval_loss': 0.06184544786810875, 'eval_accuracy': 0.9918518518518519, 'eval_runtime': 17.9835, 'eval_samples_per_second': 150.138, 'eval_steps_per_second': 9.397, 'epoch': 5.0}
              precision    recall  f1-score   support

       quran       0.99      0.99      0.99       900
       bible       0.99      1.00      0.99       900
       mizan       0.99      0.99      0.99       900

    accuracy                           0.99      2700
   macro avg       0.99      0.99      0.99      2700
weighted avg       0.99      0.99      0.99      2700

AUC-ovr 0.9992831275720165
AUC-ovo 0.9992831275720165
