In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-o1nly "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/dataset1/preprocessed.csv


In [3]:
import openpyxl
import torch
import pandas as pd
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import accuracy_score, f1_score, matthews_corrcoef
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AdamW
from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm

In [4]:
workbook = openpyxl.Workbook()
worksheet = workbook.active
worksheet.append(["Fold", "Model", "Test Accuracy Before", "Test F1 Before", "Test MCC Before",
                  "Test Accuracy After", "Test F1 After", "Test MCC After"])

In [5]:
dataset=pd.read_csv("/kaggle/input/dataset1/preprocessed.csv")

In [6]:
dataset.head()

Unnamed: 0,tweet_id,tweet,sarcasm,processed_tweets
0,866871160725794816,Triple Talaq par Burbak Kuchh nahi bolega,0,Triple Talaq par Burbak Kuchh nahi bolega
1,880356789358743553,Batao ye uss site pr se akki sir ke verdict ni...,1,Batao ye uss site pr se akki sir ke verdict ni...
2,877751493889105920,Hindu baheno par julam bardas nahi hoga @Tripl...,0,Hindu baheno par julam bardas nahi hoga Hindu ...
3,901806457871466496,Naa bhai.. aisa nhi hai.. mere handle karne se...,0,Naa bhai .. aisa nhi hai .. mere handle karne ...
4,866264330748219392,#RememberingRajiv aaj agar musalman auraten tr...,0,Remembering Rajiv aaj agar musalman auraten tr...


In [7]:
dataset.drop(['tweet'],axis=1,inplace=True)

In [8]:
dataset['processed_tweets'][0]

'Triple Talaq par Burbak Kuchh nahi bolega'

In [9]:
template = "Categorize the following Hinglish text into one of the predefined groups: sarcastic and non-sarcastic. The text is \"////\"."

# Format the tweets into the template
dataset["processed_tweets"] = [template.replace("////", tweet) for tweet in dataset["processed_tweets"]]

In [10]:
test_models=["bert-base-multilingual-cased","distilbert-base-multilingual-cased","xlm-roberta-base","timpal0l/mdeberta-v3-base-squad2","microsoft/Multilingual-MiniLM-L12-H384"]

In [11]:

# Define evaluation function
def evaluate_model(data_loader, model, device):
    model.eval()
    predictions = []
    true_labels = []
    with torch.no_grad():
        for inputs, attention_mask, labels in data_loader:
            inputs, attention_mask, labels = inputs.to(device), attention_mask.to(device), labels.to(device)
            outputs = model(inputs, attention_mask=attention_mask)
            _, predicted = outputs.logits.max(1)
            predictions.extend(predicted.tolist())
            true_labels.extend(labels.tolist())
    accuracy = accuracy_score(true_labels, predictions)
    f1 = f1_score(true_labels, predictions)
    mcc = matthews_corrcoef(true_labels, predictions)
    return accuracy, f1, mcc

# Define training loop
def train_model(model, train_loader, optimizer, scheduler, device, num_epochs):
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0

        for batch in tqdm(train_loader, desc=f"Epoch {epoch + 1}/{num_epochs}"):
            input_ids, attention_mask, labels = batch 
            input_ids, attention_mask, labels = input_ids.to(device), attention_mask.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        scheduler.step()

        print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {running_loss / len(train_loader):.4f}")

        # Save the changes to the Excel file after each epoch
        workbook.save('finetuned_model_accu_scores2.xlsx')


# Tokenize input sequences
max_length = 128

def tokenize_data(data, tokenizer, max_length):
    tokenized_data = tokenizer(data['processed_tweets'].tolist(), padding=True, truncation=True, max_length=max_length, return_tensors='pt')
    labels = torch.tensor(data['sarcasm'].tolist())
    return tokenized_data, labels

# Split the dataset into 4 folds
num_folds = 4
kf = KFold(n_splits=num_folds)

# Initialize lists to store evaluation metrics
test_accuracy_before_list = []
test_accuracy_after_list = []
test_f1_before_list = []
test_f1_after_list = []
test_mcc_before_list = []
test_mcc_after_list = []

# Iterate over models
for i in range(0,len(test_models)): # 47th modela
    try:
        fold = 0
        for train_index, test_index in kf.split(dataset):
            fold += 1
            train_data_fold, test_data_fold = dataset.iloc[train_index], dataset.iloc[test_index]
            # Load tokenizer and model
            model_name = test_models[i]
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

            # Move model to appropriate device
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
            model.to(device)

            # Define optimizer and learning rate scheduler
            optimizer = AdamW(model.parameters(), lr=2e-5)
            scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.9)

            # Tokenize and create DataLoader for training and testing data
            tokenized_train_data, train_labels = tokenize_data(train_data_fold, tokenizer, max_length)
            tokenized_test_data, test_labels = tokenize_data(test_data_fold, tokenizer, max_length)

            train_dataset = TensorDataset(tokenized_train_data['input_ids'], tokenized_train_data['attention_mask'],
                                          train_labels)
            test_dataset = TensorDataset(tokenized_test_data['input_ids'], tokenized_test_data['attention_mask'],
                                         test_labels)

            train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
            test_loader = DataLoader(test_dataset, batch_size=32)

            # Evaluate the model before training
            test_accuracy_before, test_f1_before, test_mcc_before = evaluate_model(test_loader, model, device)
            print(f"Fold {fold}, Model {model_name}:",i)
            print(f"Test Accuracy (Before fine-tuning): {test_accuracy_before:.2f}%")
            print(f"Test F1 Score (Before fine-tuning): {test_f1_before:.2f}")
            print(f"Test Matthews Correlation Coefficient (Before fine-tuning): {test_mcc_before:.2f}")

            # Train the model
            num_epochs = 10
            train_model(model, train_loader, optimizer, scheduler, device, num_epochs)

            # Evaluate the model after training
            test_accuracy_after, test_f1_after, test_mcc_after = evaluate_model(test_loader, model, device)
            print(f"Test Accuracy (After fine-tuning): {test_accuracy_after:.2f}%")
            print(f"Test F1 Score (After fine-tuning): {test_f1_after:.2f}")
            print(f"Test Matthews Correlation Coefficient (After fine-tuning): {test_mcc_after:.2f}")

            # Store evaluation metrics
            test_accuracy_before_list.append(test_accuracy_before)
            test_accuracy_after_list.append(test_accuracy_after)
            test_f1_before_list.append(test_f1_before)
            test_f1_after_list.append(test_f1_after)
            test_mcc_before_list.append(test_mcc_before)
            test_mcc_after_list.append(test_mcc_after)

            # Append the evaluation metrics to the Excel worksheet
            worksheet.append([fold, model_name, test_accuracy_before, test_f1_before, test_mcc_before,
                              test_accuracy_after, test_f1_after, test_mcc_after])

        print('Training and evaluation of model ', model_name, ' complete!')
    except Exception as e:
        print(f"Error in iteration {i}: {str(e)}")

# Calculate changes in metrics
accuracy_change = [(after - before) for before, after in zip(test_accuracy_before_list, test_accuracy_after_list)]
f1_change = [(after - before) for before, after in zip(test_f1_before_list, test_f1_after_list)]
mcc_change = [(after - before) for before, after in zip(test_mcc_before_list, test_mcc_after_list)]

# Calculate average scores after each fold
avg_accuracy_after_fold = sum(test_accuracy_after_list) / len(test_accuracy_after_list)
avg_f1_after_fold = sum(test_f1_after_list) / len(test_f1_after_list)
avg_mcc_after_fold = sum(test_mcc_after_list) / len(test_mcc_after_list)

# Print or store the results as per your requirement
print("Average Accuracy After Each Fold:", avg_accuracy_after_fold)
print("Average F1 Score After Each Fold:", avg_f1_after_fold)
print("Average Matthews Correlation Coefficient After Each Fold:", avg_mcc_after_fold)

# Save the evaluation metrics to an Excel file
workbook.save('evaluation_metrics2.xlsx')

tokenizer_config.json:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/625 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/996k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.96M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/714M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Fold 1, Model bert-base-multilingual-cased: 0
Test Accuracy (Before fine-tuning): 0.87%
Test F1 Score (Before fine-tuning): 0.01
Test Matthews Correlation Coefficient (Before fine-tuning): -0.04


Epoch 1/10: 100%|██████████| 123/123 [00:43<00:00,  2.85it/s]


Epoch 1/10, Loss: 0.3154


Epoch 2/10: 100%|██████████| 123/123 [00:42<00:00,  2.86it/s]


Epoch 2/10, Loss: 0.1744


Epoch 3/10: 100%|██████████| 123/123 [00:42<00:00,  2.86it/s]


Epoch 3/10, Loss: 0.1220


Epoch 4/10: 100%|██████████| 123/123 [00:43<00:00,  2.86it/s]


Epoch 4/10, Loss: 0.1035


Epoch 5/10: 100%|██████████| 123/123 [00:42<00:00,  2.86it/s]


Epoch 5/10, Loss: 0.0834


Epoch 6/10: 100%|██████████| 123/123 [00:43<00:00,  2.86it/s]


Epoch 6/10, Loss: 0.0561


Epoch 7/10: 100%|██████████| 123/123 [00:43<00:00,  2.86it/s]


Epoch 7/10, Loss: 0.0321


Epoch 8/10: 100%|██████████| 123/123 [00:42<00:00,  2.86it/s]


Epoch 8/10, Loss: 0.0246


Epoch 9/10: 100%|██████████| 123/123 [00:43<00:00,  2.86it/s]


Epoch 9/10, Loss: 0.0140


Epoch 10/10: 100%|██████████| 123/123 [00:42<00:00,  2.86it/s]


Epoch 10/10, Loss: 0.0086
Test Accuracy (After fine-tuning): 0.96%
Test F1 Score (After fine-tuning): 0.77
Test Matthews Correlation Coefficient (After fine-tuning): 0.76


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Fold 2, Model bert-base-multilingual-cased: 0
Test Accuracy (Before fine-tuning): 0.90%
Test F1 Score (Before fine-tuning): 0.00
Test Matthews Correlation Coefficient (Before fine-tuning): 0.00


Epoch 1/10: 100%|██████████| 124/124 [00:43<00:00,  2.88it/s]


Epoch 1/10, Loss: 0.2725


Epoch 2/10: 100%|██████████| 124/124 [00:43<00:00,  2.88it/s]


Epoch 2/10, Loss: 0.1257


Epoch 3/10: 100%|██████████| 124/124 [00:43<00:00,  2.88it/s]


Epoch 3/10, Loss: 0.1077


Epoch 4/10: 100%|██████████| 124/124 [00:43<00:00,  2.88it/s]


Epoch 4/10, Loss: 0.0895


Epoch 5/10: 100%|██████████| 124/124 [00:43<00:00,  2.88it/s]


Epoch 5/10, Loss: 0.0737


Epoch 6/10: 100%|██████████| 124/124 [00:43<00:00,  2.88it/s]


Epoch 6/10, Loss: 0.0568


Epoch 7/10: 100%|██████████| 124/124 [00:43<00:00,  2.88it/s]


Epoch 7/10, Loss: 0.0399


Epoch 8/10: 100%|██████████| 124/124 [00:43<00:00,  2.88it/s]


Epoch 8/10, Loss: 0.0323


Epoch 9/10: 100%|██████████| 124/124 [00:43<00:00,  2.88it/s]


Epoch 9/10, Loss: 0.0186


Epoch 10/10: 100%|██████████| 124/124 [00:43<00:00,  2.88it/s]


Epoch 10/10, Loss: 0.0168
Test Accuracy (After fine-tuning): 0.95%
Test F1 Score (After fine-tuning): 0.65
Test Matthews Correlation Coefficient (After fine-tuning): 0.67


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Fold 3, Model bert-base-multilingual-cased: 0
Test Accuracy (Before fine-tuning): 0.33%
Test F1 Score (Before fine-tuning): 0.19
Test Matthews Correlation Coefficient (Before fine-tuning): 0.06


Epoch 1/10: 100%|██████████| 124/124 [00:43<00:00,  2.88it/s]


Epoch 1/10, Loss: 0.2474


Epoch 2/10: 100%|██████████| 124/124 [00:43<00:00,  2.88it/s]


Epoch 2/10, Loss: 0.1299


Epoch 3/10: 100%|██████████| 124/124 [00:43<00:00,  2.88it/s]


Epoch 3/10, Loss: 0.1202


Epoch 4/10: 100%|██████████| 124/124 [00:43<00:00,  2.88it/s]


Epoch 4/10, Loss: 0.1071


Epoch 5/10: 100%|██████████| 124/124 [00:43<00:00,  2.88it/s]


Epoch 5/10, Loss: 0.0999


Epoch 6/10: 100%|██████████| 124/124 [00:43<00:00,  2.88it/s]


Epoch 6/10, Loss: 0.0876


Epoch 7/10: 100%|██████████| 124/124 [00:43<00:00,  2.88it/s]


Epoch 7/10, Loss: 0.0721


Epoch 8/10: 100%|██████████| 124/124 [00:43<00:00,  2.88it/s]


Epoch 8/10, Loss: 0.0520


Epoch 9/10: 100%|██████████| 124/124 [00:43<00:00,  2.88it/s]


Epoch 9/10, Loss: 0.0440


Epoch 10/10: 100%|██████████| 124/124 [00:43<00:00,  2.88it/s]


Epoch 10/10, Loss: 0.0228
Test Accuracy (After fine-tuning): 0.98%
Test F1 Score (After fine-tuning): 0.89
Test Matthews Correlation Coefficient (After fine-tuning): 0.88


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Fold 4, Model bert-base-multilingual-cased: 0
Test Accuracy (Before fine-tuning): 0.38%
Test F1 Score (Before fine-tuning): 0.16
Test Matthews Correlation Coefficient (Before fine-tuning): 0.06


Epoch 1/10: 100%|██████████| 124/124 [00:42<00:00,  2.90it/s]


Epoch 1/10, Loss: 0.2886


Epoch 2/10: 100%|██████████| 124/124 [00:42<00:00,  2.90it/s]


Epoch 2/10, Loss: 0.0688


Epoch 3/10: 100%|██████████| 124/124 [00:42<00:00,  2.90it/s]


Epoch 3/10, Loss: 0.0422


Epoch 4/10: 100%|██████████| 124/124 [00:42<00:00,  2.90it/s]


Epoch 4/10, Loss: 0.0300


Epoch 5/10: 100%|██████████| 124/124 [00:42<00:00,  2.90it/s]


Epoch 5/10, Loss: 0.0231


Epoch 6/10: 100%|██████████| 124/124 [00:42<00:00,  2.90it/s]


Epoch 6/10, Loss: 0.0148


Epoch 7/10: 100%|██████████| 124/124 [00:42<00:00,  2.90it/s]


Epoch 7/10, Loss: 0.0101


Epoch 8/10: 100%|██████████| 124/124 [00:42<00:00,  2.90it/s]


Epoch 8/10, Loss: 0.0087


Epoch 9/10: 100%|██████████| 124/124 [00:42<00:00,  2.90it/s]


Epoch 9/10, Loss: 0.0053


Epoch 10/10: 100%|██████████| 124/124 [00:42<00:00,  2.90it/s]


Epoch 10/10, Loss: 0.0055
Test Accuracy (After fine-tuning): 0.83%
Test F1 Score (After fine-tuning): 0.46
Test Matthews Correlation Coefficient (After fine-tuning): 0.48
Training and evaluation of model  bert-base-multilingual-cased  complete!


tokenizer_config.json:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/466 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/996k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.96M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/542M [00:00<?, ?B/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Fold 1, Model distilbert-base-multilingual-cased: 1
Test Accuracy (Before fine-tuning): 0.88%
Test F1 Score (Before fine-tuning): 0.00
Test Matthews Correlation Coefficient (Before fine-tuning): -0.03


Epoch 1/10: 100%|██████████| 123/123 [00:22<00:00,  5.43it/s]


Epoch 1/10, Loss: 0.3085


Epoch 2/10: 100%|██████████| 123/123 [00:22<00:00,  5.44it/s]


Epoch 2/10, Loss: 0.1430


Epoch 3/10: 100%|██████████| 123/123 [00:22<00:00,  5.44it/s]


Epoch 3/10, Loss: 0.1070


Epoch 4/10: 100%|██████████| 123/123 [00:22<00:00,  5.44it/s]


Epoch 4/10, Loss: 0.0947


Epoch 5/10: 100%|██████████| 123/123 [00:22<00:00,  5.44it/s]


Epoch 5/10, Loss: 0.0711


Epoch 6/10: 100%|██████████| 123/123 [00:22<00:00,  5.44it/s]


Epoch 6/10, Loss: 0.0490


Epoch 7/10: 100%|██████████| 123/123 [00:22<00:00,  5.43it/s]


Epoch 7/10, Loss: 0.0272


Epoch 8/10: 100%|██████████| 123/123 [00:22<00:00,  5.44it/s]


Epoch 8/10, Loss: 0.0137


Epoch 9/10: 100%|██████████| 123/123 [00:22<00:00,  5.43it/s]


Epoch 9/10, Loss: 0.0059


Epoch 10/10: 100%|██████████| 123/123 [00:22<00:00,  5.44it/s]


Epoch 10/10, Loss: 0.0052
Test Accuracy (After fine-tuning): 0.96%
Test F1 Score (After fine-tuning): 0.82
Test Matthews Correlation Coefficient (After fine-tuning): 0.81


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Fold 2, Model distilbert-base-multilingual-cased: 1
Test Accuracy (Before fine-tuning): 0.10%
Test F1 Score (Before fine-tuning): 0.18
Test Matthews Correlation Coefficient (Before fine-tuning): 0.00


Epoch 1/10: 100%|██████████| 124/124 [00:22<00:00,  5.47it/s]


Epoch 1/10, Loss: 0.3259


Epoch 2/10: 100%|██████████| 124/124 [00:22<00:00,  5.47it/s]


Epoch 2/10, Loss: 0.1703


Epoch 3/10: 100%|██████████| 124/124 [00:22<00:00,  5.47it/s]


Epoch 3/10, Loss: 0.1291


Epoch 4/10: 100%|██████████| 124/124 [00:22<00:00,  5.47it/s]


Epoch 4/10, Loss: 0.1087


Epoch 5/10: 100%|██████████| 124/124 [00:22<00:00,  5.47it/s]


Epoch 5/10, Loss: 0.0947


Epoch 6/10: 100%|██████████| 124/124 [00:22<00:00,  5.47it/s]


Epoch 6/10, Loss: 0.0856


Epoch 7/10: 100%|██████████| 124/124 [00:22<00:00,  5.48it/s]


Epoch 7/10, Loss: 0.0637


Epoch 8/10: 100%|██████████| 124/124 [00:22<00:00,  5.48it/s]


Epoch 8/10, Loss: 0.0446


Epoch 9/10: 100%|██████████| 124/124 [00:22<00:00,  5.47it/s]


Epoch 9/10, Loss: 0.0233


Epoch 10/10: 100%|██████████| 124/124 [00:22<00:00,  5.48it/s]


Epoch 10/10, Loss: 0.0128
Test Accuracy (After fine-tuning): 0.97%
Test F1 Score (After fine-tuning): 0.83
Test Matthews Correlation Coefficient (After fine-tuning): 0.82


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Fold 3, Model distilbert-base-multilingual-cased: 1
Test Accuracy (Before fine-tuning): 0.66%
Test F1 Score (Before fine-tuning): 0.22
Test Matthews Correlation Coefficient (Before fine-tuning): 0.10


Epoch 1/10: 100%|██████████| 124/124 [00:22<00:00,  5.47it/s]


Epoch 1/10, Loss: 0.3164


Epoch 2/10: 100%|██████████| 124/124 [00:22<00:00,  5.47it/s]


Epoch 2/10, Loss: 0.1505


Epoch 3/10: 100%|██████████| 124/124 [00:22<00:00,  5.46it/s]


Epoch 3/10, Loss: 0.1082


Epoch 4/10: 100%|██████████| 124/124 [00:22<00:00,  5.47it/s]


Epoch 4/10, Loss: 0.0931


Epoch 5/10: 100%|██████████| 124/124 [00:22<00:00,  5.48it/s]


Epoch 5/10, Loss: 0.0668


Epoch 6/10: 100%|██████████| 124/124 [00:22<00:00,  5.48it/s]


Epoch 6/10, Loss: 0.0439


Epoch 7/10: 100%|██████████| 124/124 [00:22<00:00,  5.48it/s]


Epoch 7/10, Loss: 0.0334


Epoch 8/10: 100%|██████████| 124/124 [00:22<00:00,  5.47it/s]


Epoch 8/10, Loss: 0.0093


Epoch 9/10: 100%|██████████| 124/124 [00:22<00:00,  5.47it/s]


Epoch 9/10, Loss: 0.0070


Epoch 10/10: 100%|██████████| 124/124 [00:22<00:00,  5.47it/s]


Epoch 10/10, Loss: 0.0048
Test Accuracy (After fine-tuning): 0.98%
Test F1 Score (After fine-tuning): 0.88
Test Matthews Correlation Coefficient (After fine-tuning): 0.87


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Fold 4, Model distilbert-base-multilingual-cased: 1
Test Accuracy (Before fine-tuning): 0.08%
Test F1 Score (Before fine-tuning): 0.14
Test Matthews Correlation Coefficient (Before fine-tuning): 0.00


Epoch 1/10: 100%|██████████| 124/124 [00:22<00:00,  5.51it/s]


Epoch 1/10, Loss: 0.3123


Epoch 2/10: 100%|██████████| 124/124 [00:22<00:00,  5.51it/s]


Epoch 2/10, Loss: 0.0617


Epoch 3/10: 100%|██████████| 124/124 [00:22<00:00,  5.51it/s]


Epoch 3/10, Loss: 0.0377


Epoch 4/10: 100%|██████████| 124/124 [00:22<00:00,  5.51it/s]


Epoch 4/10, Loss: 0.0322


Epoch 5/10: 100%|██████████| 124/124 [00:22<00:00,  5.50it/s]


Epoch 5/10, Loss: 0.0206


Epoch 6/10: 100%|██████████| 124/124 [00:22<00:00,  5.51it/s]


Epoch 6/10, Loss: 0.0153


Epoch 7/10: 100%|██████████| 124/124 [00:22<00:00,  5.50it/s]


Epoch 7/10, Loss: 0.0074


Epoch 8/10: 100%|██████████| 124/124 [00:22<00:00,  5.51it/s]


Epoch 8/10, Loss: 0.0034


Epoch 9/10: 100%|██████████| 124/124 [00:22<00:00,  5.51it/s]


Epoch 9/10, Loss: 0.0028


Epoch 10/10: 100%|██████████| 124/124 [00:22<00:00,  5.51it/s]


Epoch 10/10, Loss: 0.0009
Test Accuracy (After fine-tuning): 0.83%
Test F1 Score (After fine-tuning): 0.46
Test Matthews Correlation Coefficient (After fine-tuning): 0.48
Training and evaluation of model  distilbert-base-multilingual-cased  complete!


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/615 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.10M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.12G [00:00<?, ?B/s]

Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Fold 1, Model xlm-roberta-base: 2
Test Accuracy (Before fine-tuning): 0.11%
Test F1 Score (Before fine-tuning): 0.20
Test Matthews Correlation Coefficient (Before fine-tuning): 0.00


Epoch 1/10: 100%|██████████| 123/123 [00:40<00:00,  3.07it/s]


Epoch 1/10, Loss: 0.3597


Epoch 2/10: 100%|██████████| 123/123 [00:40<00:00,  3.07it/s]


Epoch 2/10, Loss: 0.3106


Epoch 3/10: 100%|██████████| 123/123 [00:40<00:00,  3.06it/s]


Epoch 3/10, Loss: 0.3048


Epoch 4/10: 100%|██████████| 123/123 [00:40<00:00,  3.07it/s]


Epoch 4/10, Loss: 0.2125


Epoch 5/10: 100%|██████████| 123/123 [00:40<00:00,  3.07it/s]


Epoch 5/10, Loss: 0.1142


Epoch 6/10: 100%|██████████| 123/123 [00:40<00:00,  3.07it/s]


Epoch 6/10, Loss: 0.1068


Epoch 7/10: 100%|██████████| 123/123 [00:40<00:00,  3.07it/s]


Epoch 7/10, Loss: 0.0936


Epoch 8/10: 100%|██████████| 123/123 [00:40<00:00,  3.07it/s]


Epoch 8/10, Loss: 0.0831


Epoch 9/10: 100%|██████████| 123/123 [00:40<00:00,  3.07it/s]


Epoch 9/10, Loss: 0.0843


Epoch 10/10: 100%|██████████| 123/123 [00:40<00:00,  3.07it/s]


Epoch 10/10, Loss: 0.0622
Test Accuracy (After fine-tuning): 0.97%
Test F1 Score (After fine-tuning): 0.84
Test Matthews Correlation Coefficient (After fine-tuning): 0.82


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Fold 2, Model xlm-roberta-base: 2
Test Accuracy (Before fine-tuning): 0.90%
Test F1 Score (Before fine-tuning): 0.00
Test Matthews Correlation Coefficient (Before fine-tuning): 0.00


Epoch 1/10: 100%|██████████| 124/124 [00:40<00:00,  3.09it/s]


Epoch 1/10, Loss: 0.3138


Epoch 2/10: 100%|██████████| 124/124 [00:40<00:00,  3.09it/s]


Epoch 2/10, Loss: 0.1460


Epoch 3/10: 100%|██████████| 124/124 [00:40<00:00,  3.09it/s]


Epoch 3/10, Loss: 0.1216


Epoch 4/10: 100%|██████████| 124/124 [00:40<00:00,  3.09it/s]


Epoch 4/10, Loss: 0.1139


Epoch 5/10: 100%|██████████| 124/124 [00:40<00:00,  3.09it/s]


Epoch 5/10, Loss: 0.0956


Epoch 6/10: 100%|██████████| 124/124 [00:40<00:00,  3.08it/s]


Epoch 6/10, Loss: 0.0849


Epoch 7/10: 100%|██████████| 124/124 [00:40<00:00,  3.09it/s]


Epoch 7/10, Loss: 0.0723


Epoch 8/10: 100%|██████████| 124/124 [00:40<00:00,  3.09it/s]


Epoch 8/10, Loss: 0.0527


Epoch 9/10: 100%|██████████| 124/124 [00:40<00:00,  3.09it/s]


Epoch 9/10, Loss: 0.0425


Epoch 10/10: 100%|██████████| 124/124 [00:40<00:00,  3.09it/s]


Epoch 10/10, Loss: 0.0254
Test Accuracy (After fine-tuning): 0.95%
Test F1 Score (After fine-tuning): 0.69
Test Matthews Correlation Coefficient (After fine-tuning): 0.69


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Fold 3, Model xlm-roberta-base: 2
Test Accuracy (Before fine-tuning): 0.90%
Test F1 Score (Before fine-tuning): 0.00
Test Matthews Correlation Coefficient (Before fine-tuning): 0.00


Epoch 1/10: 100%|██████████| 124/124 [00:40<00:00,  3.09it/s]


Epoch 1/10, Loss: 0.3263


Epoch 2/10: 100%|██████████| 124/124 [00:40<00:00,  3.09it/s]


Epoch 2/10, Loss: 0.1857


Epoch 3/10: 100%|██████████| 124/124 [00:40<00:00,  3.10it/s]


Epoch 3/10, Loss: 0.1226


Epoch 4/10: 100%|██████████| 124/124 [00:39<00:00,  3.10it/s]


Epoch 4/10, Loss: 0.1113


Epoch 5/10: 100%|██████████| 124/124 [00:40<00:00,  3.10it/s]


Epoch 5/10, Loss: 0.1083


Epoch 6/10: 100%|██████████| 124/124 [00:40<00:00,  3.10it/s]


Epoch 6/10, Loss: 0.1024


Epoch 7/10: 100%|██████████| 124/124 [00:40<00:00,  3.09it/s]


Epoch 7/10, Loss: 0.0984


Epoch 8/10: 100%|██████████| 124/124 [00:40<00:00,  3.09it/s]


Epoch 8/10, Loss: 0.0876


Epoch 9/10: 100%|██████████| 124/124 [00:40<00:00,  3.10it/s]


Epoch 9/10, Loss: 0.0849


Epoch 10/10: 100%|██████████| 124/124 [00:40<00:00,  3.10it/s]


Epoch 10/10, Loss: 0.0706
Test Accuracy (After fine-tuning): 0.96%
Test F1 Score (After fine-tuning): 0.77
Test Matthews Correlation Coefficient (After fine-tuning): 0.76


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Fold 4, Model xlm-roberta-base: 2
Test Accuracy (Before fine-tuning): 0.92%
Test F1 Score (Before fine-tuning): 0.00
Test Matthews Correlation Coefficient (Before fine-tuning): 0.00


Epoch 1/10: 100%|██████████| 124/124 [00:40<00:00,  3.08it/s]


Epoch 1/10, Loss: 0.3487


Epoch 2/10: 100%|██████████| 124/124 [00:40<00:00,  3.09it/s]


Epoch 2/10, Loss: 0.1610


Epoch 3/10: 100%|██████████| 124/124 [00:40<00:00,  3.09it/s]


Epoch 3/10, Loss: 0.0562


Epoch 4/10: 100%|██████████| 124/124 [00:40<00:00,  3.09it/s]


Epoch 4/10, Loss: 0.0527


Epoch 5/10: 100%|██████████| 124/124 [00:40<00:00,  3.09it/s]


Epoch 5/10, Loss: 0.0474


Epoch 6/10: 100%|██████████| 124/124 [00:40<00:00,  3.08it/s]


Epoch 6/10, Loss: 0.0453


Epoch 7/10: 100%|██████████| 124/124 [00:40<00:00,  3.08it/s]


Epoch 7/10, Loss: 0.0441


Epoch 8/10: 100%|██████████| 124/124 [00:40<00:00,  3.08it/s]


Epoch 8/10, Loss: 0.0404


Epoch 9/10: 100%|██████████| 124/124 [00:40<00:00,  3.08it/s]


Epoch 9/10, Loss: 0.0408


Epoch 10/10: 100%|██████████| 124/124 [00:40<00:00,  3.09it/s]


Epoch 10/10, Loss: 0.0395
Test Accuracy (After fine-tuning): 0.81%
Test F1 Score (After fine-tuning): 0.44
Test Matthews Correlation Coefficient (After fine-tuning): 0.46
Training and evaluation of model  xlm-roberta-base  complete!


tokenizer_config.json:   0%|          | 0.00/453 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/16.3M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/23.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/173 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/879 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.11G [00:00<?, ?B/s]

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at timpal0l/mdeberta-v3-base-squad2 and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Fold 1, Model timpal0l/mdeberta-v3-base-squad2: 3
Test Accuracy (Before fine-tuning): 0.11%
Test F1 Score (Before fine-tuning): 0.20
Test Matthews Correlation Coefficient (Before fine-tuning): 0.00


Epoch 1/10: 100%|██████████| 123/123 [00:52<00:00,  2.34it/s]


Epoch 1/10, Loss: 0.2938


Epoch 2/10: 100%|██████████| 123/123 [00:52<00:00,  2.34it/s]


Epoch 2/10, Loss: 0.1436


Epoch 3/10: 100%|██████████| 123/123 [00:52<00:00,  2.35it/s]


Epoch 3/10, Loss: 0.1148


Epoch 4/10: 100%|██████████| 123/123 [00:52<00:00,  2.34it/s]


Epoch 4/10, Loss: 0.1103


Epoch 5/10: 100%|██████████| 123/123 [00:52<00:00,  2.34it/s]


Epoch 5/10, Loss: 0.0990


Epoch 6/10: 100%|██████████| 123/123 [00:52<00:00,  2.34it/s]


Epoch 6/10, Loss: 0.0974


Epoch 7/10: 100%|██████████| 123/123 [00:52<00:00,  2.35it/s]


Epoch 7/10, Loss: 0.0867


Epoch 8/10: 100%|██████████| 123/123 [00:52<00:00,  2.34it/s]


Epoch 8/10, Loss: 0.0730


Epoch 9/10: 100%|██████████| 123/123 [00:52<00:00,  2.34it/s]


Epoch 9/10, Loss: 0.0546


Epoch 10/10: 100%|██████████| 123/123 [00:52<00:00,  2.34it/s]


Epoch 10/10, Loss: 0.0436
Test Accuracy (After fine-tuning): 0.98%
Test F1 Score (After fine-tuning): 0.89
Test Matthews Correlation Coefficient (After fine-tuning): 0.88


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at timpal0l/mdeberta-v3-base-squad2 and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Fold 2, Model timpal0l/mdeberta-v3-base-squad2: 3
Test Accuracy (Before fine-tuning): 0.90%
Test F1 Score (Before fine-tuning): 0.00
Test Matthews Correlation Coefficient (Before fine-tuning): 0.00


Epoch 1/10: 100%|██████████| 124/124 [00:52<00:00,  2.36it/s]


Epoch 1/10, Loss: 0.2973


Epoch 2/10: 100%|██████████| 124/124 [00:52<00:00,  2.36it/s]


Epoch 2/10, Loss: 0.1461


Epoch 3/10: 100%|██████████| 124/124 [00:52<00:00,  2.36it/s]


Epoch 3/10, Loss: 0.1192


Epoch 4/10: 100%|██████████| 124/124 [00:52<00:00,  2.36it/s]


Epoch 4/10, Loss: 0.1112


Epoch 5/10: 100%|██████████| 124/124 [00:52<00:00,  2.36it/s]


Epoch 5/10, Loss: 0.1015


Epoch 6/10: 100%|██████████| 124/124 [00:52<00:00,  2.36it/s]


Epoch 6/10, Loss: 0.0933


Epoch 7/10: 100%|██████████| 124/124 [00:52<00:00,  2.36it/s]


Epoch 7/10, Loss: 0.0814


Epoch 8/10: 100%|██████████| 124/124 [00:52<00:00,  2.36it/s]


Epoch 8/10, Loss: 0.0691


Epoch 9/10: 100%|██████████| 124/124 [00:52<00:00,  2.36it/s]


Epoch 9/10, Loss: 0.0628


Epoch 10/10: 100%|██████████| 124/124 [00:52<00:00,  2.36it/s]


Epoch 10/10, Loss: 0.0559
Test Accuracy (After fine-tuning): 0.96%
Test F1 Score (After fine-tuning): 0.77
Test Matthews Correlation Coefficient (After fine-tuning): 0.76


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at timpal0l/mdeberta-v3-base-squad2 and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Fold 3, Model timpal0l/mdeberta-v3-base-squad2: 3
Test Accuracy (Before fine-tuning): 0.80%
Test F1 Score (Before fine-tuning): 0.19
Test Matthews Correlation Coefficient (Before fine-tuning): 0.09


Epoch 1/10: 100%|██████████| 124/124 [00:52<00:00,  2.37it/s]


Epoch 1/10, Loss: 0.3102


Epoch 2/10: 100%|██████████| 124/124 [00:52<00:00,  2.37it/s]


Epoch 2/10, Loss: 0.1339


Epoch 3/10: 100%|██████████| 124/124 [00:52<00:00,  2.37it/s]


Epoch 3/10, Loss: 0.1156


Epoch 4/10: 100%|██████████| 124/124 [00:52<00:00,  2.37it/s]


Epoch 4/10, Loss: 0.1080


Epoch 5/10: 100%|██████████| 124/124 [00:52<00:00,  2.37it/s]


Epoch 5/10, Loss: 0.1103


Epoch 6/10: 100%|██████████| 124/124 [00:52<00:00,  2.37it/s]


Epoch 6/10, Loss: 0.0958


Epoch 7/10: 100%|██████████| 124/124 [00:52<00:00,  2.37it/s]


Epoch 7/10, Loss: 0.0787


Epoch 8/10: 100%|██████████| 124/124 [00:52<00:00,  2.37it/s]


Epoch 8/10, Loss: 0.0707


Epoch 9/10: 100%|██████████| 124/124 [00:52<00:00,  2.37it/s]


Epoch 9/10, Loss: 0.0580


Epoch 10/10: 100%|██████████| 124/124 [00:52<00:00,  2.37it/s]


Epoch 10/10, Loss: 0.0407
Test Accuracy (After fine-tuning): 0.97%
Test F1 Score (After fine-tuning): 0.85
Test Matthews Correlation Coefficient (After fine-tuning): 0.84


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at timpal0l/mdeberta-v3-base-squad2 and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Fold 4, Model timpal0l/mdeberta-v3-base-squad2: 3
Test Accuracy (Before fine-tuning): 0.49%
Test F1 Score (Before fine-tuning): 0.09
Test Matthews Correlation Coefficient (Before fine-tuning): -0.09


Epoch 1/10: 100%|██████████| 124/124 [00:52<00:00,  2.36it/s]


Epoch 1/10, Loss: 0.3204


Epoch 2/10: 100%|██████████| 124/124 [00:52<00:00,  2.36it/s]


Epoch 2/10, Loss: 0.1023


Epoch 3/10: 100%|██████████| 124/124 [00:52<00:00,  2.36it/s]


Epoch 3/10, Loss: 0.0571


Epoch 4/10: 100%|██████████| 124/124 [00:52<00:00,  2.36it/s]


Epoch 4/10, Loss: 0.0595


Epoch 5/10: 100%|██████████| 124/124 [00:52<00:00,  2.36it/s]


Epoch 5/10, Loss: 0.0447


Epoch 6/10: 100%|██████████| 124/124 [00:52<00:00,  2.36it/s]


Epoch 6/10, Loss: 0.0357


Epoch 7/10: 100%|██████████| 124/124 [00:52<00:00,  2.36it/s]


Epoch 7/10, Loss: 0.0290


Epoch 8/10: 100%|██████████| 124/124 [00:52<00:00,  2.36it/s]


Epoch 8/10, Loss: 0.0249


Epoch 9/10: 100%|██████████| 124/124 [00:52<00:00,  2.36it/s]


Epoch 9/10, Loss: 0.0193


Epoch 10/10: 100%|██████████| 124/124 [00:52<00:00,  2.36it/s]


Epoch 10/10, Loss: 0.0127
Test Accuracy (After fine-tuning): 0.83%
Test F1 Score (After fine-tuning): 0.46
Test Matthews Correlation Coefficient (After fine-tuning): 0.48
Training and evaluation of model  timpal0l/mdeberta-v3-base-squad2  complete!


tokenizer_config.json:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/430 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/150 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/471M [00:00<?, ?B/s]

  return self.fget.__get__(instance, owner)()
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/Multilingual-MiniLM-L12-H384 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Fold 1, Model microsoft/Multilingual-MiniLM-L12-H384: 4
Test Accuracy (Before fine-tuning): 0.89%
Test F1 Score (Before fine-tuning): 0.00
Test Matthews Correlation Coefficient (Before fine-tuning): 0.00


Epoch 1/10: 100%|██████████| 123/123 [00:14<00:00,  8.66it/s]


Epoch 1/10, Loss: 0.3872


Epoch 2/10: 100%|██████████| 123/123 [00:14<00:00,  8.71it/s]


Epoch 2/10, Loss: 0.3071


Epoch 3/10: 100%|██████████| 123/123 [00:14<00:00,  8.69it/s]


Epoch 3/10, Loss: 0.2802


Epoch 4/10: 100%|██████████| 123/123 [00:14<00:00,  8.70it/s]


Epoch 4/10, Loss: 0.1623


Epoch 5/10: 100%|██████████| 123/123 [00:14<00:00,  8.68it/s]


Epoch 5/10, Loss: 0.1289


Epoch 6/10: 100%|██████████| 123/123 [00:14<00:00,  8.68it/s]


Epoch 6/10, Loss: 0.1231


Epoch 7/10: 100%|██████████| 123/123 [00:14<00:00,  8.69it/s]


Epoch 7/10, Loss: 0.1150


Epoch 8/10: 100%|██████████| 123/123 [00:14<00:00,  8.69it/s]


Epoch 8/10, Loss: 0.1086


Epoch 9/10: 100%|██████████| 123/123 [00:14<00:00,  8.68it/s]


Epoch 9/10, Loss: 0.1013


Epoch 10/10: 100%|██████████| 123/123 [00:14<00:00,  8.70it/s]


Epoch 10/10, Loss: 0.0973
Test Accuracy (After fine-tuning): 0.97%
Test F1 Score (After fine-tuning): 0.88
Test Matthews Correlation Coefficient (After fine-tuning): 0.87


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/Multilingual-MiniLM-L12-H384 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Fold 2, Model microsoft/Multilingual-MiniLM-L12-H384: 4
Test Accuracy (Before fine-tuning): 0.90%
Test F1 Score (Before fine-tuning): 0.00
Test Matthews Correlation Coefficient (Before fine-tuning): 0.00


Epoch 1/10: 100%|██████████| 124/124 [00:14<00:00,  8.71it/s]


Epoch 1/10, Loss: 0.3908


Epoch 2/10: 100%|██████████| 124/124 [00:14<00:00,  8.74it/s]


Epoch 2/10, Loss: 0.3118


Epoch 3/10: 100%|██████████| 124/124 [00:14<00:00,  8.72it/s]


Epoch 3/10, Loss: 0.3100


Epoch 4/10: 100%|██████████| 124/124 [00:14<00:00,  8.72it/s]


Epoch 4/10, Loss: 0.2737


Epoch 5/10: 100%|██████████| 124/124 [00:14<00:00,  8.72it/s]


Epoch 5/10, Loss: 0.1478


Epoch 6/10: 100%|██████████| 124/124 [00:14<00:00,  8.73it/s]


Epoch 6/10, Loss: 0.1307


Epoch 7/10: 100%|██████████| 124/124 [00:14<00:00,  8.74it/s]


Epoch 7/10, Loss: 0.1143


Epoch 8/10: 100%|██████████| 124/124 [00:14<00:00,  8.72it/s]


Epoch 8/10, Loss: 0.1088


Epoch 9/10: 100%|██████████| 124/124 [00:14<00:00,  8.73it/s]


Epoch 9/10, Loss: 0.1039


Epoch 10/10: 100%|██████████| 124/124 [00:14<00:00,  8.74it/s]


Epoch 10/10, Loss: 0.0963
Test Accuracy (After fine-tuning): 0.97%
Test F1 Score (After fine-tuning): 0.86
Test Matthews Correlation Coefficient (After fine-tuning): 0.84


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/Multilingual-MiniLM-L12-H384 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Fold 3, Model microsoft/Multilingual-MiniLM-L12-H384: 4
Test Accuracy (Before fine-tuning): 0.10%
Test F1 Score (Before fine-tuning): 0.18
Test Matthews Correlation Coefficient (Before fine-tuning): 0.00


Epoch 1/10: 100%|██████████| 124/124 [00:14<00:00,  8.76it/s]


Epoch 1/10, Loss: 0.4017


Epoch 2/10: 100%|██████████| 124/124 [00:14<00:00,  8.78it/s]


Epoch 2/10, Loss: 0.3126


Epoch 3/10: 100%|██████████| 124/124 [00:14<00:00,  8.78it/s]


Epoch 3/10, Loss: 0.3122


Epoch 4/10: 100%|██████████| 124/124 [00:14<00:00,  8.77it/s]


Epoch 4/10, Loss: 0.3093


Epoch 5/10: 100%|██████████| 124/124 [00:14<00:00,  8.78it/s]


Epoch 5/10, Loss: 0.2744


Epoch 6/10: 100%|██████████| 124/124 [00:14<00:00,  8.77it/s]


Epoch 6/10, Loss: 0.1601


Epoch 7/10: 100%|██████████| 124/124 [00:14<00:00,  8.79it/s]


Epoch 7/10, Loss: 0.1464


Epoch 8/10: 100%|██████████| 124/124 [00:14<00:00,  8.76it/s]


Epoch 8/10, Loss: 0.1254


Epoch 9/10: 100%|██████████| 124/124 [00:14<00:00,  8.79it/s]


Epoch 9/10, Loss: 0.1171


Epoch 10/10: 100%|██████████| 124/124 [00:14<00:00,  8.75it/s]


Epoch 10/10, Loss: 0.1107
Test Accuracy (After fine-tuning): 0.98%
Test F1 Score (After fine-tuning): 0.91
Test Matthews Correlation Coefficient (After fine-tuning): 0.90


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/Multilingual-MiniLM-L12-H384 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Fold 4, Model microsoft/Multilingual-MiniLM-L12-H384: 4
Test Accuracy (Before fine-tuning): 0.92%
Test F1 Score (Before fine-tuning): 0.00
Test Matthews Correlation Coefficient (Before fine-tuning): 0.00


Epoch 1/10: 100%|██████████| 124/124 [00:14<00:00,  8.72it/s]


Epoch 1/10, Loss: 0.4058


Epoch 2/10: 100%|██████████| 124/124 [00:14<00:00,  8.68it/s]


Epoch 2/10, Loss: 0.3286


Epoch 3/10: 100%|██████████| 124/124 [00:14<00:00,  8.71it/s]


Epoch 3/10, Loss: 0.3244


Epoch 4/10: 100%|██████████| 124/124 [00:14<00:00,  8.70it/s]


Epoch 4/10, Loss: 0.2324


Epoch 5/10: 100%|██████████| 124/124 [00:14<00:00,  8.74it/s]


Epoch 5/10, Loss: 0.0798


Epoch 6/10: 100%|██████████| 124/124 [00:14<00:00,  8.72it/s]


Epoch 6/10, Loss: 0.0572


Epoch 7/10: 100%|██████████| 124/124 [00:14<00:00,  8.74it/s]


Epoch 7/10, Loss: 0.0475


Epoch 8/10: 100%|██████████| 124/124 [00:14<00:00,  8.72it/s]


Epoch 8/10, Loss: 0.0450


Epoch 9/10: 100%|██████████| 124/124 [00:14<00:00,  8.73it/s]


Epoch 9/10, Loss: 0.0386


Epoch 10/10: 100%|██████████| 124/124 [00:14<00:00,  8.71it/s]


Epoch 10/10, Loss: 0.0356
Test Accuracy (After fine-tuning): 0.83%
Test F1 Score (After fine-tuning): 0.47
Test Matthews Correlation Coefficient (After fine-tuning): 0.50
Training and evaluation of model  microsoft/Multilingual-MiniLM-L12-H384  complete!
Average Accuracy After Each Fold: 0.9324755493842067
Average F1 Score After Each Fold: 0.7291933387003103
Average Matthews Correlation Coefficient After Each Fold: 0.7285809654319604
