In [1]:
# =========================
# INSTALLS (if needed)
# =========================
# !pip install -U sentence-transformers scikit-learn torch transformers tqdm

# =========================
# IMPORTS
# =========================
import numpy as np
import pandas as pd
import torch

from torch.utils.data import Dataset, DataLoader
from torch.optim import AdamW 
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import classification_report, confusion_matrix
from tqdm import tqdm
import os, random, torch, numpy as np

def seed_everything(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    ##torch.backends.cudnn.deterministic = False
    ##torch.backends.cudnn.benchmark = True
    torch.use_deterministic_algorithms(True, warn_only=False)
    print(f"[INFO] All seeds set to {seed}")

seed_everything(42)


def seed_worker(worker_id):
    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)
    random.seed(worker_seed)

g = torch.Generator()
g.manual_seed(42)

def free_gpu():
    torch.cuda.empty_cache()
    torch.cuda.ipc_collect()
##set_seed(42)
# =========================
# CONFIG
# =========================



MODELS_NAMES= ["aubmindlab/bert-base-arabert","UBC-NLP/MARBERTv2",
              "aubmindlab/araelectra-base-generator",
               "3ebdola/Dialectal-Arabic-XLM-R-Base", 
               "aubmindlab/araelectra-base-discriminator",
               "bashar-talafha/multi-dialect-bert-base-arabic",
              "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
              "sentence-transformers/distilbert-multilingual-nli-stsb-quora-ranking",
              "sentence-transformers/distiluse-base-multilingual-cased",
               "sentence-transformers/clip-ViT-B-32-multilingual-v1",
               "sentence-transformers/distiluse-base-multilingual-cased-v1",
               "sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
              ]


NUM_EPOCHS = 3
BATCH_SIZE = 8
LR = 2e-5
N_SPLITS = 5
MAX_LEN = 64
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# =========================
# LOAD YOUR DATA
# =========================
# Example:
df = pd.read_csv("/kaggle/input/ekafnewsforkhawla/shuffled_cleaned_text_file for khawla.txt",sep='\t',encoding="utf-8")
df = df[["preprocess1", "label", "dataset"]]
# df must contain: text | label

# Dummy placeholder (REMOVE THIS)
# df = pd.DataFrame({
#     "text": ["example text one", "example text two"],
#     "label": [0, 1]
# })

texts = df["preprocess1"].astype(str).values
labels = df["label"].astype(int).values
num_labels = len(np.unique(labels))

# =========================
# DATASET CLASS
# =========================
class TextDataset(Dataset):
    def __init__(self, texts, labels, tokenizer):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        enc = self.tokenizer(
            self.texts[idx],
            truncation=True,
            padding="max_length",
            max_length=MAX_LEN,
            return_tensors="pt"
        )
        return {
            "input_ids": enc["input_ids"].squeeze(0),
            "attention_mask": enc["attention_mask"].squeeze(0),
            "labels": torch.tensor(self.labels[idx], dtype=torch.long)
        }
for MODEL_NAME in MODELS_NAMES:
    # =========================
    # TOKENIZER
    # =========================
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

    # =========================
    # CROSS VALIDATION
    # =========================
    strat = df['label']

    skf = StratifiedKFold(n_splits=N_SPLITS, shuffle=False, random_state=None)


    all_preds = []
    all_labels = []

    for fold, (train_idx, val_idx) in enumerate(skf.split(texts, strat), 1):
        free_gpu()
        print(f"\n{'='*30}")
        print(f"FOLD {fold}")
        print(f"{'='*30}")
        
        # Datasets
        train_ds = TextDataset(texts[train_idx], labels[train_idx], tokenizer)
        val_ds   = TextDataset(texts[val_idx], labels[val_idx], tokenizer)

        train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=False)
        val_loader   = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False)

        # Model
        model = AutoModelForSequenceClassification.from_pretrained(
            MODEL_NAME,
            num_labels=num_labels
        ).to(DEVICE)

        optimizer = AdamW(model.parameters(), lr=LR)

        # =========================
        # TRAINING
        # =========================
        model.train()
        for epoch in range(NUM_EPOCHS):
            loop = tqdm(train_loader, desc=f"Epoch {epoch+1}")
            for batch in loop:
                optimizer.zero_grad()

                outputs = model(
                    input_ids=batch["input_ids"].to(DEVICE),
                    attention_mask=batch["attention_mask"].to(DEVICE),
                    labels=batch["labels"].to(DEVICE)
                )

                loss = outputs.loss
                loss.backward()
                optimizer.step()

                loop.set_postfix(loss=loss.item())

        # =========================
        # VALIDATION
        # =========================
        model.eval()
        fold_preds = []
        fold_labels = []

        with torch.no_grad():
            for batch in val_loader:
                outputs = model(
                    input_ids=batch["input_ids"].to(DEVICE),
                    attention_mask=batch["attention_mask"].to(DEVICE)
                )
                preds = torch.argmax(outputs.logits, dim=1)

                fold_preds.extend(preds.cpu().numpy())
                fold_labels.extend(batch["labels"].numpy())

        # Store global results
        all_preds.extend(fold_preds)
        all_labels.extend(fold_labels)

        # =========================
        # REPORTS PER FOLD
        # =========================
        print("\nClassification Report (Fold {})".format(fold))
        print(classification_report(fold_labels, fold_preds, digits=4))

        print("Confusion Matrix (Fold {})".format(fold))
        print(confusion_matrix(fold_labels, fold_preds))

    # =========================
    # FINAL GLOBAL RESULTS
    # =========================
    print(f"\n{'✅'*50}")
    print("OVERALL RESULTS (ALL FOLDS)")
    print(MODEL_NAME)
    print(f"{'✅'*50}")

    print("\nFinal Classification Report")
    print(classification_report(all_labels, all_preds, digits=4))

    print("Final Confusion Matrix")
    print(confusion_matrix(all_labels, all_preds))
    print(f"\n{'✅'*50}")


[INFO] All seeds set to 42


tokenizer_config.json:   0%|          | 0.00/637 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/578 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]


FOLD 1


2026-02-01 18:48:52.655615: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1769971732.830493      55 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1769971732.882622      55 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1769971733.316529      55 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1769971733.316567      55 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1769971733.316570      55 computation_placer.cc:177] computation placer alr

model.safetensors:   0%|          | 0.00/543M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at aubmindlab/bert-base-arabert and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 514/514 [00:41<00:00, 12.39it/s, loss=0.567] 
Epoch 2: 100%|██████████| 514/514 [00:40<00:00, 12.63it/s, loss=0.107]  
Epoch 3: 100%|██████████| 514/514 [00:40<00:00, 12.62it/s, loss=0.185]  



Classification Report (Fold 1)
              precision    recall  f1-score   support

           0     0.7560    0.8088    0.7815       272
           1     0.9294    0.9061    0.9176       756

    accuracy                         0.8804      1028
   macro avg     0.8427    0.8575    0.8496      1028
weighted avg     0.8836    0.8804    0.8816      1028

Confusion Matrix (Fold 1)
[[220  52]
 [ 71 685]]

FOLD 2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at aubmindlab/bert-base-arabert and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 514/514 [00:40<00:00, 12.59it/s, loss=0.344] 
Epoch 2: 100%|██████████| 514/514 [00:40<00:00, 12.59it/s, loss=0.0528] 
Epoch 3: 100%|██████████| 514/514 [00:40<00:00, 12.59it/s, loss=0.00665] 



Classification Report (Fold 2)
              precision    recall  f1-score   support

           0     0.6882    0.8603    0.7647       272
           1     0.9448    0.8598    0.9003       756

    accuracy                         0.8599      1028
   macro avg     0.8165    0.8600    0.8325      1028
weighted avg     0.8769    0.8599    0.8644      1028

Confusion Matrix (Fold 2)
[[234  38]
 [106 650]]

FOLD 3


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at aubmindlab/bert-base-arabert and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 514/514 [00:40<00:00, 12.58it/s, loss=0.226] 
Epoch 2: 100%|██████████| 514/514 [00:40<00:00, 12.58it/s, loss=0.0267] 
Epoch 3: 100%|██████████| 514/514 [00:40<00:00, 12.59it/s, loss=0.0585] 



Classification Report (Fold 3)
              precision    recall  f1-score   support

           0     0.8197    0.7326    0.7737       273
           1     0.9069    0.9417    0.9240       755

    accuracy                         0.8862      1028
   macro avg     0.8633    0.8372    0.8488      1028
weighted avg     0.8837    0.8862    0.8841      1028

Confusion Matrix (Fold 3)
[[200  73]
 [ 44 711]]

FOLD 4


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at aubmindlab/bert-base-arabert and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 514/514 [00:40<00:00, 12.58it/s, loss=0.64]  
Epoch 2: 100%|██████████| 514/514 [00:40<00:00, 12.59it/s, loss=0.0584] 
Epoch 3: 100%|██████████| 514/514 [00:40<00:00, 12.58it/s, loss=0.095]   



Classification Report (Fold 4)
              precision    recall  f1-score   support

           0     0.7857    0.6875    0.7333       272
           1     0.8923    0.9325    0.9119       755

    accuracy                         0.8676      1027
   macro avg     0.8390    0.8100    0.8226      1027
weighted avg     0.8640    0.8676    0.8646      1027

Confusion Matrix (Fold 4)
[[187  85]
 [ 51 704]]

FOLD 5


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at aubmindlab/bert-base-arabert and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 514/514 [00:40<00:00, 12.58it/s, loss=0.155] 
Epoch 2: 100%|██████████| 514/514 [00:40<00:00, 12.61it/s, loss=0.0119] 
Epoch 3: 100%|██████████| 514/514 [00:40<00:00, 12.61it/s, loss=0.00975] 



Classification Report (Fold 5)
              precision    recall  f1-score   support

           0     0.7574    0.7574    0.7574       272
           1     0.9126    0.9126    0.9126       755

    accuracy                         0.8715      1027
   macro avg     0.8350    0.8350    0.8350      1027
weighted avg     0.8715    0.8715    0.8715      1027

Confusion Matrix (Fold 5)
[[206  66]
 [ 66 689]]

✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅
OVERALL RESULTS (ALL FOLDS)
aubmindlab/bert-base-arabert
✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅

Final Classification Report
              precision    recall  f1-score   support

           0     0.7560    0.7693    0.7626      1361
           1     0.9163    0.9105    0.9134      3777

    accuracy                         0.8731      5138
   macro avg     0.8361    0.8399    0.8380      5138
weighted avg     0.8739    0.8731    0.8735      5138

Final Confusion Matrix
[[1047  314]
 [ 338 3439]]

✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅

tokenizer_config.json:   0%|          | 0.00/439 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]


FOLD 1


config.json:   0%|          | 0.00/757 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/654M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at UBC-NLP/MARBERTv2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1:   1%|          | 5/514 [00:00<00:44, 11.44it/s, loss=0.416]

model.safetensors:   0%|          | 0.00/654M [00:00<?, ?B/s]

Epoch 1: 100%|██████████| 514/514 [00:44<00:00, 11.58it/s, loss=0.314] 
Epoch 2: 100%|██████████| 514/514 [00:43<00:00, 11.84it/s, loss=0.0671] 
Epoch 3: 100%|██████████| 514/514 [00:43<00:00, 11.86it/s, loss=0.00374]



Classification Report (Fold 1)
              precision    recall  f1-score   support

           0     0.8715    0.7978    0.8330       272
           1     0.9294    0.9577    0.9433       756

    accuracy                         0.9154      1028
   macro avg     0.9004    0.8777    0.8882      1028
weighted avg     0.9141    0.9154    0.9141      1028

Confusion Matrix (Fold 1)
[[217  55]
 [ 32 724]]

FOLD 2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at UBC-NLP/MARBERTv2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 514/514 [00:43<00:00, 11.77it/s, loss=0.259] 
Epoch 2: 100%|██████████| 514/514 [00:43<00:00, 11.83it/s, loss=0.0207] 
Epoch 3: 100%|██████████| 514/514 [00:43<00:00, 11.84it/s, loss=0.00496]



Classification Report (Fold 2)
              precision    recall  f1-score   support

           0     0.8225    0.8346    0.8285       272
           1     0.9402    0.9352    0.9377       756

    accuracy                         0.9086      1028
   macro avg     0.8813    0.8849    0.8831      1028
weighted avg     0.9090    0.9086    0.9088      1028

Confusion Matrix (Fold 2)
[[227  45]
 [ 49 707]]

FOLD 3


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at UBC-NLP/MARBERTv2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 514/514 [00:43<00:00, 11.83it/s, loss=0.239] 
Epoch 2: 100%|██████████| 514/514 [00:43<00:00, 11.85it/s, loss=0.0344] 
Epoch 3: 100%|██████████| 514/514 [00:43<00:00, 11.86it/s, loss=0.00929]



Classification Report (Fold 3)
              precision    recall  f1-score   support

           0     0.8615    0.7289    0.7897       273
           1     0.9072    0.9576    0.9317       755

    accuracy                         0.8969      1028
   macro avg     0.8843    0.8433    0.8607      1028
weighted avg     0.8950    0.8969    0.8940      1028

Confusion Matrix (Fold 3)
[[199  74]
 [ 32 723]]

FOLD 4


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at UBC-NLP/MARBERTv2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 514/514 [00:43<00:00, 11.85it/s, loss=0.216] 
Epoch 2: 100%|██████████| 514/514 [00:43<00:00, 11.84it/s, loss=0.0304] 
Epoch 3: 100%|██████████| 514/514 [00:43<00:00, 11.86it/s, loss=0.118]  



Classification Report (Fold 4)
              precision    recall  f1-score   support

           0     0.7196    0.8493    0.7791       272
           1     0.9419    0.8808    0.9103       755

    accuracy                         0.8724      1027
   macro avg     0.8308    0.8650    0.8447      1027
weighted avg     0.8831    0.8724    0.8756      1027

Confusion Matrix (Fold 4)
[[231  41]
 [ 90 665]]

FOLD 5


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at UBC-NLP/MARBERTv2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 514/514 [00:43<00:00, 11.86it/s, loss=0.2]   
Epoch 2: 100%|██████████| 514/514 [00:43<00:00, 11.87it/s, loss=0.245]  
Epoch 3: 100%|██████████| 514/514 [00:43<00:00, 11.87it/s, loss=0.017]  



Classification Report (Fold 5)
              precision    recall  f1-score   support

           0     0.8879    0.7279    0.8000       272
           1     0.9080    0.9669    0.9365       755

    accuracy                         0.9036      1027
   macro avg     0.8979    0.8474    0.8682      1027
weighted avg     0.9026    0.9036    0.9003      1027

Confusion Matrix (Fold 5)
[[198  74]
 [ 25 730]]

✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅
OVERALL RESULTS (ALL FOLDS)
UBC-NLP/MARBERTv2
✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅

Final Classification Report
              precision    recall  f1-score   support

           0     0.8246    0.7877    0.8057      1361
           1     0.9247    0.9396    0.9321      3777

    accuracy                         0.8994      5138
   macro avg     0.8747    0.8636    0.8689      5138
weighted avg     0.8982    0.8994    0.8986      5138

Final Confusion Matrix
[[1072  289]
 [ 228 3549]]

✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅

tokenizer_config.json:   0%|          | 0.00/393 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/499 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]


FOLD 1


model.safetensors:   0%|          | 0.00/238M [00:00<?, ?B/s]

Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at aubmindlab/araelectra-base-generator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 514/514 [00:32<00:00, 15.98it/s, loss=0.777]
Epoch 2: 100%|██████████| 514/514 [00:32<00:00, 16.05it/s, loss=0.661] 
Epoch 3: 100%|██████████| 514/514 [00:32<00:00, 16.06it/s, loss=0.72]  



Classification Report (Fold 1)
              precision    recall  f1-score   support

           0     0.7834    0.7978    0.7905       272
           1     0.9268    0.9206    0.9237       756

    accuracy                         0.8881      1028
   macro avg     0.8551    0.8592    0.8571      1028
weighted avg     0.8888    0.8881    0.8885      1028

Confusion Matrix (Fold 1)
[[217  55]
 [ 60 696]]

FOLD 2


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at aubmindlab/araelectra-base-generator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 514/514 [00:32<00:00, 15.97it/s, loss=0.693]
Epoch 2: 100%|██████████| 514/514 [00:32<00:00, 16.01it/s, loss=0.622] 
Epoch 3: 100%|██████████| 514/514 [00:31<00:00, 16.08it/s, loss=0.631] 



Classification Report (Fold 2)
              precision    recall  f1-score   support

           0     0.8051    0.8051    0.8051       272
           1     0.9299    0.9299    0.9299       756

    accuracy                         0.8969      1028
   macro avg     0.8675    0.8675    0.8675      1028
weighted avg     0.8969    0.8969    0.8969      1028

Confusion Matrix (Fold 2)
[[219  53]
 [ 53 703]]

FOLD 3


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at aubmindlab/araelectra-base-generator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 514/514 [00:32<00:00, 15.98it/s, loss=1.04] 
Epoch 2: 100%|██████████| 514/514 [00:32<00:00, 16.05it/s, loss=1.06]  
Epoch 3: 100%|██████████| 514/514 [00:32<00:00, 16.06it/s, loss=0.653] 



Classification Report (Fold 3)
              precision    recall  f1-score   support

           0     0.6855    0.7985    0.7377       273
           1     0.9225    0.8675    0.8942       755

    accuracy                         0.8492      1028
   macro avg     0.8040    0.8330    0.8160      1028
weighted avg     0.8596    0.8492    0.8526      1028

Confusion Matrix (Fold 3)
[[218  55]
 [100 655]]

FOLD 4


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at aubmindlab/araelectra-base-generator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 514/514 [00:32<00:00, 16.04it/s, loss=0.638]
Epoch 2: 100%|██████████| 514/514 [00:31<00:00, 16.07it/s, loss=0.403] 
Epoch 3: 100%|██████████| 514/514 [00:32<00:00, 16.02it/s, loss=0.142] 



Classification Report (Fold 4)
              precision    recall  f1-score   support

           0     0.7647    0.7169    0.7400       272
           1     0.9003    0.9205    0.9103       755

    accuracy                         0.8666      1027
   macro avg     0.8325    0.8187    0.8252      1027
weighted avg     0.8644    0.8666    0.8652      1027

Confusion Matrix (Fold 4)
[[195  77]
 [ 60 695]]

FOLD 5


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at aubmindlab/araelectra-base-generator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 514/514 [00:32<00:00, 16.01it/s, loss=0.279]
Epoch 2: 100%|██████████| 514/514 [00:32<00:00, 16.01it/s, loss=0.0781]
Epoch 3: 100%|██████████| 514/514 [00:32<00:00, 16.00it/s, loss=0.0363]



Classification Report (Fold 5)
              precision    recall  f1-score   support

           0     0.7732    0.8272    0.7993       272
           1     0.9361    0.9126    0.9242       755

    accuracy                         0.8900      1027
   macro avg     0.8547    0.8699    0.8618      1027
weighted avg     0.8930    0.8900    0.8911      1027

Confusion Matrix (Fold 5)
[[225  47]
 [ 66 689]]

✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅
OVERALL RESULTS (ALL FOLDS)
aubmindlab/araelectra-base-generator
✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅

Final Classification Report
              precision    recall  f1-score   support

           0     0.7601    0.7891    0.7743      1361
           1     0.9230    0.9102    0.9166      3777

    accuracy                         0.8782      5138
   macro avg     0.8415    0.8497    0.8454      5138
weighted avg     0.8798    0.8782    0.8789      5138

Final Confusion Matrix
[[1074  287]
 [ 339 3438]]

✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅

tokenizer_config.json:   0%|          | 0.00/536 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/280 [00:00<?, ?B/s]


FOLD 1


config.json:   0%|          | 0.00/753 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.11G [00:00<?, ?B/s]

Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at 3ebdola/Dialectal-Arabic-XLM-R-Base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1:   1%|          | 3/514 [00:00<01:00,  8.39it/s, loss=0.48] 

model.safetensors:   0%|          | 0.00/1.11G [00:00<?, ?B/s]

Epoch 1: 100%|██████████| 514/514 [00:56<00:00,  9.13it/s, loss=0.411] 
Epoch 2: 100%|██████████| 514/514 [00:54<00:00,  9.39it/s, loss=0.0734] 
Epoch 3: 100%|██████████| 514/514 [00:54<00:00,  9.36it/s, loss=0.0553]  



Classification Report (Fold 1)
              precision    recall  f1-score   support

           0     0.7047    0.8860    0.7850       272
           1     0.9548    0.8664    0.9085       756

    accuracy                         0.8716      1028
   macro avg     0.8297    0.8762    0.8467      1028
weighted avg     0.8886    0.8716    0.8758      1028

Confusion Matrix (Fold 1)
[[241  31]
 [101 655]]

FOLD 2


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at 3ebdola/Dialectal-Arabic-XLM-R-Base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 514/514 [00:54<00:00,  9.40it/s, loss=0.585] 
Epoch 2: 100%|██████████| 514/514 [00:54<00:00,  9.39it/s, loss=0.23]   
Epoch 3: 100%|██████████| 514/514 [00:54<00:00,  9.40it/s, loss=0.018]   



Classification Report (Fold 2)
              precision    recall  f1-score   support

           0     0.7251    0.8824    0.7960       272
           1     0.9541    0.8796    0.9153       756

    accuracy                         0.8804      1028
   macro avg     0.8396    0.8810    0.8557      1028
weighted avg     0.8935    0.8804    0.8838      1028

Confusion Matrix (Fold 2)
[[240  32]
 [ 91 665]]

FOLD 3


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at 3ebdola/Dialectal-Arabic-XLM-R-Base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 514/514 [00:54<00:00,  9.36it/s, loss=0.592] 
Epoch 2: 100%|██████████| 514/514 [00:54<00:00,  9.37it/s, loss=0.292]  
Epoch 3: 100%|██████████| 514/514 [00:54<00:00,  9.39it/s, loss=0.0249] 



Classification Report (Fold 3)
              precision    recall  f1-score   support

           0     0.8017    0.7106    0.7534       273
           1     0.8995    0.9364    0.9176       755

    accuracy                         0.8765      1028
   macro avg     0.8506    0.8235    0.8355      1028
weighted avg     0.8735    0.8765    0.8740      1028

Confusion Matrix (Fold 3)
[[194  79]
 [ 48 707]]

FOLD 4


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at 3ebdola/Dialectal-Arabic-XLM-R-Base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 514/514 [00:54<00:00,  9.37it/s, loss=0.505] 
Epoch 2: 100%|██████████| 514/514 [00:54<00:00,  9.39it/s, loss=0.134]  
Epoch 3: 100%|██████████| 514/514 [00:54<00:00,  9.39it/s, loss=0.0483] 



Classification Report (Fold 4)
              precision    recall  f1-score   support

           0     0.6772    0.8640    0.7593       272
           1     0.9456    0.8517    0.8962       755

    accuracy                         0.8549      1027
   macro avg     0.8114    0.8578    0.8277      1027
weighted avg     0.8745    0.8549    0.8599      1027

Confusion Matrix (Fold 4)
[[235  37]
 [112 643]]

FOLD 5


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at 3ebdola/Dialectal-Arabic-XLM-R-Base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 514/514 [00:54<00:00,  9.39it/s, loss=0.214] 
Epoch 2: 100%|██████████| 514/514 [00:54<00:00,  9.41it/s, loss=0.0277]
Epoch 3: 100%|██████████| 514/514 [00:54<00:00,  9.39it/s, loss=0.00395]



Classification Report (Fold 5)
              precision    recall  f1-score   support

           0     0.6771    0.8787    0.7648       272
           1     0.9510    0.8490    0.8971       755

    accuracy                         0.8569      1027
   macro avg     0.8140    0.8638    0.8310      1027
weighted avg     0.8785    0.8569    0.8621      1027

Confusion Matrix (Fold 5)
[[239  33]
 [114 641]]

✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅
OVERALL RESULTS (ALL FOLDS)
3ebdola/Dialectal-Arabic-XLM-R-Base
✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅

Final Classification Report
              precision    recall  f1-score   support

           0     0.7115    0.8442    0.7722      1361
           1     0.9398    0.8766    0.9071      3777

    accuracy                         0.8680      5138
   macro avg     0.8256    0.8604    0.8397      5138
weighted avg     0.8793    0.8680    0.8714      5138

Final Confusion Matrix
[[1149  212]
 [ 466 3311]]

✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅

tokenizer_config.json:   0%|          | 0.00/392 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/503 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]


FOLD 1


model.safetensors:   0%|          | 0.00/541M [00:00<?, ?B/s]

Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at aubmindlab/araelectra-base-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 514/514 [00:43<00:00, 11.79it/s, loss=0.414] 
Epoch 2: 100%|██████████| 514/514 [00:43<00:00, 11.92it/s, loss=0.0709] 
Epoch 3: 100%|██████████| 514/514 [00:43<00:00, 11.94it/s, loss=0.00878]



Classification Report (Fold 1)
              precision    recall  f1-score   support

           0     0.7447    0.9007    0.8153       272
           1     0.9614    0.8889    0.9237       756

    accuracy                         0.8920      1028
   macro avg     0.8530    0.8948    0.8695      1028
weighted avg     0.9040    0.8920    0.8950      1028

Confusion Matrix (Fold 1)
[[245  27]
 [ 84 672]]

FOLD 2


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at aubmindlab/araelectra-base-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 514/514 [00:43<00:00, 11.91it/s, loss=0.293] 
Epoch 2: 100%|██████████| 514/514 [00:43<00:00, 11.93it/s, loss=0.104]  
Epoch 3: 100%|██████████| 514/514 [00:43<00:00, 11.91it/s, loss=0.0587] 



Classification Report (Fold 2)
              precision    recall  f1-score   support

           0     0.8248    0.7096    0.7628       272
           1     0.9005    0.9458    0.9226       756

    accuracy                         0.8833      1028
   macro avg     0.8626    0.8277    0.8427      1028
weighted avg     0.8805    0.8833    0.8803      1028

Confusion Matrix (Fold 2)
[[193  79]
 [ 41 715]]

FOLD 3


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at aubmindlab/araelectra-base-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 514/514 [00:43<00:00, 11.94it/s, loss=0.686] 
Epoch 2: 100%|██████████| 514/514 [00:43<00:00, 11.94it/s, loss=0.0827] 
Epoch 3: 100%|██████████| 514/514 [00:42<00:00, 11.95it/s, loss=0.0106] 



Classification Report (Fold 3)
              precision    recall  f1-score   support

           0     0.8578    0.7070    0.7751       273
           1     0.9004    0.9576    0.9281       755

    accuracy                         0.8911      1028
   macro avg     0.8791    0.8323    0.8516      1028
weighted avg     0.8891    0.8911    0.8875      1028

Confusion Matrix (Fold 3)
[[193  80]
 [ 32 723]]

FOLD 4


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at aubmindlab/araelectra-base-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 514/514 [00:43<00:00, 11.90it/s, loss=0.52]  
Epoch 2: 100%|██████████| 514/514 [00:43<00:00, 11.89it/s, loss=0.0458] 
Epoch 3: 100%|██████████| 514/514 [00:43<00:00, 11.92it/s, loss=0.0127] 



Classification Report (Fold 4)
              precision    recall  f1-score   support

           0     0.5664    0.9559    0.7114       272
           1     0.9789    0.7364    0.8405       755

    accuracy                         0.7945      1027
   macro avg     0.7727    0.8462    0.7759      1027
weighted avg     0.8696    0.7945    0.8063      1027

Confusion Matrix (Fold 4)
[[260  12]
 [199 556]]

FOLD 5


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at aubmindlab/araelectra-base-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 514/514 [00:43<00:00, 11.92it/s, loss=0.126] 
Epoch 2: 100%|██████████| 514/514 [00:43<00:00, 11.91it/s, loss=0.0307] 
Epoch 3: 100%|██████████| 514/514 [00:43<00:00, 11.92it/s, loss=0.0118] 



Classification Report (Fold 5)
              precision    recall  f1-score   support

           0     0.7354    0.8787    0.8007       272
           1     0.9530    0.8861    0.9183       755

    accuracy                         0.8841      1027
   macro avg     0.8442    0.8824    0.8595      1027
weighted avg     0.8954    0.8841    0.8872      1027

Confusion Matrix (Fold 5)
[[239  33]
 [ 86 669]]

✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅
OVERALL RESULTS (ALL FOLDS)
aubmindlab/araelectra-base-discriminator
✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅

Final Classification Report
              precision    recall  f1-score   support

           0     0.7188    0.8303    0.7705      1361
           1     0.9352    0.8830    0.9083      3777

    accuracy                         0.8690      5138
   macro avg     0.8270    0.8566    0.8394      5138
weighted avg     0.8779    0.8690    0.8718      5138

Final Confusion Matrix
[[1130  231]
 [ 442 3335]]

✅✅✅✅✅✅✅✅✅✅✅✅✅

config.json:   0%|          | 0.00/456 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]


FOLD 1


pytorch_model.bin:   0%|          | 0.00/445M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bashar-talafha/multi-dialect-bert-base-arabic and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1:   3%|▎         | 16/514 [00:01<00:37, 13.17it/s, loss=0.995] 

model.safetensors:   0%|          | 0.00/445M [00:00<?, ?B/s]

Epoch 1: 100%|██████████| 514/514 [00:39<00:00, 13.02it/s, loss=0.522] 
Epoch 2: 100%|██████████| 514/514 [00:38<00:00, 13.26it/s, loss=0.055]  
Epoch 3: 100%|██████████| 514/514 [00:38<00:00, 13.26it/s, loss=0.0022]  



Classification Report (Fold 1)
              precision    recall  f1-score   support

           0     0.7417    0.9081    0.8165       272
           1     0.9640    0.8862    0.9235       756

    accuracy                         0.8920      1028
   macro avg     0.8529    0.8972    0.8700      1028
weighted avg     0.9052    0.8920    0.8952      1028

Confusion Matrix (Fold 1)
[[247  25]
 [ 86 670]]

FOLD 2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bashar-talafha/multi-dialect-bert-base-arabic and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 514/514 [00:38<00:00, 13.27it/s, loss=0.396] 
Epoch 2: 100%|██████████| 514/514 [00:38<00:00, 13.28it/s, loss=0.0503] 
Epoch 3: 100%|██████████| 514/514 [00:38<00:00, 13.22it/s, loss=0.0161]  



Classification Report (Fold 2)
              precision    recall  f1-score   support

           0     0.6640    0.9007    0.7644       272
           1     0.9590    0.8360    0.8933       756

    accuracy                         0.8531      1028
   macro avg     0.8115    0.8684    0.8289      1028
weighted avg     0.8810    0.8531    0.8592      1028

Confusion Matrix (Fold 2)
[[245  27]
 [124 632]]

FOLD 3


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bashar-talafha/multi-dialect-bert-base-arabic and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 514/514 [00:38<00:00, 13.23it/s, loss=0.28]  
Epoch 2: 100%|██████████| 514/514 [00:38<00:00, 13.25it/s, loss=0.172]  
Epoch 3: 100%|██████████| 514/514 [00:38<00:00, 13.27it/s, loss=0.00743] 



Classification Report (Fold 3)
              precision    recall  f1-score   support

           0     0.8475    0.7326    0.7859       273
           1     0.9078    0.9523    0.9295       755

    accuracy                         0.8940      1028
   macro avg     0.8776    0.8425    0.8577      1028
weighted avg     0.8918    0.8940    0.8914      1028

Confusion Matrix (Fold 3)
[[200  73]
 [ 36 719]]

FOLD 4


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bashar-talafha/multi-dialect-bert-base-arabic and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 514/514 [00:38<00:00, 13.24it/s, loss=0.307] 
Epoch 2: 100%|██████████| 514/514 [00:39<00:00, 13.15it/s, loss=0.0218] 
Epoch 3: 100%|██████████| 514/514 [00:39<00:00, 13.09it/s, loss=0.0272]  



Classification Report (Fold 4)
              precision    recall  f1-score   support

           0     0.8139    0.6912    0.7475       272
           1     0.8945    0.9430    0.9181       755

    accuracy                         0.8763      1027
   macro avg     0.8542    0.8171    0.8328      1027
weighted avg     0.8731    0.8763    0.8729      1027

Confusion Matrix (Fold 4)
[[188  84]
 [ 43 712]]

FOLD 5


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bashar-talafha/multi-dialect-bert-base-arabic and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 514/514 [00:39<00:00, 13.16it/s, loss=0.0668]
Epoch 2: 100%|██████████| 514/514 [00:38<00:00, 13.23it/s, loss=0.037]  
Epoch 3: 100%|██████████| 514/514 [00:38<00:00, 13.24it/s, loss=0.115]   



Classification Report (Fold 5)
              precision    recall  f1-score   support

           0     0.7864    0.8529    0.8183       272
           1     0.9454    0.9166    0.9307       755

    accuracy                         0.8997      1027
   macro avg     0.8659    0.8847    0.8745      1027
weighted avg     0.9033    0.8997    0.9010      1027

Confusion Matrix (Fold 5)
[[232  40]
 [ 63 692]]

✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅
OVERALL RESULTS (ALL FOLDS)
bashar-talafha/multi-dialect-bert-base-arabic
✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅

Final Classification Report
              precision    recall  f1-score   support

           0     0.7596    0.8170    0.7873      1361
           1     0.9322    0.9068    0.9193      3777

    accuracy                         0.8830      5138
   macro avg     0.8459    0.8619    0.8533      5138
weighted avg     0.8865    0.8830    0.8844      5138

Final Confusion Matrix
[[1112  249]
 [ 352 3425]]

✅✅✅✅✅✅✅✅

tokenizer_config.json:   0%|          | 0.00/526 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.08M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]


FOLD 1


config.json:   0%|          | 0.00/645 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/471M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 514/514 [00:34<00:00, 14.99it/s, loss=0.549] 
Epoch 2: 100%|██████████| 514/514 [00:34<00:00, 15.08it/s, loss=0.253] 
Epoch 3: 100%|██████████| 514/514 [00:34<00:00, 15.07it/s, loss=0.0254] 



Classification Report (Fold 1)
              precision    recall  f1-score   support

           0     0.7384    0.8199    0.7770       272
           1     0.9325    0.8955    0.9136       756

    accuracy                         0.8755      1028
   macro avg     0.8355    0.8577    0.8453      1028
weighted avg     0.8812    0.8755    0.8775      1028

Confusion Matrix (Fold 1)
[[223  49]
 [ 79 677]]

FOLD 2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 514/514 [00:34<00:00, 14.95it/s, loss=0.5]   
Epoch 2: 100%|██████████| 514/514 [00:34<00:00, 14.89it/s, loss=0.224] 
Epoch 3: 100%|██████████| 514/514 [00:34<00:00, 14.99it/s, loss=0.0407] 



Classification Report (Fold 2)
              precision    recall  f1-score   support

           0     0.8320    0.7463    0.7868       272
           1     0.9120    0.9458    0.9286       756

    accuracy                         0.8930      1028
   macro avg     0.8720    0.8460    0.8577      1028
weighted avg     0.8908    0.8930    0.8911      1028

Confusion Matrix (Fold 2)
[[203  69]
 [ 41 715]]

FOLD 3


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 514/514 [00:34<00:00, 15.06it/s, loss=0.815] 
Epoch 2: 100%|██████████| 514/514 [00:34<00:00, 15.07it/s, loss=0.169] 
Epoch 3: 100%|██████████| 514/514 [00:34<00:00, 14.89it/s, loss=0.0178] 



Classification Report (Fold 3)
              precision    recall  f1-score   support

           0     0.7482    0.7509    0.7495       273
           1     0.9098    0.9086    0.9092       755

    accuracy                         0.8667      1028
   macro avg     0.8290    0.8298    0.8294      1028
weighted avg     0.8669    0.8667    0.8668      1028

Confusion Matrix (Fold 3)
[[205  68]
 [ 69 686]]

FOLD 4


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 514/514 [00:34<00:00, 14.86it/s, loss=0.609] 
Epoch 2: 100%|██████████| 514/514 [00:34<00:00, 14.86it/s, loss=0.342] 
Epoch 3: 100%|██████████| 514/514 [00:34<00:00, 14.87it/s, loss=0.0176] 



Classification Report (Fold 4)
              precision    recall  f1-score   support

           0     0.7241    0.7721    0.7473       272
           1     0.9159    0.8940    0.9048       755

    accuracy                         0.8617      1027
   macro avg     0.8200    0.8330    0.8261      1027
weighted avg     0.8651    0.8617    0.8631      1027

Confusion Matrix (Fold 4)
[[210  62]
 [ 80 675]]

FOLD 5


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 514/514 [00:34<00:00, 14.99it/s, loss=0.23]  
Epoch 2: 100%|██████████| 514/514 [00:34<00:00, 14.87it/s, loss=0.0229]
Epoch 3: 100%|██████████| 514/514 [00:34<00:00, 14.93it/s, loss=0.00563]



Classification Report (Fold 5)
              precision    recall  f1-score   support

           0     0.7325    0.8456    0.7850       272
           1     0.9411    0.8887    0.9142       755

    accuracy                         0.8773      1027
   macro avg     0.8368    0.8672    0.8496      1027
weighted avg     0.8858    0.8773    0.8800      1027

Confusion Matrix (Fold 5)
[[230  42]
 [ 84 671]]

✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅
OVERALL RESULTS (ALL FOLDS)
sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅

Final Classification Report
              precision    recall  f1-score   support

           0     0.7521    0.7869    0.7691      1361
           1     0.9219    0.9065    0.9142      3777

    accuracy                         0.8749      5138
   macro avg     0.8370    0.8467    0.8416      5138
weighted avg     0.8769    0.8749    0.8757      5138

Final Confusion Matrix
[[1071  290]
 [ 353 34

tokenizer_config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/589 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]


FOLD 1


model.safetensors:   0%|          | 0.00/539M [00:00<?, ?B/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at sentence-transformers/distilbert-multilingual-nli-stsb-quora-ranking and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 514/514 [00:28<00:00, 17.99it/s, loss=0.862] 
Epoch 2: 100%|██████████| 514/514 [00:28<00:00, 18.13it/s, loss=0.327] 
Epoch 3: 100%|██████████| 514/514 [00:28<00:00, 18.22it/s, loss=0.481]  



Classification Report (Fold 1)
              precision    recall  f1-score   support

           0     0.7311    0.8199    0.7730       272
           1     0.9322    0.8915    0.9114       756

    accuracy                         0.8726      1028
   macro avg     0.8317    0.8557    0.8422      1028
weighted avg     0.8790    0.8726    0.8748      1028

Confusion Matrix (Fold 1)
[[223  49]
 [ 82 674]]

FOLD 2


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at sentence-transformers/distilbert-multilingual-nli-stsb-quora-ranking and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 514/514 [00:28<00:00, 18.18it/s, loss=0.791] 
Epoch 2: 100%|██████████| 514/514 [00:28<00:00, 18.22it/s, loss=0.544]  
Epoch 3: 100%|██████████| 514/514 [00:28<00:00, 18.23it/s, loss=0.0105] 



Classification Report (Fold 2)
              precision    recall  f1-score   support

           0     0.6930    0.8382    0.7587       272
           1     0.9371    0.8664    0.9003       756

    accuracy                         0.8589      1028
   macro avg     0.8150    0.8523    0.8295      1028
weighted avg     0.8725    0.8589    0.8629      1028

Confusion Matrix (Fold 2)
[[228  44]
 [101 655]]

FOLD 3


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at sentence-transformers/distilbert-multilingual-nli-stsb-quora-ranking and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 514/514 [00:28<00:00, 18.21it/s, loss=0.945] 
Epoch 2: 100%|██████████| 514/514 [00:28<00:00, 18.20it/s, loss=0.766] 
Epoch 3: 100%|██████████| 514/514 [00:28<00:00, 18.24it/s, loss=0.579]  



Classification Report (Fold 3)
              precision    recall  f1-score   support

           0     0.7251    0.7729    0.7482       273
           1     0.9159    0.8940    0.9048       755

    accuracy                         0.8619      1028
   macro avg     0.8205    0.8335    0.8265      1028
weighted avg     0.8652    0.8619    0.8632      1028

Confusion Matrix (Fold 3)
[[211  62]
 [ 80 675]]

FOLD 4


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at sentence-transformers/distilbert-multilingual-nli-stsb-quora-ranking and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 514/514 [00:28<00:00, 18.17it/s, loss=0.663] 
Epoch 2: 100%|██████████| 514/514 [00:28<00:00, 18.19it/s, loss=0.596] 
Epoch 3: 100%|██████████| 514/514 [00:28<00:00, 18.23it/s, loss=0.131]  



Classification Report (Fold 4)
              precision    recall  f1-score   support

           0     0.6941    0.7757    0.7326       272
           1     0.9156    0.8768    0.8958       755

    accuracy                         0.8500      1027
   macro avg     0.8049    0.8263    0.8142      1027
weighted avg     0.8570    0.8500    0.8526      1027

Confusion Matrix (Fold 4)
[[211  61]
 [ 93 662]]

FOLD 5


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at sentence-transformers/distilbert-multilingual-nli-stsb-quora-ranking and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 514/514 [00:28<00:00, 18.15it/s, loss=0.166] 
Epoch 2: 100%|██████████| 514/514 [00:28<00:00, 18.21it/s, loss=0.227] 
Epoch 3: 100%|██████████| 514/514 [00:28<00:00, 18.22it/s, loss=0.0399] 



Classification Report (Fold 5)
              precision    recall  f1-score   support

           0     0.7747    0.7206    0.7467       272
           1     0.9018    0.9245    0.9130       755

    accuracy                         0.8705      1027
   macro avg     0.8383    0.8225    0.8298      1027
weighted avg     0.8681    0.8705    0.8690      1027

Confusion Matrix (Fold 5)
[[196  76]
 [ 57 698]]

✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅
OVERALL RESULTS (ALL FOLDS)
sentence-transformers/distilbert-multilingual-nli-stsb-quora-ranking
✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅

Final Classification Report
              precision    recall  f1-score   support

           0     0.7213    0.7855    0.7520      1361
           1     0.9201    0.8907    0.9052      3777

    accuracy                         0.8628      5138
   macro avg     0.8207    0.8381    0.8286      5138
weighted avg     0.8675    0.8628    0.8646      5138

Final Confusion Matrix
[[1069  292]


tokenizer_config.json:   0%|          | 0.00/528 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/607 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]


FOLD 1


model.safetensors:   0%|          | 0.00/539M [00:00<?, ?B/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at sentence-transformers/distiluse-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 514/514 [00:28<00:00, 18.20it/s, loss=0.653] 
Epoch 2: 100%|██████████| 514/514 [00:28<00:00, 18.25it/s, loss=0.453] 
Epoch 3: 100%|██████████| 514/514 [00:28<00:00, 18.30it/s, loss=0.441] 



Classification Report (Fold 1)
              precision    recall  f1-score   support

           0     0.7127    0.7022    0.7074       272
           1     0.8934    0.8981    0.8958       756

    accuracy                         0.8463      1028
   macro avg     0.8031    0.8002    0.8016      1028
weighted avg     0.8456    0.8463    0.8459      1028

Confusion Matrix (Fold 1)
[[191  81]
 [ 77 679]]

FOLD 2


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at sentence-transformers/distiluse-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 514/514 [00:28<00:00, 18.27it/s, loss=0.67]  
Epoch 2: 100%|██████████| 514/514 [00:28<00:00, 18.28it/s, loss=0.38]  
Epoch 3: 100%|██████████| 514/514 [00:28<00:00, 18.31it/s, loss=0.186]  



Classification Report (Fold 2)
              precision    recall  f1-score   support

           0     0.7208    0.7500    0.7351       272
           1     0.9087    0.8955    0.9021       756

    accuracy                         0.8570      1028
   macro avg     0.8148    0.8228    0.8186      1028
weighted avg     0.8590    0.8570    0.8579      1028

Confusion Matrix (Fold 2)
[[204  68]
 [ 79 677]]

FOLD 3


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at sentence-transformers/distiluse-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 514/514 [00:28<00:00, 18.29it/s, loss=0.688] 
Epoch 2: 100%|██████████| 514/514 [00:28<00:00, 18.27it/s, loss=0.119] 
Epoch 3: 100%|██████████| 514/514 [00:28<00:00, 18.29it/s, loss=0.0628] 



Classification Report (Fold 3)
              precision    recall  f1-score   support

           0     0.7723    0.5714    0.6568       273
           1     0.8584    0.9391    0.8969       755

    accuracy                         0.8414      1028
   macro avg     0.8153    0.7553    0.7769      1028
weighted avg     0.8355    0.8414    0.8331      1028

Confusion Matrix (Fold 3)
[[156 117]
 [ 46 709]]

FOLD 4


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at sentence-transformers/distiluse-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 514/514 [00:28<00:00, 18.27it/s, loss=0.508] 
Epoch 2: 100%|██████████| 514/514 [00:28<00:00, 18.29it/s, loss=0.394] 
Epoch 3: 100%|██████████| 514/514 [00:28<00:00, 18.32it/s, loss=0.0895] 



Classification Report (Fold 4)
              precision    recall  f1-score   support

           0     0.7297    0.6949    0.7119       272
           1     0.8919    0.9073    0.8995       755

    accuracy                         0.8510      1027
   macro avg     0.8108    0.8011    0.8057      1027
weighted avg     0.8490    0.8510    0.8498      1027

Confusion Matrix (Fold 4)
[[189  83]
 [ 70 685]]

FOLD 5


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at sentence-transformers/distiluse-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 514/514 [00:28<00:00, 18.33it/s, loss=0.303] 
Epoch 2: 100%|██████████| 514/514 [00:28<00:00, 18.30it/s, loss=0.188] 
Epoch 3: 100%|██████████| 514/514 [00:28<00:00, 18.29it/s, loss=0.0173] 



Classification Report (Fold 5)
              precision    recall  f1-score   support

           0     0.6839    0.7794    0.7285       272
           1     0.9163    0.8702    0.8927       755

    accuracy                         0.8462      1027
   macro avg     0.8001    0.8248    0.8106      1027
weighted avg     0.8548    0.8462    0.8492      1027

Confusion Matrix (Fold 5)
[[212  60]
 [ 98 657]]

✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅
OVERALL RESULTS (ALL FOLDS)
sentence-transformers/distiluse-base-multilingual-cased
✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅

Final Classification Report
              precision    recall  f1-score   support

           0     0.7201    0.6995    0.7097      1361
           1     0.8928    0.9020    0.8974      3777

    accuracy                         0.8484      5138
   macro avg     0.8065    0.8008    0.8035      5138
weighted avg     0.8471    0.8484    0.8477      5138

Final Confusion Matrix
[[ 952  409]
 [ 370 3407]]

tokenizer_config.json:   0%|          | 0.00/371 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/572 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]


FOLD 1


model.safetensors:   0%|          | 0.00/539M [00:00<?, ?B/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at sentence-transformers/clip-ViT-B-32-multilingual-v1 and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 514/514 [00:28<00:00, 18.21it/s, loss=0.724] 
Epoch 2: 100%|██████████| 514/514 [00:28<00:00, 18.19it/s, loss=0.757]  
Epoch 3: 100%|██████████| 514/514 [00:28<00:00, 18.29it/s, loss=0.638]  



Classification Report (Fold 1)
              precision    recall  f1-score   support

           0     0.7374    0.8051    0.7698       272
           1     0.9275    0.8968    0.9119       756

    accuracy                         0.8726      1028
   macro avg     0.8324    0.8510    0.8408      1028
weighted avg     0.8772    0.8726    0.8743      1028

Confusion Matrix (Fold 1)
[[219  53]
 [ 78 678]]

FOLD 2


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at sentence-transformers/clip-ViT-B-32-multilingual-v1 and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 514/514 [00:28<00:00, 18.23it/s, loss=0.683] 
Epoch 2: 100%|██████████| 514/514 [00:28<00:00, 18.22it/s, loss=0.313] 
Epoch 3: 100%|██████████| 514/514 [00:28<00:00, 18.26it/s, loss=0.0113] 



Classification Report (Fold 2)
              precision    recall  f1-score   support

           0     0.7358    0.8603    0.7932       272
           1     0.9465    0.8889    0.9168       756

    accuracy                         0.8813      1028
   macro avg     0.8412    0.8746    0.8550      1028
weighted avg     0.8907    0.8813    0.8841      1028

Confusion Matrix (Fold 2)
[[234  38]
 [ 84 672]]

FOLD 3


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at sentence-transformers/clip-ViT-B-32-multilingual-v1 and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 514/514 [00:28<00:00, 18.18it/s, loss=0.614] 
Epoch 2: 100%|██████████| 514/514 [00:28<00:00, 18.19it/s, loss=0.119]  
Epoch 3: 100%|██████████| 514/514 [00:28<00:00, 18.17it/s, loss=0.111]  



Classification Report (Fold 3)
              precision    recall  f1-score   support

           0     0.8512    0.6703    0.7500       273
           1     0.8893    0.9576    0.9222       755

    accuracy                         0.8813      1028
   macro avg     0.8702    0.8140    0.8361      1028
weighted avg     0.8792    0.8813    0.8765      1028

Confusion Matrix (Fold 3)
[[183  90]
 [ 32 723]]

FOLD 4


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at sentence-transformers/clip-ViT-B-32-multilingual-v1 and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 514/514 [00:28<00:00, 18.20it/s, loss=0.534] 
Epoch 2: 100%|██████████| 514/514 [00:28<00:00, 18.21it/s, loss=0.329] 
Epoch 3: 100%|██████████| 514/514 [00:28<00:00, 18.32it/s, loss=0.0753] 



Classification Report (Fold 4)
              precision    recall  f1-score   support

           0     0.7222    0.7647    0.7429       272
           1     0.9134    0.8940    0.9036       755

    accuracy                         0.8598      1027
   macro avg     0.8178    0.8294    0.8232      1027
weighted avg     0.8628    0.8598    0.8610      1027

Confusion Matrix (Fold 4)
[[208  64]
 [ 80 675]]

FOLD 5


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at sentence-transformers/clip-ViT-B-32-multilingual-v1 and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 514/514 [00:28<00:00, 18.32it/s, loss=0.294] 
Epoch 2: 100%|██████████| 514/514 [00:28<00:00, 18.29it/s, loss=0.0654]
Epoch 3: 100%|██████████| 514/514 [00:28<00:00, 18.25it/s, loss=0.0811] 



Classification Report (Fold 5)
              precision    recall  f1-score   support

           0     0.7634    0.7831    0.7731       272
           1     0.9211    0.9126    0.9168       755

    accuracy                         0.8783      1027
   macro avg     0.8423    0.8478    0.8450      1027
weighted avg     0.8794    0.8783    0.8788      1027

Confusion Matrix (Fold 5)
[[213  59]
 [ 66 689]]

✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅
OVERALL RESULTS (ALL FOLDS)
sentence-transformers/clip-ViT-B-32-multilingual-v1
✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅

Final Classification Report
              precision    recall  f1-score   support

           0     0.7566    0.7766    0.7665      1361
           1     0.9187    0.9100    0.9143      3777

    accuracy                         0.8747      5138
   macro avg     0.8377    0.8433    0.8404      5138
weighted avg     0.8758    0.8747    0.8752      5138

Final Confusion Matrix
[[1057  304]
 [ 340 3437]]

✅✅

tokenizer_config.json:   0%|          | 0.00/452 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/556 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]


FOLD 1


model.safetensors:   0%|          | 0.00/539M [00:00<?, ?B/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at sentence-transformers/distiluse-base-multilingual-cased-v1 and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 514/514 [00:28<00:00, 17.93it/s, loss=0.537] 
Epoch 2: 100%|██████████| 514/514 [00:28<00:00, 17.94it/s, loss=0.427] 
Epoch 3: 100%|██████████| 514/514 [00:28<00:00, 17.99it/s, loss=0.0351] 



Classification Report (Fold 1)
              precision    recall  f1-score   support

           0     0.7416    0.7279    0.7347       272
           1     0.9028    0.9087    0.9057       756

    accuracy                         0.8609      1028
   macro avg     0.8222    0.8183    0.8202      1028
weighted avg     0.8601    0.8609    0.8605      1028

Confusion Matrix (Fold 1)
[[198  74]
 [ 69 687]]

FOLD 2


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at sentence-transformers/distiluse-base-multilingual-cased-v1 and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 514/514 [00:28<00:00, 17.96it/s, loss=0.719] 
Epoch 2: 100%|██████████| 514/514 [00:28<00:00, 17.96it/s, loss=0.536] 
Epoch 3: 100%|██████████| 514/514 [00:28<00:00, 18.04it/s, loss=0.174] 



Classification Report (Fold 2)
              precision    recall  f1-score   support

           0     0.7869    0.7059    0.7442       272
           1     0.8980    0.9312    0.9143       756

    accuracy                         0.8716      1028
   macro avg     0.8424    0.8185    0.8292      1028
weighted avg     0.8686    0.8716    0.8693      1028

Confusion Matrix (Fold 2)
[[192  80]
 [ 52 704]]

FOLD 3


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at sentence-transformers/distiluse-base-multilingual-cased-v1 and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 514/514 [00:28<00:00, 18.25it/s, loss=0.682] 
Epoch 2: 100%|██████████| 514/514 [00:28<00:00, 18.04it/s, loss=0.0818]
Epoch 3: 100%|██████████| 514/514 [00:28<00:00, 18.01it/s, loss=0.169]  



Classification Report (Fold 3)
              precision    recall  f1-score   support

           0     0.7412    0.6923    0.7159       273
           1     0.8913    0.9126    0.9018       755

    accuracy                         0.8541      1028
   macro avg     0.8163    0.8024    0.8089      1028
weighted avg     0.8515    0.8541    0.8525      1028

Confusion Matrix (Fold 3)
[[189  84]
 [ 66 689]]

FOLD 4


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at sentence-transformers/distiluse-base-multilingual-cased-v1 and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 514/514 [00:28<00:00, 18.24it/s, loss=0.744] 
Epoch 2: 100%|██████████| 514/514 [00:27<00:00, 18.41it/s, loss=0.417] 
Epoch 3: 100%|██████████| 514/514 [00:27<00:00, 18.39it/s, loss=0.0643]



Classification Report (Fold 4)
              precision    recall  f1-score   support

           0     0.6939    0.7500    0.7208       272
           1     0.9072    0.8808    0.8938       755

    accuracy                         0.8462      1027
   macro avg     0.8006    0.8154    0.8073      1027
weighted avg     0.8507    0.8462    0.8480      1027

Confusion Matrix (Fold 4)
[[204  68]
 [ 90 665]]

FOLD 5


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at sentence-transformers/distiluse-base-multilingual-cased-v1 and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 514/514 [00:28<00:00, 18.20it/s, loss=0.217] 
Epoch 2: 100%|██████████| 514/514 [00:28<00:00, 18.30it/s, loss=0.0466]
Epoch 3: 100%|██████████| 514/514 [00:27<00:00, 18.36it/s, loss=0.039] 



Classification Report (Fold 5)
              precision    recall  f1-score   support

           0     0.7345    0.7426    0.7386       272
           1     0.9069    0.9033    0.9051       755

    accuracy                         0.8608      1027
   macro avg     0.8207    0.8230    0.8218      1027
weighted avg     0.8613    0.8608    0.8610      1027

Confusion Matrix (Fold 5)
[[202  70]
 [ 73 682]]

✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅
OVERALL RESULTS (ALL FOLDS)
sentence-transformers/distiluse-base-multilingual-cased-v1
✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅

Final Classification Report
              precision    recall  f1-score   support

           0     0.7378    0.7237    0.7307      1361
           1     0.9011    0.9073    0.9042      3777

    accuracy                         0.8587      5138
   macro avg     0.8195    0.8155    0.8175      5138
weighted avg     0.8579    0.8587    0.8583      5138

Final Confusion Matrix
[[ 985  376]
 [ 350 342

tokenizer_config.json:   0%|          | 0.00/402 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/723 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]


FOLD 1


model.safetensors:   0%|          | 0.00/1.11G [00:00<?, ?B/s]

Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at sentence-transformers/paraphrase-multilingual-mpnet-base-v2 and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 514/514 [00:54<00:00,  9.35it/s, loss=0.999] 
Epoch 2: 100%|██████████| 514/514 [00:54<00:00,  9.40it/s, loss=0.609] 
Epoch 3: 100%|██████████| 514/514 [00:54<00:00,  9.38it/s, loss=0.0832] 



Classification Report (Fold 1)
              precision    recall  f1-score   support

           0     0.8837    0.6985    0.7803       272
           1     0.8991    0.9669    0.9318       756

    accuracy                         0.8959      1028
   macro avg     0.8914    0.8327    0.8560      1028
weighted avg     0.8951    0.8959    0.8917      1028

Confusion Matrix (Fold 1)
[[190  82]
 [ 25 731]]

FOLD 2


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at sentence-transformers/paraphrase-multilingual-mpnet-base-v2 and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 514/514 [00:54<00:00,  9.38it/s, loss=0.324] 
Epoch 2: 100%|██████████| 514/514 [00:54<00:00,  9.38it/s, loss=0.254]  
Epoch 3: 100%|██████████| 514/514 [00:55<00:00,  9.28it/s, loss=0.0117] 



Classification Report (Fold 2)
              precision    recall  f1-score   support

           0     0.8514    0.6949    0.7652       272
           1     0.8970    0.9563    0.9257       756

    accuracy                         0.8872      1028
   macro avg     0.8742    0.8256    0.8455      1028
weighted avg     0.8849    0.8872    0.8833      1028

Confusion Matrix (Fold 2)
[[189  83]
 [ 33 723]]

FOLD 3


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at sentence-transformers/paraphrase-multilingual-mpnet-base-v2 and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 514/514 [00:55<00:00,  9.23it/s, loss=0.376] 
Epoch 2: 100%|██████████| 514/514 [00:55<00:00,  9.24it/s, loss=0.112]  
Epoch 3: 100%|██████████| 514/514 [00:55<00:00,  9.26it/s, loss=0.0179] 



Classification Report (Fold 3)
              precision    recall  f1-score   support

           0     0.8160    0.7473    0.7801       273
           1     0.9113    0.9391    0.9250       755

    accuracy                         0.8881      1028
   macro avg     0.8637    0.8432    0.8525      1028
weighted avg     0.8860    0.8881    0.8865      1028

Confusion Matrix (Fold 3)
[[204  69]
 [ 46 709]]

FOLD 4


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at sentence-transformers/paraphrase-multilingual-mpnet-base-v2 and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 514/514 [00:55<00:00,  9.23it/s, loss=0.352] 
Epoch 2: 100%|██████████| 514/514 [00:55<00:00,  9.30it/s, loss=0.569]  
Epoch 3: 100%|██████████| 514/514 [00:54<00:00,  9.37it/s, loss=0.0199] 



Classification Report (Fold 4)
              precision    recall  f1-score   support

           0     0.7331    0.7978    0.7641       272
           1     0.9248    0.8954    0.9098       755

    accuracy                         0.8695      1027
   macro avg     0.8289    0.8466    0.8370      1027
weighted avg     0.8740    0.8695    0.8712      1027

Confusion Matrix (Fold 4)
[[217  55]
 [ 79 676]]

FOLD 5


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at sentence-transformers/paraphrase-multilingual-mpnet-base-v2 and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 514/514 [00:54<00:00,  9.36it/s, loss=0.0941]
Epoch 2: 100%|██████████| 514/514 [00:55<00:00,  9.32it/s, loss=0.0213] 
Epoch 3: 100%|██████████| 514/514 [00:54<00:00,  9.36it/s, loss=0.0312] 



Classification Report (Fold 5)
              precision    recall  f1-score   support

           0     0.8008    0.7096    0.7524       272
           1     0.8995    0.9364    0.9176       755

    accuracy                         0.8763      1027
   macro avg     0.8502    0.8230    0.8350      1027
weighted avg     0.8734    0.8763    0.8738      1027

Confusion Matrix (Fold 5)
[[193  79]
 [ 48 707]]

✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅
OVERALL RESULTS (ALL FOLDS)
sentence-transformers/paraphrase-multilingual-mpnet-base-v2
✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅✅

Final Classification Report
              precision    recall  f1-score   support

           0     0.8113    0.7296    0.7683      1361
           1     0.9060    0.9388    0.9221      3777

    accuracy                         0.8834      5138
   macro avg     0.8586    0.8342    0.8452      5138
weighted avg     0.8809    0.8834    0.8814      5138

Final Confusion Matrix
[[ 993  368]
 [ 231 35