In [None]:
# Mount your Google Drive if needed
# Later cells read the input CSV/pickle files from, and write the pickled
# results to, paths under /content/drive/MyDrive, so the mount must succeed first.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Note: Some libraries or dependencies used in this code may become deprecated over time.
# If you encounter deprecated libraries, replace them with suitable alternatives and
# update the corresponding sections of the code that rely on the deprecated classes or methods accordingly.

!pip install transformers[torch]==4.31.0
!pip install datasets

import os
import json
import gzip
import pandas as pd
import collections
import nltk
from google.colab import files

from urllib.request import urlopen

import random
import numpy as np
from tqdm import tqdm_notebook as tqdm
from collections import defaultdict
import pickle
from IPython.display import clear_output
import torch
from transformers import RobertaTokenizerFast
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support,  recall_score, precision_score, f1_score, roc_auc_score, confusion_matrix, matthews_corrcoef
from sklearn.model_selection import train_test_split,KFold, StratifiedKFold
from transformers import RobertaForSequenceClassification, Trainer, TrainingArguments, EarlyStoppingCallback, AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_metric





In [None]:

# Report how many CUDA devices are visible and the name of device 0.
# Nothing is printed when no GPU is available.
if torch.cuda.is_available():
    print(torch.cuda.device_count(), torch.cuda.get_device_name(0), sep="\n")

# Training

In [None]:
def set_seed(SEED):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random``, NumPy's global generator, and PyTorch (CPU and
    all CUDA devices), then asks cuDNN to use deterministic algorithms.

    Args:
        SEED: integer seed applied to all generators.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(SEED)
    torch.backends.cudnn.deterministic = True



class AmazonDataset(torch.utils.data.Dataset):
    """Torch dataset wrapping tokenizer encodings and optional labels.

    Args:
        encodings: mapping from feature name (must include ``"input_ids"``) to a
            sequence of per-example values, as returned by the tokenizer calls
            in this notebook.
        labels: optional sequence of per-example labels (list, NumPy array, ...).
            When ``None`` or empty, items carry no ``'labels'`` entry.
    """

    def __init__(self, encodings, labels= None):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        """Return example ``idx`` as a dict of tensors (plus ``'labels'`` if available)."""
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        # Explicit None/length check: the truth value of a NumPy array or pandas
        # Series is ambiguous and would raise, while an empty list must still
        # behave as "no labels".
        if self.labels is not None and len(self.labels) > 0:
            item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        """Number of examples, taken from the ``input_ids`` feature."""
        return len(self.encodings["input_ids"])




def prepare_auxilary_datasets(dataset):
    """Tokenise an out-of-domain DataFrame and wrap it as an ``AmazonDataset``.

    Uses the module-level ``tokenizer`` that the training loop sets for the
    current model, so it must be called after that assignment.

    Args:
        dataset: DataFrame with a ``sentence`` column and a target column named
            either ``label`` or, if that is absent, ``helpful``.

    Returns:
        AmazonDataset holding the encoded sentences and their labels.
    """
    label_column = "label" if "label" in dataset.columns.values else "helpful"
    sentences = dataset["sentence"].tolist()
    targets = dataset[label_column].tolist()

    # Same tokenisation settings as the in-domain splits.
    encoded = tokenizer(sentences, truncation=True, padding=True, max_length=40)
    return AmazonDataset(encoded, targets)





def compute_metrics(eval_pred):
    """Compute binary-classification metrics for the Hugging Face ``Trainer``.

    Args:
        eval_pred: pair ``(logits, labels)``. ``logits`` is indexed as a 2-D
            array with one column per class (column 1 = positive class);
            ``labels`` holds the 0/1 targets.

    Returns:
        dict with accuracy, f1, macro_f1, precision, recall, specificity, mcc
        and AUC. Ratios whose denominator is zero are reported as 0; AUC is
        NaN when ``labels`` contains a single class (it is undefined there).
    """
    logits, labels = eval_pred
    # NOTE(review): some architectures may hand back a tuple of arrays with the
    # logits first — unwrap defensively; confirm against the models in use.
    if isinstance(logits, tuple):
        logits = logits[0]
    logits = np.asarray(logits)
    labels = np.asarray(labels)
    predictions = np.argmax(logits, axis=1)

    accuracy = accuracy_score(y_true=labels, y_pred=predictions)
    # zero_division=0 returns the same value as the default ("warn") but without
    # flooding the notebook output with UndefinedMetricWarning.
    recall = recall_score(y_true=labels, y_pred=predictions, zero_division=0)
    precision = precision_score(y_true=labels, y_pred=predictions, zero_division=0)
    f1 = f1_score(y_true=labels, y_pred=predictions, zero_division=0)
    macro_f1 = f1_score(y_true=labels, y_pred=predictions, average= 'macro', zero_division=0)
    mcc = matthews_corrcoef(labels, predictions)

    # Rank examples by the positive-vs-negative logit margin. The margin is a
    # monotone function of softmax P(class 1); the raw class-1 logit alone is not,
    # because it ignores the class-0 logit.
    positive_margin = logits[:, 1] - logits[:, 0]
    if np.unique(labels).size < 2:
        auc = float('nan')  # roc_auc_score raises when only one class is present
    else:
        auc = roc_auc_score(y_true= labels, y_score= positive_margin)

    # labels=[0, 1] forces a 2x2 matrix even if a class is missing from both
    # labels and predictions, so the four-way unpacking cannot fail.
    tn, fp, fn, tp = confusion_matrix(y_true= labels, y_pred=predictions, labels=[0, 1]).ravel()
    negatives = tn + fp
    specificity = tn / negatives if negatives > 0 else 0.0

    return {
        'accuracy'    : accuracy,
        'f1'          : f1,
        'macro_f1'    : macro_f1,
        'precision'   : precision,
        'recall'      : recall,
        'specificity' : specificity,
        'mcc'         : mcc,
        'AUC'         : auc
    }



def return_training_args():
    """Create a fresh ``TrainingArguments`` with the notebook's default settings.

    A new object is built on every call so that per-model / per-ratio overrides
    applied by the caller never leak into the next configuration.

    Returns:
        TrainingArguments: 6 epochs, learning rate 2e-5, train batch 16,
        eval batch 256, evaluation every epoch, logging every 50 steps,
        and no checkpoints saved.
    """
    settings = {
        "output_dir": "test-amazon",
        # Optimisation
        "learning_rate": 2e-5,
        "weight_decay": 0.01,
        "num_train_epochs": 6,
        "per_device_train_batch_size": 16,
        "per_device_eval_batch_size": 256,
        # Evaluate on the eval dataset at the end of every epoch.
        "evaluation_strategy": "epoch",
        # Log the training loss every `logging_steps` optimisation steps.
        "logging_strategy": "steps",
        "logging_steps": 50,
        # No checkpoints are written, hence nothing to reload at the end.
        "save_strategy": "no",
        "load_best_model_at_end": False,
    }
    return TrainingArguments(**settings)




In [None]:
# Pickled object stored on the mounted Drive (not referenced by the cells visible here).
frequency = pd.read_pickle('/content/drive/MyDrive/paper1/datasets/dictionary frequency of occurence of needs')

# In-domain dataset: source of the train / validation / test splits.
data_set_1 = pd.read_csv('/content/drive/MyDrive/Fairness paper datasets/8000_sentences_with_need_IDs.csv')

# Out-of-domain datasets: only used for evaluation after training.
data_set_2 = pd.read_csv('/content/drive/MyDrive/Fairness paper datasets/Electronics.csv')
data_set_3 = pd.read_csv('/content/drive/MyDrive/Fairness paper datasets/baby.csv')
data_set_4 = pd.read_csv('/content/drive/MyDrive/Fairness paper datasets/Pet_supplies.csv')
data_set_5 = pd.read_csv('/content/drive/MyDrive/Fairness paper datasets/Sport_outdoors.csv')

# The tokenizer needs plain strings, so cast every 'sentence' column to str.
for review_frame in (data_set_1, data_set_2, data_set_3, data_set_4, data_set_5):
    review_frame['sentence'] = review_frame['sentence'].astype(str)

# Fraction of the in-domain dataset used for training, keyed by the label
# that appears in the saved result file names.
ratios = {'A': 0.50, 'B': 0.20, 'C': 0.10, 'D': 0.05, 'E': 0.025, 'F': 0.01}

# Short model name -> Hugging Face Hub checkpoint.
models = {
    'albert_base': "albert-base-v2",
    'distilbert': "distilbert-base-uncased",
    'bertB': "bert-base-uncased",
    'bertL': "bert-large-uncased",
    'robertB': "roberta-base",
    'robertL': "roberta-large",
    'allenaiB': "allenai/reviews_roberta_base",

    'ALBERT-L': "albert/albert-large-v2",
    'DISRoBERTa-B': "distilbert/distilroberta-base",
    'XLNet-B': "xlnet/xlnet-base-cased",
    'XLNet-L': "xlnet/xlnet-large-cased",
    'DEBERT-B': "microsoft/deberta-base",
    'DEBERT-L': "microsoft/deberta-large",
    'XLM-B': "FacebookAI/xlm-roberta-base",
    'XLM-L': "FacebookAI/xlm-roberta-large",
}

# One training run per seed for every (ratio, model) pair; values were randomly selected.
seeds = [94, 791, 5, 6932, 1759, 323, 1694, 9741, 200, 999]

# Nested results: ratio -> model name -> seed -> per-dataset outputs.
all_datasets_ratio_models_seed_results = {}

# Flat row accumulators.
results, results_exclude, results_garbage, results_uninf_exc = [], [], [], []



# Learning rate per model. Models that are not listed use `default_learning_rate`.
# A lookup table replaces the former chain of independent `if` statements, whose
# trailing `else` was bound only to the 'XLM-L' test and therefore reset the
# learning rate of every other model to 2e-5.
model_learning_rates = {
    'bertL':        2e-5,
    'robertL':      1.25e-5,
    'ALBERT-L':     8e-6,
    'DISRoBERTa-B': 2e-5,
    'XLNet-B':      2e-5,
    'XLNet-L':      1e-5,
    'DEBERT-B':     3e-5,
    'DEBERT-L':     1e-5,
    'XLM-B':        2e-5,
    'XLM-L':        8e-6,
}
default_learning_rate = 2e-5

# Out-of-domain evaluation sets, keyed by the name used in the results dictionary.
auxiliary_frames = {
    "electronics":    data_set_2,
    "baby":           data_set_3,
    "pet-supplies":   data_set_4,
    "Sport-outdoors": data_set_5,
}

for ratio_label, ratio in ratios.items():
    all_datasets_ratio_models_seed_results.update({ratio:{}})

    for model_name, model_address in models.items():
        # `tokenizer` is deliberately module-level: prepare_auxilary_datasets reads it.
        tokenizer = AutoTokenizer.from_pretrained(model_address)
        all_datasets_ratio_models_seed_results[ratio].update({model_name:{}})
        training_args = return_training_args()  # fresh arguments for every (ratio, model) pair

        # Smaller training sets get smaller batches and more frequent logging.
        if (ratio < 0.20) and (ratio >= 0.05):
            training_args.per_device_train_batch_size = 8
            training_args.logging_steps = 25
        elif (ratio < 0.05):
            training_args.per_device_train_batch_size = 4
            training_args.logging_steps = 10

        training_args.learning_rate = model_learning_rates.get(model_name, default_learning_rate)

        # The out-of-domain encodings depend only on the tokenizer, so build them
        # once per model instead of once per seed.
        auxiliary_datasets = {
            name: prepare_auxilary_datasets(frame) for name, frame in auxiliary_frames.items()
        }

        for seed in seeds:
            set_seed(seed)
            training_args.seed = seed

            # Balanced training sample: r sentences from each class.
            r = int(ratio * data_set_1.shape[0] / 2)
            df_train_uninf = data_set_1[data_set_1.label == 0].sample(n = r, random_state = seed)
            df_train_inf = data_set_1[data_set_1.label == 1].sample(n = r, random_state = seed)
            df_train = pd.concat([df_train_inf, df_train_uninf]).sample(frac= 1, random_state = seed)
            # Rows not selected for training are split into a balanced validation
            # set (400 per class) and the remaining test set.
            df_test_all = data_set_1.drop(df_train.index)
            df_val_uninf = df_test_all[df_test_all.label == 0].sample(n = 400, random_state = seed)
            df_val_inf = df_test_all[df_test_all.label == 1].sample(n = 400, random_state = seed)
            df_val  = pd.concat([df_val_inf, df_val_uninf]).sample(frac= 1, random_state = seed)
            df_test = df_test_all.drop(df_val.index)

            # Reset index
            df_train = df_train.reset_index(drop=True)
            df_val   = df_val.reset_index(drop=True)
            df_test  = df_test.reset_index(drop=True)

            # Encoding
            train_main_encodings = tokenizer(df_train.sentence.tolist(), truncation=True, padding=True, max_length=40)
            val_main_encodings   = tokenizer(df_val.sentence.tolist(), truncation=True, padding=True, max_length=40)
            test_main_encodings  = tokenizer(df_test.sentence.tolist(), truncation=True, padding=True, max_length=40)

            train_dataset     = AmazonDataset(train_main_encodings, df_train.label.tolist())
            val_dataset       = AmazonDataset(val_main_encodings, df_val.label.tolist())
            test_main_dataset = AmazonDataset(test_main_encodings, df_test.label.tolist())

            print(f'Model seed is: {training_args.seed}, total number of '
                  f'training (for each informative and uninform class) and test samples: '
                  f'{df_train.shape[0]}, {df_test.shape[0]}', ratio, model_name,
                  training_args.seed, training_args.learning_rate,
                  training_args.per_device_train_batch_size)

            # Start every run from the pretrained checkpoint.
            model = AutoModelForSequenceClassification.from_pretrained(model_address)
            trainer = Trainer(
                model=model,
                args=training_args,
                train_dataset = train_dataset,
                eval_dataset = val_dataset,
                compute_metrics = compute_metrics
            )

            trainoutput = trainer.train()
            tr_loss = trainoutput.training_loss

            # In-domain test metrics go into the flat `results` table.
            pred = trainer.predict(test_main_dataset)
            results.append(["oral-care", ratio, model_name, seed, pred.metrics['test_accuracy'], pred.metrics['test_f1'], pred.metrics['test_macro_f1'],
                            pred.metrics['test_precision'], pred.metrics['test_recall'], pred.metrics['test_specificity'],
                            pred.metrics['test_mcc'], pred.metrics['test_AUC']])
            log_history = trainer.state.log_history

            # Full per-seed record: split IDs and in-domain outputs, followed by
            # the out-of-domain predictions.
            seed_record = {
                "oral-care": [df_train.Sentence_ID.values, df_val.Sentence_ID.values, df_test.Sentence_ID.values, pred, tr_loss, log_history],
            }
            for auxiliary_name, auxiliary_dataset in auxiliary_datasets.items():
                seed_record[auxiliary_name] = trainer.predict(auxiliary_dataset)
            all_datasets_ratio_models_seed_results[ratio][model_name].update({seed: seed_record})

        # Checkpoint the results to Google Drive after every (ratio, model) pair.
        # Note: the whole cumulative dictionary is written each time, not just
        # the entry for the current pair.
        file_path = f'/content/drive/MyDrive/paper1/Sample_Efficiency/all_results_{ratio_label}_{model_name}'
        with open(file_path, 'wb') as handle:
            pickle.dump(all_datasets_ratio_models_seed_results, handle, protocol=pickle.HIGHEST_PROTOCOL)




# Column layout shared by the in-domain and out-of-domain result tables.
result_columns = ['category', 'ratio', 'model', 'seed', 'accuracy', 'f1-score', 'macro_f1',
                  'precision', 'recall', 'specificity', 'mcc', 'AUC']

# In-domain ("oral-care") results: one row per (ratio, model, seed) run.
df_result = pd.DataFrame(results, columns = result_columns)
display(df_result)
display(df_result.groupby(df_result.category).mean(numeric_only = True))


# Out-of-domain rows are rebuilt from the nested results dictionary; the
# in-domain rows are already available in df_result.
tables = {"oral-care": df_result, "electronics": [], "baby": [],
          "pet-supplies": [], "Sport-outdoors": []}

for rto, values in all_datasets_ratio_models_seed_results.items():
    for model_name, value in values.items():
        for seed, val in value.items():
            for dataset, v in val.items():
                if dataset == "oral-care":
                    continue
                # Every metric, including MCC, must come from this dataset's own
                # prediction output `v` (not from the last in-domain `pred`).
                tables[dataset].append([dataset, rto, model_name, seed, v.metrics['test_accuracy'],
                                        v.metrics['test_f1'], v.metrics['test_macro_f1'], v.metrics['test_precision'],
                                        v.metrics['test_recall'], v.metrics['test_specificity'],
                                        v.metrics['test_mcc'], v.metrics['test_AUC']]
                                       )


df_total_list = [pd.DataFrame(t, columns = result_columns) for k, t in tables.items()]
df = pd.concat(df_total_list).drop_duplicates().reset_index(drop=True)
display(df.groupby(df.category).mean(numeric_only = True))




The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/685 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/760k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.31M [00:00<?, ?B/s]

Model seed is: 94, total number of training (for each informative and uninform class) and test samples: 800, 6400 0.1 ALBERT-L 94 2e-05 8




model.safetensors:   0%|          | 0.00/71.5M [00:00<?, ?B/s]

Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7083,0.69116,0.535,0.584821,0.528206,0.528226,0.655,0.415,0.072107,0.527419
2,0.6966,0.694256,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.449612
3,0.7092,0.694576,0.5,0.0,0.333333,0.0,0.0,1.0,0.0,0.543512
4,0.7012,0.700593,0.5,0.0,0.333333,0.0,0.0,1.0,0.0,0.56735
5,0.6992,0.692988,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.572381
6,0.7046,0.692982,0.50125,0.667223,0.336105,0.500626,1.0,0.0025,0.035377,0.568175


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Model seed is: 791, total number of training (for each informative and uninform class) and test samples: 800, 6400 0.1 ALBERT-L 791 2e-05 8


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6864,0.69501,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.456706
2,0.7013,0.719615,0.5,0.0,0.333333,0.0,0.0,1.0,0.0,0.625444
3,0.6855,0.655364,0.63,0.628141,0.629991,0.631313,0.625,0.635,0.260013,0.594575
4,0.627,0.702074,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.490781
5,0.7037,0.670456,0.57375,0.327416,0.507715,0.775701,0.2075,0.94,0.216667,0.427956
6,0.4901,0.661564,0.655,0.626016,0.652915,0.683432,0.5775,0.7325,0.313792,0.712569


  _warn_prf(average, modifier, msg_start, len(result))


Model seed is: 5, total number of training (for each informative and uninform class) and test samples: 800, 6400 0.1 ALBERT-L 5 2e-05 8


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.9107,0.732262,0.5,0.663866,0.344128,0.5,0.9875,0.0125,0.0,0.481088
2,0.6884,0.658246,0.67,0.693023,0.668133,0.647826,0.745,0.595,0.343891,0.690412
3,0.7059,0.732411,0.6175,0.633094,0.616808,0.608295,0.66,0.575,0.235854,0.650181
4,0.4978,0.595667,0.7075,0.731651,0.705111,0.675847,0.7975,0.6175,0.421891,0.783238
5,0.3879,0.684259,0.70375,0.723454,0.702238,0.678337,0.775,0.6325,0.411701,0.785169
6,0.3804,0.918595,0.71,0.728337,0.708673,0.685022,0.7775,0.6425,0.42388,0.754256


Model seed is: 6932, total number of training (for each informative and uninform class) and test samples: 800, 6400 0.1 ALBERT-L 6932 2e-05 8


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.5774,0.620667,0.6975,0.719258,0.695672,0.670996,0.775,0.62,0.399832,0.756037
2,0.5452,0.565824,0.71625,0.718012,0.716239,0.71358,0.7225,0.71,0.432534,0.783244
3,0.4775,0.680607,0.7,0.653179,0.694431,0.773973,0.565,0.835,0.415429,0.784125
4,0.2153,0.694217,0.73875,0.738423,0.73875,0.739348,0.7375,0.74,0.477501,0.807638
5,0.1967,0.975164,0.74625,0.745932,0.74625,0.746867,0.745,0.7475,0.492502,0.808538
6,0.0705,1.09061,0.74375,0.747226,0.743702,0.737226,0.7575,0.73,0.487684,0.807931


Model seed is: 1759, total number of training (for each informative and uninform class) and test samples: 800, 6400 0.1 ALBERT-L 1759 2e-05 8


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7033,0.693648,0.5,0.0,0.333333,0.0,0.0,1.0,0.0,0.567712
2,0.7097,0.69764,0.5,0.0,0.333333,0.0,0.0,1.0,0.0,0.387272
3,0.6982,0.693807,0.5,0.0,0.333333,0.0,0.0,1.0,0.0,0.372244
4,0.673,0.686559,0.58875,0.439523,0.557373,0.68984,0.3225,0.855,0.209704,0.6034
5,0.7018,0.694327,0.5225,0.256809,0.45253,0.578947,0.165,0.88,0.064366,0.572844
6,0.6639,0.647385,0.635,0.677704,0.628477,0.606719,0.7675,0.5025,0.280011,0.672362


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Model seed is: 323, total number of training (for each informative and uninform class) and test samples: 800, 6400 0.1 ALBERT-L 323 2e-05 8


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7348,0.71082,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.513022
2,0.7095,0.70172,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.533844
3,0.7116,0.702743,0.5,0.0,0.333333,0.0,0.0,1.0,0.0,0.547213
4,0.7165,0.76787,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.517859
5,0.6945,0.701913,0.49875,0.0,0.332777,0.0,0.0,0.9975,-0.035377,0.539675
6,0.6974,0.689988,0.515,0.105991,0.386615,0.676471,0.0575,0.9725,0.074358,0.632162


  _warn_prf(average, modifier, msg_start, len(result))


Model seed is: 1694, total number of training (for each informative and uninform class) and test samples: 800, 6400 0.1 ALBERT-L 1694 2e-05 8


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.71,0.704766,0.495,0.650519,0.370305,0.497354,0.94,0.05,-0.021932,0.453838
2,0.7065,0.6886,0.5375,0.579545,0.532828,0.53125,0.6375,0.4375,0.076547,0.549844
3,0.7152,0.715609,0.49,0.656566,0.333137,0.494924,0.975,0.005,-0.082269,0.386912
4,0.6938,0.687671,0.57375,0.398589,0.534241,0.676647,0.2825,0.865,0.181465,0.569631
5,0.6943,0.677328,0.58625,0.583648,0.586234,0.587342,0.58,0.5925,0.172513,0.578463
6,0.7003,0.675709,0.585,0.551351,0.582652,0.6,0.51,0.66,0.171945,0.583681


Model seed is: 9741, total number of training (for each informative and uninform class) and test samples: 800, 6400 0.1 ALBERT-L 9741 2e-05 8


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7419,0.696174,0.51125,0.18711,0.418846,0.555556,0.1125,0.91,0.037294,0.483013
2,0.6893,0.763476,0.56,0.399317,0.526089,0.629032,0.2925,0.8275,0.142037,0.612431
3,0.7217,0.720669,0.5,0.0,0.333333,0.0,0.0,1.0,0.0,0.562134
4,0.7015,0.704075,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.549528
5,0.7174,0.702062,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.640988
6,0.6714,0.660553,0.62625,0.603974,0.625064,0.642254,0.57,0.6825,0.254113,0.675894


  _warn_prf(average, modifier, msg_start, len(result))


Model seed is: 200, total number of training (for each informative and uninform class) and test samples: 800, 6400 0.1 ALBERT-L 200 2e-05 8


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7157,0.695194,0.5075,0.660345,0.382445,0.503947,0.9575,0.0575,0.034412,0.553156
2,0.6982,0.697004,0.5125,0.667802,0.376155,0.50646,0.98,0.045,0.070492,0.520656
3,0.7223,0.693768,0.49875,0.665555,0.332777,0.499374,0.9975,0.0,-0.035377,0.565656
4,0.7058,0.689414,0.53375,0.419907,0.515074,0.555556,0.3375,0.73,0.073389,0.502712
5,0.696,0.689326,0.5425,0.287938,0.475461,0.649123,0.185,0.9,0.121581,0.534322
6,0.6807,0.670678,0.5925,0.513433,0.581448,0.637037,0.43,0.755,0.195619,0.646631


Model seed is: 999, total number of training (for each informative and uninform class) and test samples: 800, 6400 0.1 ALBERT-L 999 2e-05 8


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7032,0.68615,0.49125,0.64141,0.38307,0.495238,0.91,0.0725,-0.032026,0.549594
2,0.692,0.678974,0.59,0.635556,0.583492,0.572,0.715,0.465,0.185903,0.582031
3,0.6928,0.690989,0.52375,0.653321,0.446421,0.513591,0.8975,0.15,0.071508,0.546906
4,0.691,0.694946,0.49875,0.004963,0.334979,0.333333,0.0025,0.995,-0.020451,0.537212
5,0.6961,0.699826,0.5025,0.485788,0.501974,0.502674,0.47,0.535,0.005011,0.491669
6,0.6823,0.688014,0.5,0.0,0.333333,0.0,0.0,1.0,0.0,0.589806


  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/480 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Model seed is: 94, total number of training (for each informative and uninform class) and test samples: 800, 6400 0.1 DISRoBERTa-B 94 2e-05 8




model.safetensors:   0%|          | 0.00/331M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.5897,0.576697,0.725,0.7343,0.724663,0.71028,0.76,0.69,0.451107,0.784181
2,0.4822,0.54337,0.74,0.727749,0.739472,0.763736,0.695,0.785,0.481956,0.805519
3,0.4105,0.797669,0.7275,0.677515,0.720792,0.82971,0.5725,0.8825,0.478576,0.8135
4,0.1514,1.038863,0.74375,0.768884,0.740683,0.700205,0.8525,0.635,0.499457,0.822006
5,0.1478,1.191393,0.7575,0.771765,0.756549,0.728889,0.82,0.695,0.519071,0.816937
6,0.1102,1.231004,0.75875,0.77374,0.757686,0.728477,0.825,0.6925,0.522103,0.811937


Model seed is: 791, total number of training (for each informative and uninform class) and test samples: 800, 6400 0.1 DISRoBERTa-B 791 2e-05 8


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6317,0.60431,0.65875,0.680702,0.657129,0.63956,0.7275,0.59,0.320545,0.74105
2,0.4279,0.635577,0.7075,0.699229,0.707279,0.719577,0.68,0.735,0.415629,0.768613
3,0.3448,0.805421,0.70875,0.725559,0.707653,0.685969,0.77,0.6475,0.420668,0.785413
4,0.2332,1.088984,0.71875,0.705111,0.718147,0.741047,0.6725,0.765,0.439384,0.783119
5,0.2001,1.259952,0.7125,0.729412,0.711373,0.688889,0.775,0.65,0.42836,0.786463
6,0.0772,1.298942,0.71625,0.722833,0.71609,0.706444,0.74,0.6925,0.432989,0.78625


Model seed is: 5, total number of training (for each informative and uninform class) and test samples: 800, 6400 0.1 DISRoBERTa-B 5 2e-05 8


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.5346,0.627613,0.67,0.712418,0.662661,0.631274,0.8175,0.5225,0.355836,0.761112
2,0.4596,0.614607,0.7025,0.705446,0.70247,0.698529,0.7125,0.6925,0.405081,0.791194
3,0.4002,0.775045,0.7275,0.742925,0.726515,0.703125,0.7875,0.6675,0.458312,0.7932
4,0.2167,1.240297,0.7,0.734513,0.694843,0.65873,0.83,0.57,0.414247,0.795038
5,0.0884,1.38687,0.72875,0.743802,0.727811,0.704698,0.7875,0.67,0.460691,0.796828
6,0.1277,1.470526,0.7275,0.744131,0.726344,0.701327,0.7925,0.6625,0.458894,0.793331


Model seed is: 6932, total number of training (for each informative and uninform class) and test samples: 800, 6400 0.1 DISRoBERTa-B 6932 2e-05 8


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6224,0.577444,0.69,0.722595,0.68566,0.653846,0.8075,0.5725,0.390948,0.775294
2,0.4838,0.564765,0.73,0.736585,0.729831,0.719048,0.755,0.705,0.460576,0.809594
3,0.2673,0.809585,0.725,0.705094,0.723741,0.760116,0.6575,0.7925,0.454158,0.812631
4,0.1783,1.102619,0.73375,0.7314,0.73373,0.737913,0.725,0.7425,0.467572,0.818069
5,0.1477,1.362383,0.7375,0.739454,0.737485,0.73399,0.745,0.73,0.475053,0.816237
6,0.0982,1.393902,0.735,0.736318,0.734993,0.732673,0.74,0.73,0.470024,0.816281


Model seed is: 1759, total number of training (for each informative and uninform class) and test samples: 800, 6400 0.1 DISRoBERTa-B 1759 2e-05 8


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.5931,0.60829,0.71125,0.708701,0.711228,0.715013,0.7025,0.72,0.422565,0.776325
2,0.4865,0.545714,0.72875,0.708725,0.727462,0.765217,0.66,0.7975,0.461887,0.807781
3,0.3632,0.904612,0.71125,0.749186,0.70449,0.662188,0.8625,0.56,0.443267,0.816225
4,0.2033,1.035665,0.7575,0.767386,0.757061,0.737327,0.8,0.715,0.516871,0.815281
5,0.1473,1.216461,0.7525,0.765403,0.751749,0.727477,0.8075,0.6975,0.508083,0.816537
6,0.1675,1.314078,0.755,0.768322,0.754187,0.7287,0.8125,0.6975,0.513406,0.8166


Model seed is: 323, total number of training (for each informative and uninform class) and test samples: 800, 6400 0.1 DISRoBERTa-B 323 2e-05 8


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6647,0.635927,0.6425,0.712274,0.620163,0.59596,0.885,0.4,0.325895,0.780125
2,0.5694,0.545693,0.72625,0.740828,0.725381,0.703371,0.7825,0.67,0.455391,0.806181
3,0.3981,0.610775,0.73,0.733333,0.729958,0.72439,0.7425,0.7175,0.460144,0.813056
4,0.2868,0.816798,0.74625,0.755716,0.745868,0.728538,0.785,0.7075,0.493986,0.812931
5,0.1212,1.064796,0.735,0.755196,0.733184,0.701717,0.8175,0.6525,0.476532,0.817
6,0.1196,1.154116,0.73875,0.764374,0.735623,0.696099,0.8475,0.63,0.489212,0.81405


Model seed is: 1694, total number of training (for each informative and uninform class) and test samples: 800, 6400 0.1 DISRoBERTa-B 1694 2e-05 8


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6274,0.624889,0.7,0.735099,0.694639,0.658103,0.8325,0.5675,0.414831,0.76655
2,0.5554,0.53688,0.72875,0.702332,0.726597,0.778116,0.64,0.8175,0.464882,0.812269
3,0.3462,0.665188,0.74,0.741935,0.739985,0.736453,0.7475,0.7325,0.480054,0.818644
4,0.2366,0.794864,0.755,0.751269,0.754945,0.762887,0.74,0.77,0.51023,0.828119
5,0.2086,1.030104,0.7575,0.777011,0.755629,0.719149,0.845,0.67,0.523072,0.8294
6,0.0472,1.059247,0.7575,0.767386,0.757061,0.737327,0.8,0.715,0.516871,0.830313


Model seed is: 9741, total number of training (for each informative and uninform class) and test samples: 800, 6400 0.1 DISRoBERTa-B 9741 2e-05 8


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6738,0.589806,0.7075,0.665714,0.702857,0.776667,0.5825,0.8325,0.42861,0.799944
2,0.5662,0.672034,0.7125,0.652568,0.703683,0.824427,0.54,0.885,0.452801,0.826906
3,0.331,0.64693,0.76125,0.755442,0.761115,0.774278,0.7375,0.785,0.52309,0.840775
4,0.3025,1.090306,0.75375,0.749682,0.753685,0.762274,0.7375,0.77,0.507768,0.829581
5,0.2034,1.307474,0.7525,0.75,0.752475,0.757653,0.7425,0.7625,0.505101,0.828944
6,0.1021,1.321956,0.74375,0.745973,0.74373,0.739558,0.7525,0.735,0.487575,0.826562


Model seed is: 200, total number of training (for each informative and uninform class) and test samples: 800, 6400 0.1 DISRoBERTa-B 200 2e-05 8


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6618,0.609134,0.6725,0.732106,0.655442,0.619377,0.895,0.45,0.385246,0.768912
2,0.4921,0.564426,0.71,0.741071,0.705763,0.669355,0.83,0.59,0.432645,0.80045
3,0.4301,0.72834,0.70875,0.691391,0.707826,0.735211,0.6525,0.765,0.420167,0.802119
4,0.1457,1.063259,0.70875,0.746464,0.70216,0.660886,0.8575,0.56,0.4373,0.808025
5,0.1506,1.229507,0.72875,0.723567,0.728655,0.737662,0.71,0.7475,0.457822,0.808331
6,0.0402,1.300623,0.7225,0.727941,0.722389,0.713942,0.7425,0.7025,0.445356,0.810712


Model seed is: 999, total number of training (for each informative and uninform class) and test samples: 800, 6400 0.1 DISRoBERTa-B 999 2e-05 8


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6366,0.620829,0.64875,0.717019,0.627043,0.600337,0.89,0.4075,0.339652,0.755056
2,0.5361,0.583918,0.71125,0.729825,0.709879,0.685714,0.78,0.6425,0.426551,0.791469
3,0.4237,0.756567,0.7175,0.713198,0.717436,0.724227,0.7025,0.7325,0.435196,0.790088
4,0.3536,0.992396,0.72125,0.746879,0.718363,0.683992,0.8225,0.62,0.451862,0.791131
5,0.2105,1.111217,0.7275,0.745327,0.726158,0.699561,0.7975,0.6575,0.459526,0.792612
6,0.0838,1.26605,0.725,0.742389,0.723741,0.698238,0.7925,0.6575,0.454158,0.788725




config.json:   0%|          | 0.00/760 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/798k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.38M [00:00<?, ?B/s]

Model seed is: 94, total number of training (for each informative and uninform class) and test samples: 800, 6400 0.1 XLNet-B 94 2e-05 8




pytorch_model.bin:   0%|          | 0.00/467M [00:00<?, ?B/s]

Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6487,0.605962,0.6625,0.654731,0.662329,0.670157,0.64,0.685,0.32533,0.72075
2,0.5604,0.576546,0.7075,0.717391,0.707141,0.693925,0.7425,0.6725,0.416021,0.76545
3,0.511,0.643217,0.725,0.715026,0.724663,0.741935,0.69,0.76,0.451107,0.782412
4,0.2863,1.050414,0.70375,0.745981,0.695329,0.652908,0.87,0.5375,0.432084,0.8031
5,0.1516,1.463386,0.7025,0.746809,0.693101,0.65,0.8775,0.5275,0.432346,0.806688
6,0.0688,1.334714,0.72875,0.758082,0.724703,0.684105,0.85,0.6075,0.471576,0.811531


Model seed is: 791, total number of training (for each informative and uninform class) and test samples: 800, 6400 0.1 XLNet-B 791 2e-05 8


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6649,0.653633,0.615,0.696252,0.585328,0.574919,0.8825,0.3475,0.272237,0.72075
2,0.5377,0.650586,0.685,0.688889,0.684951,0.680488,0.6975,0.6725,0.370116,0.757431
3,0.4556,0.657822,0.71375,0.699869,0.713136,0.735537,0.6675,0.76,0.429341,0.782194
4,0.2917,1.051571,0.715,0.698413,0.714135,0.741573,0.66,0.77,0.432625,0.771794
5,0.2944,1.34223,0.715,0.706186,0.714743,0.728723,0.685,0.745,0.430776,0.774181
6,0.0972,1.502721,0.71375,0.732164,0.712391,0.687912,0.7825,0.645,0.431599,0.77455


Model seed is: 5, total number of training (for each informative and uninform class) and test samples: 800, 6400 0.1 XLNet-B 5 2e-05 8


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.5984,0.735183,0.62375,0.698699,0.598933,0.582638,0.8725,0.375,0.285314,0.7308
2,0.4689,0.604744,0.70625,0.718563,0.705687,0.689655,0.75,0.6625,0.414088,0.779931
3,0.4422,0.788764,0.705,0.724942,0.703441,0.679039,0.7775,0.6325,0.414379,0.785937
4,0.1938,1.413667,0.695,0.733624,0.68845,0.651163,0.84,0.55,0.407512,0.780481
5,0.1106,1.618476,0.71625,0.73388,0.714999,0.690949,0.7825,0.65,0.436347,0.785937
6,0.055,1.762314,0.715,0.735499,0.713278,0.686147,0.7925,0.6375,0.43526,0.780956


Model seed is: 6932, total number of training (for each informative and uninform class) and test samples: 800, 6400 0.1 XLNet-B 6932 2e-05 8


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6635,0.620763,0.6525,0.655087,0.65248,0.650246,0.66,0.645,0.305034,0.710281
2,0.6098,0.62195,0.69,0.732181,0.682114,0.644487,0.8475,0.5325,0.400383,0.756412
3,0.4122,0.776383,0.7025,0.679245,0.700928,0.736842,0.63,0.775,0.409326,0.773144
4,0.2291,1.103872,0.6825,0.723913,0.675192,0.640385,0.8325,0.5325,0.382624,0.780738
5,0.2609,1.574633,0.69625,0.738428,0.688141,0.648393,0.8575,0.535,0.414655,0.78445
6,0.1384,1.614281,0.7,0.730942,0.695979,0.662602,0.815,0.585,0.411019,0.78525


Model seed is: 1759, total number of training (for each informative and uninform class) and test samples: 800, 6400 0.1 XLNet-B 1759 2e-05 8


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6281,0.658249,0.645,0.708419,0.627372,0.601045,0.8625,0.4275,0.322068,0.753262
2,0.615,0.584284,0.7025,0.71934,0.701425,0.680804,0.7625,0.6425,0.407948,0.772731
3,0.5877,1.042761,0.635,0.721374,0.596194,0.583333,0.945,0.325,0.344124,0.779894
4,0.3939,0.812984,0.74125,0.744129,0.741217,0.735941,0.7525,0.73,0.482622,0.801181
5,0.1861,1.156335,0.73125,0.752018,0.729352,0.698073,0.815,0.6475,0.469128,0.805125
6,0.3266,1.424656,0.72125,0.748023,0.718067,0.682474,0.8275,0.615,0.452842,0.804331


Model seed is: 323, total number of training (for each informative and uninform class) and test samples: 800, 6400 0.1 XLNet-B 323 2e-05 8


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7084,0.659539,0.60625,0.705332,0.556056,0.563528,0.9425,0.27,0.287124,0.753531
2,0.6571,0.584667,0.6875,0.720982,0.682934,0.65121,0.8075,0.5675,0.38629,0.776975
3,0.5082,0.579781,0.73625,0.746089,0.735853,0.719258,0.775,0.6975,0.473925,0.802631
4,0.552,0.818168,0.72875,0.740741,0.728169,0.709382,0.775,0.6825,0.45947,0.79295
5,0.2156,1.221884,0.72625,0.745645,0.724649,0.696312,0.8025,0.65,0.457855,0.785438
6,0.1191,1.300447,0.72375,0.742724,0.722239,0.694989,0.7975,0.65,0.452449,0.788744


Model seed is: 1694, total number of training (for each informative and uninform class) and test samples: 800, 6400 0.1 XLNet-B 1694 2e-05 8


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7,0.685598,0.575,0.395018,0.533732,0.685185,0.2775,0.8725,0.186631,0.602125
2,0.6635,0.619381,0.655,0.629032,0.653301,0.680233,0.585,0.725,0.313083,0.720081
3,0.56,0.595318,0.70625,0.729574,0.704048,0.675906,0.7925,0.62,0.418778,0.767956
4,0.4267,0.60644,0.71375,0.72509,0.713262,0.69746,0.755,0.6725,0.428962,0.793519
5,0.4702,0.782624,0.72375,0.75847,0.717921,0.673786,0.8675,0.58,0.467226,0.802256
6,0.2778,0.776386,0.7475,0.760095,0.746802,0.723982,0.8,0.695,0.497751,0.803288


Model seed is: 9741, total number of training (for each informative and uninform class) and test samples: 800, 6400 0.1 XLNet-B 9741 2e-05 8


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7093,0.637335,0.62625,0.45735,0.586158,0.834437,0.315,0.9375,0.322634,0.740187
2,0.6189,0.588724,0.68625,0.61562,0.675286,0.794466,0.5025,0.87,0.400528,0.779713
3,0.376,0.632773,0.7375,0.746377,0.737178,0.721963,0.7725,0.7025,0.476168,0.813031
4,0.3067,0.848154,0.74,0.752381,0.739348,0.718182,0.79,0.69,0.482418,0.819781
5,0.2755,1.062299,0.74,0.747573,0.739766,0.726415,0.77,0.71,0.480866,0.81435
6,0.1394,1.118843,0.745,0.748148,0.74496,0.739024,0.7575,0.7325,0.490153,0.816813


Model seed is: 200, total number of training (for each informative and uninform class) and test samples: 800, 6400 0.1 XLNet-B 200 2e-05 8


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6386,0.62829,0.66875,0.640434,0.666683,0.700297,0.59,0.7475,0.341766,0.734619
2,0.5564,0.615282,0.66875,0.729867,0.650879,0.616179,0.895,0.4425,0.378463,0.787375
3,0.3972,0.648167,0.7025,0.737307,0.697184,0.660079,0.835,0.57,0.420016,0.80615
4,0.2491,1.127422,0.6975,0.744186,0.687078,0.644689,0.88,0.515,0.424272,0.8055
5,0.119,1.205137,0.72375,0.744509,0.721914,0.692473,0.805,0.6425,0.453528,0.811794
6,0.1572,1.461708,0.71,0.739326,0.706283,0.671429,0.8225,0.5975,0.431053,0.808956


Model seed is: 999, total number of training (for each informative and uninform class) and test samples: 800, 6400 0.1 XLNet-B 999 2e-05 8


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6736,0.664845,0.6,0.678068,0.575008,0.56734,0.8425,0.3575,0.228698,0.692038
2,0.6455,0.622262,0.67125,0.619392,0.665032,0.735395,0.535,0.8075,0.355971,0.740706
3,0.6149,0.724333,0.69125,0.649645,0.686834,0.75082,0.5725,0.81,0.393767,0.766119
4,0.3613,0.727561,0.71,0.704835,0.709911,0.717617,0.6925,0.7275,0.420257,0.774406
5,0.2387,0.908778,0.71625,0.712294,0.716196,0.722365,0.7025,0.73,0.432664,0.788988
6,0.1054,1.016728,0.70875,0.709114,0.70875,0.708229,0.71,0.7075,0.417501,0.785719




config.json:   0%|          | 0.00/761 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/798k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.38M [00:00<?, ?B/s]

Model seed is: 94, total number of training (for each informative and uninform class) and test samples: 800, 6400 0.1 XLNet-L 94 2e-05 8




pytorch_model.bin:   0%|          | 0.00/1.44G [00:00<?, ?B/s]

Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7028,0.705621,0.5,0.004975,0.335543,0.5,0.0025,0.9975,0.0,0.542747
2,0.6986,0.693613,0.50125,0.664987,0.344719,0.500632,0.99,0.0125,0.011852,0.585216
3,0.7052,0.691392,0.5075,0.663248,0.373484,0.503896,0.97,0.045,0.039477,0.606775
4,0.6788,0.669797,0.58875,0.624,0.585103,0.574737,0.6825,0.495,0.180705,0.644656
5,0.6391,0.655864,0.61625,0.648339,0.613028,0.598309,0.7075,0.525,0.236471,0.673825
6,0.6229,0.733955,0.6175,0.623153,0.617414,0.614078,0.6325,0.6025,0.235106,0.6772


Model seed is: 791, total number of training (for each informative and uninform class) and test samples: 800, 6400 0.1 XLNet-L 791 2e-05 8


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.707,0.747624,0.52,0.672913,0.385752,0.510336,0.9875,0.0525,0.112788,0.627541
2,0.5859,0.651022,0.64125,0.67933,0.636119,0.614141,0.76,0.5225,0.290821,0.713337
3,0.5598,0.754598,0.625,0.575071,0.61975,0.663399,0.5075,0.7425,0.257203,0.697631
4,0.4097,0.772767,0.665,0.597598,0.65533,0.74812,0.4975,0.8325,0.350237,0.743944
5,0.3774,1.035045,0.6475,0.660241,0.647004,0.637209,0.685,0.61,0.295833,0.737831
6,0.2355,1.090964,0.67125,0.676507,0.671163,0.66586,0.6875,0.655,0.342681,0.750787


Model seed is: 5, total number of training (for each informative and uninform class) and test samples: 800, 6400 0.1 XLNet-L 5 2e-05 8


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7375,0.697426,0.5,0.0,0.333333,0.0,0.0,1.0,0.0,0.45975
2,0.7095,0.703606,0.5,0.0,0.333333,0.0,0.0,1.0,0.0,0.459075
3,0.7122,0.693604,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.494356
4,0.7245,0.698802,0.5,0.0,0.333333,0.0,0.0,1.0,0.0,0.445406
5,0.7058,0.710195,0.5,0.0,0.333333,0.0,0.0,1.0,0.0,0.57245
6,0.7235,0.70721,0.5,0.0,0.333333,0.0,0.0,1.0,0.0,0.585147


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))


Model seed is: 6932, total number of training (for each informative and uninform class) and test samples: 800, 6400 0.1 XLNet-L 6932 2e-05 8


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7701,0.672042,0.565,0.644172,0.542343,0.544983,0.7875,0.3425,0.145165,0.655181
2,0.648,0.629645,0.6375,0.642857,0.637418,0.633495,0.6525,0.6225,0.275124,0.713163
3,0.7356,0.726707,0.50125,0.004988,0.336105,1.0,0.0025,1.0,0.035377,0.700144
4,0.5213,0.65337,0.6625,0.736328,0.633789,0.604167,0.9425,0.3825,0.392279,0.76585
5,0.4758,0.555669,0.71375,0.730905,0.712582,0.689579,0.7775,0.65,0.431018,0.802194
6,0.3696,0.572801,0.7175,0.744344,0.714351,0.679752,0.8225,0.6125,0.444921,0.80765


Model seed is: 1759, total number of training (for each informative and uninform class) and test samples: 800, 6400 0.1 XLNet-L 1759 2e-05 8


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7169,0.693637,0.51375,0.663203,0.394523,0.507285,0.9575,0.07,0.059678,0.611
2,0.6661,0.628249,0.65375,0.663426,0.653464,0.64539,0.6825,0.625,0.30801,0.698381
3,0.6374,0.66498,0.65,0.71134,0.633448,0.605263,0.8625,0.4375,0.331421,0.754869
4,0.5347,0.591319,0.6925,0.682171,0.692175,0.705882,0.66,0.725,0.385816,0.761938
5,0.4366,0.679105,0.69625,0.716453,0.6947,0.671772,0.7675,0.625,0.396547,0.777081
6,0.3577,0.771602,0.70625,0.721893,0.705318,0.685393,0.7625,0.65,0.415135,0.781081


Model seed is: 323, total number of training (for each informative and uninform class) and test samples: 800, 6400 0.1 XLNet-L 323 2e-05 8


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7158,0.693073,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.599872
2,0.7354,0.693035,0.50125,0.004988,0.336105,1.0,0.0025,1.0,0.035377,0.628553
3,0.6996,0.703499,0.5025,0.00995,0.338865,1.0,0.005,1.0,0.050063,0.619772
4,0.6706,0.697148,0.5175,0.673986,0.373051,0.508929,0.9975,0.0375,0.125,0.713194
5,0.6444,0.629212,0.655,0.709474,0.642429,0.612727,0.8425,0.4675,0.334403,0.751506
6,0.5312,0.634025,0.6925,0.721088,0.689235,0.659751,0.795,0.59,0.393354,0.757238


Model seed is: 1694, total number of training (for each informative and uninform class) and test samples: 800, 6400 0.1 XLNet-L 1694 2e-05 8


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7299,0.693416,0.5,0.0,0.333333,0.0,0.0,1.0,0.0,0.533197
2,0.7235,0.693343,0.5,0.0,0.333333,0.0,0.0,1.0,0.0,0.554416
3,0.7067,0.693354,0.5,0.0,0.333333,0.0,0.0,1.0,0.0,0.511737
4,0.7171,0.693304,0.50125,0.014815,0.340462,0.6,0.0075,0.995,0.015861,0.58575
5,0.7266,0.693922,0.5,0.0,0.333333,0.0,0.0,1.0,0.0,0.562362
6,0.7283,0.694513,0.5,0.0,0.333333,0.0,0.0,1.0,0.0,0.548706


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))


Model seed is: 9741, total number of training (for each informative and uninform class) and test samples: 800, 6400 0.1 XLNet-L 9741 2e-05 8


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6968,0.709245,0.52375,0.173536,0.419516,0.655738,0.1,0.9475,0.089488,0.532925
2,0.6847,0.668117,0.5675,0.302419,0.494507,0.78125,0.1875,0.9475,0.207717,0.703181
3,0.6074,0.56272,0.7,0.665738,0.696815,0.751572,0.5975,0.8025,0.40868,0.792562
4,0.5185,0.570888,0.72,0.737705,0.718718,0.693833,0.7875,0.6525,0.444065,0.802444
5,0.4644,0.67642,0.74375,0.761905,0.742251,0.711497,0.82,0.6675,0.49327,0.812969
6,0.2938,0.755586,0.74375,0.746601,0.743718,0.738386,0.755,0.7325,0.487623,0.818163


Model seed is: 200, total number of training (for each informative and uninform class) and test samples: 800, 6400 0.1 XLNet-L 200 2e-05 8


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7308,0.696985,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.576937
2,0.7089,0.68619,0.57375,0.62975,0.563771,0.556622,0.725,0.4225,0.15475,0.622275
3,0.6722,0.706388,0.525,0.62963,0.483804,0.515974,0.8075,0.2425,0.060599,0.6072
4,0.5623,0.637675,0.6475,0.690789,0.640453,0.615234,0.7875,0.5075,0.307292,0.734819
5,0.5717,0.61022,0.70375,0.739846,0.697935,0.659491,0.8425,0.565,0.424159,0.777631
6,0.4297,0.636373,0.72125,0.734207,0.720586,0.701595,0.77,0.6725,0.444618,0.787588


Model seed is: 999, total number of training (for each informative and uninform class) and test samples: 800, 6400 0.1 XLNet-L 999 2e-05 8


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7112,0.731571,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.544572
2,0.6958,0.695796,0.5,0.0,0.333333,0.0,0.0,1.0,0.0,0.534303
3,0.7255,0.69541,0.5,0.0,0.333333,0.0,0.0,1.0,0.0,0.592688
4,0.6843,0.708424,0.5,0.0,0.333333,0.0,0.0,1.0,0.0,0.629156
5,0.6889,0.702405,0.5,0.0,0.333333,0.0,0.0,1.0,0.0,0.646619
6,0.6759,0.672984,0.58875,0.466775,0.566042,0.663594,0.36,0.8175,0.199615,0.670319


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))




tokenizer_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/474 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Model seed is: 94, total number of training (for each informative and uninform class) and test samples: 800, 6400 0.1 DEBERT-B 94 2e-05 8




pytorch_model.bin:   0%|          | 0.00/559M [00:00<?, ?B/s]

Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight', 'classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6967,0.693359,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.560359
2,0.7045,0.693279,0.5,0.0,0.333333,0.0,0.0,1.0,0.0,0.535566
3,0.6933,0.694117,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.510097
4,0.7131,0.693442,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.604
5,0.6928,0.69324,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.565919
6,0.694,0.693227,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.575659


  _warn_prf(average, modifier, msg_start, len(result))


Model seed is: 791, total number of training (for each informative and uninform class) and test samples: 800, 6400 0.1 DEBERT-B 791 2e-05 8


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight', 'classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6327,0.616919,0.66125,0.707659,0.652492,0.622391,0.82,0.5025,0.340097,0.752106
2,0.383,0.792698,0.69375,0.690265,0.693711,0.69821,0.6825,0.705,0.387598,0.760763
3,0.2168,1.173338,0.69375,0.707986,0.69302,0.676538,0.7425,0.645,0.389355,0.769616
4,0.1201,1.484764,0.69125,0.731813,0.684021,0.646833,0.8425,0.54,0.401301,0.754544
5,0.2209,1.586416,0.69125,0.734123,0.683008,0.644612,0.8525,0.53,0.404091,0.720581
6,0.0959,1.578017,0.7025,0.726437,0.700205,0.67234,0.79,0.615,0.411348,0.744681


Model seed is: 5, total number of training (for each informative and uninform class) and test samples: 800, 6400 0.1 DEBERT-B 5 2e-05 8


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight', 'classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.5508,0.61091,0.68,0.693046,0.679421,0.665899,0.7225,0.6375,0.361308,0.754306
2,0.4816,0.596394,0.7,0.721578,0.698187,0.67316,0.7775,0.6225,0.404893,0.794444
3,0.4405,0.852527,0.705,0.731818,0.70202,0.670833,0.805,0.605,0.418454,0.786675
4,0.2158,1.325842,0.725,0.754464,0.720982,0.681452,0.845,0.605,0.463548,0.79185
5,0.144,1.44928,0.73375,0.749117,0.732747,0.708241,0.795,0.6725,0.471048,0.796419
6,0.0529,1.533935,0.7325,0.752887,0.730667,0.699571,0.815,0.65,0.471462,0.794544


Model seed is: 6932, total number of training (for each informative and uninform class) and test samples: 800, 6400 0.1 DEBERT-B 6932 2e-05 8


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight', 'classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6335,0.632758,0.66625,0.734856,0.642302,0.609555,0.925,0.4075,0.388578,0.764381
2,0.4663,0.587796,0.7275,0.752273,0.724747,0.689583,0.8275,0.6275,0.464382,0.805088
3,0.4455,0.981812,0.74,0.734015,0.739868,0.751309,0.7175,0.7625,0.480487,0.816331
4,0.1666,1.158969,0.73875,0.765957,0.735171,0.693712,0.855,0.6225,0.490954,0.819806
5,0.0509,1.410925,0.7375,0.738155,0.737498,0.736318,0.74,0.735,0.475006,0.814325
6,0.0926,1.433433,0.74,0.755869,0.738897,0.712389,0.805,0.675,0.484108,0.815131


Model seed is: 1759, total number of training (for each informative and uninform class) and test samples: 800, 6400 0.1 DEBERT-B 1759 2e-05 8


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight', 'classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.5512,0.581706,0.705,0.701266,0.704954,0.710256,0.6925,0.7175,0.410128,0.781362
2,0.4932,0.528017,0.73875,0.730323,0.738495,0.754667,0.7075,0.77,0.478435,0.819825
3,0.3243,0.940277,0.7275,0.758315,0.722997,0.681275,0.855,0.6,0.470556,0.819412
4,0.259,1.227754,0.73625,0.718291,0.735174,0.770774,0.6725,0.8,0.476388,0.813256
5,0.0888,1.437812,0.74875,0.742638,0.748608,0.761155,0.725,0.7725,0.498062,0.808144
6,0.1093,1.493969,0.74,0.741935,0.739985,0.736453,0.7475,0.7325,0.480054,0.80985


Model seed is: 323, total number of training (for each informative and uninform class) and test samples: 800, 6400 0.1 DEBERT-B 323 2e-05 8


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight', 'classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6722,0.715136,0.5675,0.691622,0.483886,0.537396,0.97,0.165,0.227551,0.745169
2,0.5416,0.573095,0.71,0.719128,0.709693,0.697183,0.7425,0.6775,0.42089,0.780094
3,0.3996,0.733729,0.70125,0.731762,0.697334,0.663951,0.815,0.5875,0.413339,0.783913
4,0.2916,1.301468,0.68625,0.729817,0.677874,0.640832,0.8475,0.525,0.393526,0.788494
5,0.0868,1.604916,0.675,0.727463,0.662493,0.626354,0.8675,0.4825,0.379233,0.782919
6,0.0483,1.587346,0.69875,0.72267,0.696492,0.66951,0.785,0.6125,0.403549,0.779081


Model seed is: 1694, total number of training (for each informative and uninform class) and test samples: 800, 6400 0.1 DEBERT-B 1694 2e-05 8


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight', 'classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7186,0.69737,0.5,0.66611,0.335543,0.5,0.9975,0.0025,0.0,0.568806
2,0.6511,0.587597,0.70625,0.726426,0.704644,0.679739,0.78,0.6325,0.417062,0.764569
3,0.4196,0.599627,0.725,0.7343,0.724663,0.71028,0.76,0.69,0.451107,0.781525
4,0.2913,0.731858,0.73,0.720207,0.729669,0.747312,0.695,0.765,0.461131,0.792444
5,0.2271,0.8941,0.7525,0.759709,0.752277,0.738208,0.7825,0.7225,0.505911,0.803956
6,0.192,1.002657,0.7475,0.764019,0.746257,0.717105,0.8175,0.6775,0.499923,0.803906


Model seed is: 9741, total number of training (for each informative and uninform class) and test samples: 800, 6400 0.1 DEBERT-B 9741 2e-05 8


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight', 'classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6945,0.692851,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.679488
2,0.6915,0.696666,0.615,0.496732,0.592496,0.716981,0.38,0.85,0.260574,0.722819
3,0.5969,0.59183,0.6975,0.668493,0.695166,0.739394,0.61,0.785,0.401191,0.774519
4,0.4356,0.624752,0.7275,0.7275,0.7275,0.7275,0.7275,0.7275,0.455,0.794419
5,0.2638,0.877127,0.72625,0.742656,0.725133,0.700665,0.79,0.6625,0.456223,0.791725
6,0.2689,0.993881,0.72625,0.740828,0.725381,0.703371,0.7825,0.67,0.455391,0.791712


Model seed is: 200, total number of training (for each informative and uninform class) and test samples: 800, 6400 0.1 DEBERT-B 200 2e-05 8


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight', 'classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6375,0.614989,0.67,0.60479,0.660764,0.753731,0.505,0.835,0.360177,0.7412
2,0.5125,0.556646,0.71125,0.749186,0.70449,0.662188,0.8625,0.56,0.443267,0.806412
3,0.298,0.703827,0.7275,0.740476,0.726817,0.706818,0.7775,0.6775,0.457292,0.806412
4,0.3198,1.099197,0.72125,0.752497,0.716735,0.676647,0.8475,0.595,0.457319,0.809656
5,0.1098,1.356312,0.745,0.746898,0.744986,0.741379,0.7525,0.7375,0.490055,0.807169
6,0.037,1.412076,0.74,0.742574,0.739974,0.735294,0.75,0.73,0.480096,0.80985


Model seed is: 999, total number of training (for each informative and uninform class) and test samples: 800, 6400 0.1 DEBERT-B 999 2e-05 8


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight', 'classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7107,0.637652,0.6025,0.708257,0.542364,0.55942,0.965,0.24,0.297641,0.613894
2,0.5878,0.581634,0.70375,0.715486,0.703245,0.688222,0.745,0.6625,0.408894,0.76595
3,0.4764,0.671592,0.735,0.732323,0.734973,0.739796,0.725,0.745,0.470094,0.789525
4,0.3089,1.195611,0.705,0.734831,0.701219,0.667347,0.8175,0.5925,0.42079,0.784663
5,0.1744,1.262734,0.7175,0.735981,0.716109,0.690789,0.7875,0.6475,0.439327,0.7761
6,0.0682,1.369297,0.715,0.736111,0.713164,0.685345,0.795,0.635,0.435612,0.772687




Model seed is: 94, total number of training (for each informative and uninform class) and test samples: 400, 6800 0.05 ALBERT-L 94 2e-05 8


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7065,0.686336,0.545,0.67325,0.462139,0.52521,0.9375,0.1525,0.145279,0.591075
2,0.7121,0.689503,0.545,0.580645,0.541689,0.538462,0.63,0.46,0.091329,0.550394
3,0.7262,0.701886,0.4925,0.634892,0.401462,0.495787,0.8825,0.1025,-0.02397,0.509637
4,0.7176,0.699061,0.515,0.564045,0.508783,0.512245,0.6275,0.4025,0.030789,0.497787
5,0.7179,0.700754,0.47625,0.066815,0.351392,0.306122,0.0375,0.915,-0.099046,0.489063
6,0.7099,0.691404,0.51375,0.508217,0.513688,0.514066,0.5025,0.525,0.027507,0.536412


Model seed is: 791, total number of training (for each informative and uninform class) and test samples: 400, 6800 0.05 ALBERT-L 791 2e-05 8


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6814,0.655131,0.63,0.647619,0.629073,0.618182,0.68,0.58,0.26131,0.665931
2,0.5711,0.589277,0.6975,0.723112,0.694889,0.666667,0.79,0.605,0.401938,0.762225
3,0.2774,0.747204,0.7025,0.656069,0.696977,0.777397,0.5675,0.8375,0.420622,0.744506
4,0.1957,0.887333,0.70875,0.709114,0.70875,0.708229,0.71,0.7075,0.417501,0.7704
5,0.0542,1.132825,0.715,0.717822,0.714971,0.710784,0.725,0.705,0.430086,0.762912
6,0.0083,1.206374,0.70375,0.720189,0.702724,0.682327,0.7625,0.645,0.410342,0.763338


Model seed is: 5, total number of training (for each informative and uninform class) and test samples: 400, 6800 0.05 ALBERT-L 5 2e-05 8


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6363,0.581682,0.69375,0.726257,0.68937,0.656566,0.8125,0.575,0.398914,0.790681
2,0.4682,0.585899,0.7225,0.738824,0.721412,0.697778,0.785,0.66,0.448518,0.793188
3,0.363,0.686115,0.7225,0.740654,0.721134,0.695175,0.7925,0.6525,0.449426,0.796481
4,0.1618,0.892724,0.7475,0.761229,0.746662,0.721973,0.805,0.69,0.498306,0.799363
5,0.0408,1.219293,0.74375,0.748466,0.74366,0.73494,0.7625,0.725,0.487843,0.793438
6,0.0309,1.330591,0.74875,0.755177,0.748577,0.736342,0.775,0.7225,0.498187,0.789394


Model seed is: 6932, total number of training (for each informative and uninform class) and test samples: 400, 6800 0.05 ALBERT-L 6932 2e-05 8


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6417,0.607892,0.67625,0.733813,0.660367,0.623037,0.8925,0.46,0.390957,0.777756
2,0.4461,0.62817,0.6875,0.736287,0.676426,0.636861,0.8725,0.5025,0.403646,0.790875
3,0.2822,0.860455,0.71375,0.681502,0.710785,0.768025,0.6125,0.815,0.436544,0.780906
4,0.132,1.054483,0.7125,0.720874,0.712241,0.700472,0.7425,0.6825,0.425767,0.785919
5,0.0763,1.338393,0.72,0.724138,0.719937,0.713592,0.735,0.705,0.440198,0.785937
6,0.043,1.397138,0.7175,0.723716,0.717357,0.708134,0.74,0.695,0.435441,0.785587


Model seed is: 1759, total number of training (for each informative and uninform class) and test samples: 400, 6800 0.05 ALBERT-L 1759 2e-05 8


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.708,0.678597,0.58625,0.631813,0.579815,0.569138,0.71,0.4625,0.178039,0.574362
2,0.6415,0.617323,0.65,0.70339,0.63828,0.610294,0.83,0.47,0.32156,0.753469
3,0.5391,0.59527,0.68,0.677582,0.679982,0.682741,0.6725,0.6875,0.360041,0.758125
4,0.5027,0.606632,0.70125,0.737651,0.695386,0.657534,0.84,0.5625,0.418954,0.790006
5,0.2895,0.867369,0.715,0.748899,0.709709,0.669291,0.85,0.58,0.446586,0.797419
6,0.1907,0.94066,0.725,0.738717,0.72424,0.70362,0.7775,0.6725,0.452501,0.790106


Model seed is: 323, total number of training (for each informative and uninform class) and test samples: 400, 6800 0.05 ALBERT-L 323 2e-05 8


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7804,0.761694,0.49875,0.029056,0.345615,0.461538,0.015,0.9825,-0.009886,0.487831
2,0.7335,0.705954,0.50625,0.211577,0.42608,0.524752,0.1325,0.88,0.018818,0.515825
3,0.7163,0.698485,0.4975,0.605108,0.457193,0.498382,0.77,0.225,-0.005963,0.508319
4,0.7061,0.689573,0.55625,0.561187,0.556194,0.555012,0.5675,0.545,0.112528,0.554125
5,0.6678,0.707071,0.52625,0.646125,0.464839,0.515648,0.865,0.1875,0.071378,0.546381
6,0.6302,0.702232,0.56375,0.585018,0.562601,0.557823,0.615,0.5125,0.128175,0.594719


Model seed is: 1694, total number of training (for each informative and uninform class) and test samples: 400, 6800 0.05 ALBERT-L 1694 2e-05 8


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7141,0.698786,0.505,0.641953,0.420167,0.502833,0.8875,0.1225,0.015527,0.478337
2,0.6349,0.640969,0.63125,0.628931,0.631236,0.632911,0.625,0.6375,0.262521,0.688769
3,0.5686,0.663234,0.6575,0.707265,0.647307,0.617537,0.8275,0.4875,0.334955,0.7333
4,0.2432,0.785635,0.69125,0.709753,0.68999,0.669623,0.755,0.6275,0.385647,0.756594
5,0.1172,1.152949,0.7,0.705882,0.69988,0.692308,0.72,0.68,0.40032,0.749656
6,0.0623,1.348336,0.70375,0.70559,0.703738,0.701235,0.71,0.6975,0.407532,0.748181


Model seed is: 9741, total number of training (for each informative and uninform class) and test samples: 400, 6800 0.05 ALBERT-L 9741 2e-05 8


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7095,0.700779,0.48625,0.575851,0.462252,0.490334,0.6975,0.275,-0.030341,0.438356
2,0.7161,0.690147,0.51,0.433526,0.500904,0.513699,0.375,0.645,0.020771,0.53875
3,0.7006,0.695293,0.5,0.585062,0.478066,0.5,0.705,0.295,0.0,0.509669
4,0.7168,0.694896,0.51,0.644283,0.428567,0.505698,0.8875,0.1325,0.030501,0.529794
5,0.7124,0.695675,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.480041
6,0.708,0.695131,0.5,0.0,0.333333,0.0,0.0,1.0,0.0,0.495266


  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))


Model seed is: 200, total number of training (for each informative and uninform class) and test samples: 400, 6800 0.05 ALBERT-L 200 2e-05 8


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.679,0.66503,0.58125,0.694064,0.515349,0.546763,0.95,0.2125,0.240617,0.701744
2,0.6095,0.647006,0.6375,0.683406,0.629715,0.606589,0.7825,0.4925,0.287348,0.703425
3,0.4454,0.728131,0.6725,0.644022,0.67039,0.705357,0.5925,0.7525,0.349503,0.737569
4,0.3225,0.891602,0.68125,0.716981,0.676087,0.644711,0.8075,0.555,0.374639,0.767044
5,0.1473,1.152656,0.70875,0.697009,0.708312,0.726287,0.67,0.7475,0.418759,0.757288
6,0.0838,1.288849,0.7025,0.709046,0.702349,0.69378,0.725,0.68,0.405411,0.755481


Model seed is: 999, total number of training (for each informative and uninform class) and test samples: 400, 6800 0.05 ALBERT-L 999 2e-05 8


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7471,0.733643,0.49875,0.004963,0.334979,0.333333,0.0025,0.995,-0.020451,0.533406
2,0.7528,0.698801,0.505,0.024631,0.346486,0.833333,0.0125,0.9975,0.057953,0.472869
3,0.7239,0.703019,0.495,0.161826,0.400233,0.47561,0.0975,0.8925,-0.016485,0.472625
4,0.68,0.721366,0.52125,0.631376,0.474333,0.513302,0.82,0.2225,0.053001,0.531981
5,0.7217,0.702018,0.49875,0.141328,0.3937,0.492537,0.0825,0.915,-0.004512,0.474231
6,0.7007,0.719223,0.445,0.562992,0.401359,0.464286,0.715,0.175,-0.130693,0.443812




Model seed is: 94, total number of training (for each informative and uninform class) and test samples: 400, 6800 0.05 DISRoBERTa-B 94 2e-05 8


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.68,0.652215,0.6825,0.703271,0.680937,0.660088,0.7525,0.6125,0.36863,0.742894
2,0.4959,0.587044,0.70875,0.71481,0.708618,0.70024,0.73,0.6875,0.417878,0.789238
3,0.4384,0.722902,0.72375,0.711111,0.72322,0.745205,0.68,0.7675,0.449223,0.79335
4,0.2686,0.85433,0.71,0.694737,0.709273,0.733333,0.66,0.76,0.422116,0.790675
5,0.3132,1.096115,0.71875,0.738676,0.717105,0.689805,0.795,0.6425,0.442678,0.790794
6,0.0682,1.095108,0.7225,0.725926,0.722457,0.717073,0.735,0.71,0.445139,0.795469


Model seed is: 791, total number of training (for each informative and uninform class) and test samples: 400, 6800 0.05 DISRoBERTa-B 791 2e-05 8


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6669,0.636477,0.64625,0.708548,0.629314,0.602452,0.86,0.4325,0.323556,0.743306
2,0.492,0.654506,0.67125,0.724029,0.658769,0.62387,0.8625,0.48,0.370689,0.774806
3,0.2822,0.742103,0.705,0.696658,0.704777,0.716931,0.6775,0.7325,0.410622,0.780675
4,0.2117,1.007723,0.715,0.704663,0.71465,0.731183,0.68,0.75,0.431057,0.780488
5,0.2595,1.223885,0.71125,0.729191,0.709977,0.686534,0.7775,0.645,0.426258,0.7799
6,0.0821,1.2251,0.70625,0.710947,0.706172,0.699758,0.7225,0.69,0.412718,0.77945


Model seed is: 5, total number of training (for each informative and uninform class) and test samples: 400, 6800 0.05 DISRoBERTa-B 5 2e-05 8


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6777,0.647702,0.6175,0.691532,0.594121,0.579392,0.8575,0.3775,0.267877,0.722162
2,0.56,0.596801,0.68375,0.693333,0.683441,0.672941,0.715,0.6525,0.36822,0.757925
3,0.4799,0.636795,0.71625,0.72418,0.716015,0.704492,0.745,0.6875,0.433217,0.785338
4,0.2796,0.799156,0.7025,0.700252,0.702483,0.705584,0.695,0.71,0.405046,0.788606
5,0.1942,0.945197,0.7075,0.703046,0.707434,0.713918,0.6925,0.7225,0.415187,0.792006
6,0.1194,1.028605,0.71125,0.712329,0.711246,0.709677,0.715,0.7075,0.422512,0.792656


Model seed is: 6932, total number of training (for each informative and uninform class) and test samples: 400, 6800 0.05 DISRoBERTa-B 6932 2e-05 8


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6718,0.63956,0.62125,0.695477,0.597325,0.581513,0.865,0.3775,0.277739,0.748606
2,0.4766,0.692513,0.67,0.717345,0.660474,0.627341,0.8375,0.5025,0.360851,0.777656
3,0.3332,0.71661,0.70875,0.692206,0.707906,0.733894,0.655,0.7625,0.419933,0.781775
4,0.2592,1.001525,0.69625,0.715789,0.694807,0.672527,0.765,0.6275,0.396264,0.7868
5,0.0969,1.14741,0.705,0.699746,0.70491,0.712435,0.6875,0.7225,0.410251,0.785188
6,0.0583,1.207858,0.70875,0.706179,0.708728,0.712468,0.7,0.7175,0.417564,0.7846


Model seed is: 1759, total number of training (for each informative and uninform class) and test samples: 400, 6800 0.05 DISRoBERTa-B 1759 2e-05 8


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6916,0.651721,0.68125,0.660453,0.68005,0.706553,0.62,0.7425,0.365251,0.754781
2,0.5329,0.588457,0.7125,0.743875,0.70812,0.670683,0.835,0.59,0.43836,0.803512
3,0.37,0.633293,0.7225,0.721805,0.722498,0.723618,0.72,0.725,0.445006,0.804881
4,0.2335,0.86792,0.72625,0.715953,0.72589,0.743935,0.69,0.7625,0.453694,0.80865
5,0.1282,1.120025,0.735,0.747017,0.734401,0.714612,0.7825,0.6875,0.472135,0.813487
6,0.1218,1.203391,0.7375,0.744526,0.737301,0.725118,0.765,0.71,0.47572,0.808888


Model seed is: 323, total number of training (for each informative and uninform class) and test samples: 400, 6800 0.05 DISRoBERTa-B 323 2e-05 8


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6912,0.678414,0.6725,0.702273,0.669192,0.64375,0.7725,0.5725,0.352114,0.768788
2,0.62,0.653038,0.655,0.721774,0.633913,0.60473,0.895,0.415,0.35337,0.788894
3,0.3816,0.737473,0.66375,0.732338,0.640119,0.608264,0.92,0.4075,0.381396,0.80865
4,0.3101,0.664259,0.71875,0.738676,0.717105,0.689805,0.795,0.6425,0.442678,0.812281
5,0.204,1.000303,0.68625,0.74097,0.671594,0.630931,0.8975,0.475,0.410983,0.807956
6,0.0638,0.909312,0.71875,0.74804,0.714897,0.677485,0.835,0.6025,0.449827,0.809962


Model seed is: 1694, total number of training (for each informative and uninform class) and test samples: 400, 6800 0.05 DISRoBERTa-B 1694 2e-05 8


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7053,0.685868,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.715413
2,0.6047,0.59547,0.66875,0.684148,0.667961,0.653759,0.7175,0.62,0.339116,0.755556
3,0.4731,0.633198,0.68125,0.711864,0.677611,0.649485,0.7875,0.575,0.370973,0.768088
4,0.3262,0.752599,0.6975,0.727477,0.693795,0.661885,0.8075,0.5875,0.404921,0.782056
5,0.2449,0.901985,0.69625,0.698885,0.696227,0.692875,0.705,0.6875,0.39256,0.770056
6,0.119,0.974932,0.70125,0.713085,0.700741,0.685912,0.7425,0.66,0.403877,0.772244


Model seed is: 9741, total number of training (for each informative and uninform class) and test samples: 400, 6800 0.05 DISRoBERTa-B 9741 2e-05 8


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6783,0.669021,0.5925,0.375479,0.536534,0.803279,0.245,0.94,0.257298,0.712925
2,0.6123,0.547771,0.71375,0.721072,0.713553,0.703088,0.74,0.6875,0.42809,0.798038
3,0.3884,0.579679,0.73125,0.748538,0.729974,0.703297,0.8,0.6625,0.466935,0.811419
4,0.248,0.711641,0.73,0.73201,0.729985,0.726601,0.7375,0.7225,0.460052,0.820306
5,0.1749,0.874165,0.73,0.72449,0.729892,0.739583,0.71,0.75,0.460368,0.81845
6,0.1156,0.995682,0.73375,0.713324,0.732391,0.772595,0.6625,0.805,0.47232,0.812637


Model seed is: 200, total number of training (for each informative and uninform class) and test samples: 400, 6800 0.05 DISRoBERTa-B 200 2e-05 8


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6884,0.6621,0.61375,0.701449,0.577273,0.571654,0.9075,0.32,0.281133,0.737863
2,0.5929,0.565066,0.6925,0.687817,0.692431,0.698454,0.6775,0.7075,0.385173,0.776256
3,0.3776,0.651432,0.7025,0.734375,0.698153,0.663306,0.8225,0.5825,0.417193,0.789781
4,0.289,0.717769,0.70625,0.741474,0.700694,0.662083,0.8425,0.57,0.428725,0.791
5,0.1611,0.889174,0.7175,0.746637,0.713714,0.676829,0.8325,0.6025,0.446983,0.793969
6,0.1133,0.921625,0.72,0.735849,0.718988,0.696429,0.78,0.66,0.443203,0.79235


Model seed is: 999, total number of training (for each informative and uninform class) and test samples: 400, 6800 0.05 DISRoBERTa-B 999 2e-05 8


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6858,0.668823,0.61625,0.71065,0.570539,0.570348,0.9425,0.29,0.306814,0.7548
2,0.6385,0.60698,0.6975,0.64095,0.689805,0.788321,0.54,0.855,0.416187,0.792713
3,0.4022,0.559927,0.74875,0.767092,0.747182,0.714903,0.8275,0.67,0.503788,0.829412
4,0.3046,0.668967,0.7425,0.760465,0.741043,0.71087,0.8175,0.6675,0.49055,0.827362
5,0.1229,0.756555,0.76,0.768675,0.759662,0.74186,0.7975,0.7225,0.521469,0.827412
6,0.1705,0.833901,0.7575,0.767386,0.757061,0.737327,0.8,0.715,0.516871,0.825975




Model seed is: 94, total number of training (for each informative and uninform class) and test samples: 400, 6800 0.05 XLNet-B 94 2e-05 8


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6569,0.598788,0.675,0.697674,0.673162,0.652174,0.75,0.6,0.354005,0.734037
2,0.4961,0.669552,0.68625,0.733262,0.676191,0.637708,0.8625,0.51,0.39805,0.767169
3,0.3535,0.790671,0.695,0.736501,0.687242,0.648289,0.8525,0.5375,0.410919,0.781119
4,0.1942,1.027877,0.70125,0.685112,0.700463,0.724234,0.65,0.7525,0.404631,0.774913
5,0.1745,1.238733,0.71,0.736364,0.707071,0.675,0.81,0.61,0.428661,0.774713
6,0.0983,1.311127,0.71,0.719807,0.709644,0.696262,0.745,0.675,0.421033,0.773687


Model seed is: 791, total number of training (for each informative and uninform class) and test samples: 400, 6800 0.05 XLNet-B 791 2e-05 8


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6905,0.681241,0.5925,0.694757,0.540987,0.555389,0.9275,0.2575,0.249205,0.666606
2,0.5585,0.66746,0.6575,0.716942,0.641699,0.610915,0.8675,0.4475,0.347098,0.753656
3,0.4094,0.679648,0.69875,0.679095,0.697616,0.726496,0.6375,0.76,0.400516,0.764
4,0.3563,0.761828,0.69125,0.717714,0.688512,0.661053,0.785,0.5975,0.389406,0.771056
5,0.312,0.988342,0.6875,0.733475,0.677916,0.639405,0.86,0.515,0.39953,0.7739
6,0.1105,0.950831,0.69375,0.717416,0.691587,0.665953,0.7775,0.61,0.393053,0.775762


Model seed is: 5, total number of training (for each informative and uninform class) and test samples: 400, 6800 0.05 XLNet-B 5 2e-05 8


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6927,0.648446,0.625,0.694501,0.604532,0.585911,0.8525,0.3975,0.280744,0.696125
2,0.5637,0.670941,0.65,0.692982,0.643003,0.617188,0.79,0.51,0.3125,0.719669
3,0.4705,0.86494,0.6625,0.720497,0.647315,0.614841,0.87,0.455,0.357213,0.749912
4,0.3238,0.837602,0.69375,0.714785,0.692075,0.668845,0.7675,0.62,0.391785,0.747569
5,0.1726,1.037528,0.69625,0.712426,0.695286,0.676404,0.7525,0.64,0.395008,0.752506
6,0.1772,1.150635,0.695,0.706731,0.694511,0.680556,0.735,0.655,0.391254,0.749869


Model seed is: 6932, total number of training (for each informative and uninform class) and test samples: 400, 6800 0.05 XLNet-B 6932 2e-05 8


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6876,0.65694,0.64625,0.696026,0.636503,0.610169,0.81,0.4825,0.309573,0.713806
2,0.5569,0.635882,0.66375,0.693273,0.660606,0.637317,0.76,0.5675,0.333742,0.74095
3,0.5344,0.610924,0.6975,0.695214,0.697483,0.700508,0.69,0.705,0.395044,0.76205
4,0.3763,0.667253,0.695,0.677249,0.694075,0.719101,0.64,0.75,0.392381,0.771556
5,0.2636,0.8292,0.70375,0.686922,0.702892,0.728291,0.65,0.7575,0.409875,0.767125
6,0.1676,0.929147,0.70375,0.681879,0.702343,0.736232,0.635,0.7725,0.411408,0.766994


Model seed is: 1759, total number of training (for each informative and uninform class) and test samples: 400, 6800 0.05 XLNet-B 1759 2e-05 8


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7096,0.668215,0.5675,0.324219,0.503102,0.741071,0.2075,0.9275,0.194532,0.701862
2,0.6514,0.643863,0.63625,0.710448,0.610686,0.590083,0.8925,0.38,0.317345,0.766806
3,0.5864,0.587975,0.695,0.648415,0.68955,0.765306,0.5625,0.8275,0.40446,0.789837
4,0.5287,0.662144,0.70875,0.729384,0.707047,0.681128,0.785,0.6325,0.422441,0.802706
5,0.3111,0.682149,0.73,0.72796,0.729985,0.733503,0.7225,0.7375,0.460052,0.805662
6,0.2874,0.719817,0.735,0.735,0.735,0.735,0.735,0.735,0.47,0.807562


Model seed is: 323, total number of training (for each informative and uninform class) and test samples: 400, 6800 0.05 XLNet-B 323 2e-05 8


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.697,0.6898,0.57125,0.528198,0.56765,0.587156,0.48,0.6625,0.144934,0.609019
2,0.6907,0.74495,0.53,0.669596,0.427871,0.51626,0.9525,0.1075,0.112199,0.71705
3,0.4968,0.777188,0.62,0.711575,0.577399,0.573394,0.9375,0.3025,0.310675,0.767244
4,0.4237,0.618978,0.71375,0.711223,0.713728,0.717557,0.705,0.7225,0.427565,0.77925
5,0.2791,0.878165,0.6775,0.731809,0.66371,0.626335,0.88,0.475,0.388268,0.777663
6,0.1714,0.803585,0.7125,0.728132,0.711546,0.690583,0.77,0.655,0.427838,0.781331


Model seed is: 1694, total number of training (for each informative and uninform class) and test samples: 400, 6800 0.05 XLNet-B 1694 2e-05 8


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.715,0.659503,0.6,0.532164,0.591409,0.640845,0.455,0.745,0.208981,0.630756
2,0.6144,0.711174,0.61625,0.686415,0.596025,0.580311,0.84,0.3925,0.259985,0.684931
3,0.552,0.642106,0.6775,0.691388,0.676846,0.662844,0.7225,0.6325,0.356447,0.727856
4,0.4267,0.761418,0.655,0.709474,0.642429,0.612727,0.8425,0.4675,0.334403,0.740519
5,0.3399,0.777309,0.675,0.703196,0.67204,0.647059,0.77,0.58,0.356494,0.74565
6,0.3043,0.817915,0.68375,0.716685,0.679418,0.649087,0.8,0.5675,0.377855,0.749437


Model seed is: 9741, total number of training (for each informative and uninform class) and test samples: 400, 6800 0.05 XLNet-B 9741 2e-05 8


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7176,0.656388,0.60875,0.598203,0.60848,0.614776,0.5825,0.635,0.2178,0.645556
2,0.6397,0.646117,0.6375,0.690171,0.626712,0.602612,0.8075,0.4675,0.292421,0.693488
3,0.5175,0.63116,0.67875,0.714127,0.673754,0.643287,0.8025,0.555,0.36898,0.745319
4,0.3889,0.821089,0.6625,0.726166,0.643213,0.610922,0.895,0.43,0.367103,0.760613
5,0.3471,0.731846,0.69875,0.7252,0.695933,0.666667,0.795,0.6025,0.405076,0.773281
6,0.3021,0.783625,0.7075,0.732877,0.704836,0.67437,0.8025,0.6125,0.4227,0.776381


Model seed is: 200, total number of training (for each informative and uninform class) and test samples: 400, 6800 0.05 XLNet-B 200 2e-05 8


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6972,0.638758,0.63,0.676149,0.622331,0.601167,0.7725,0.4875,0.271249,0.695119
2,0.6241,0.593597,0.675,0.701835,0.672346,0.648305,0.765,0.585,0.355812,0.757275
3,0.426,0.715864,0.65375,0.70247,0.64421,0.615819,0.8175,0.49,0.325448,0.7603
4,0.3526,0.740571,0.67375,0.712871,0.667579,0.636542,0.81,0.5375,0.361168,0.769262
5,0.2819,0.883933,0.69,0.730435,0.682864,0.646154,0.84,0.54,0.398348,0.775231
6,0.16,0.914198,0.7025,0.723898,0.700702,0.675325,0.78,0.625,0.409955,0.775137


Model seed is: 999, total number of training (for each informative and uninform class) and test samples: 400, 6800 0.05 XLNet-B 999 2e-05 8


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6781,0.628233,0.655,0.718367,0.636603,0.606897,0.88,0.43,0.347133,0.736431
2,0.5976,0.705432,0.66375,0.573693,0.648043,0.78355,0.4525,0.875,0.361334,0.765781
3,0.4308,0.677726,0.71625,0.718711,0.716228,0.712531,0.725,0.7075,0.432566,0.779444
4,0.3121,0.738466,0.725,0.722222,0.724972,0.729592,0.715,0.735,0.45009,0.787669
5,0.1578,0.820273,0.73375,0.743682,0.73335,0.716937,0.7725,0.695,0.46891,0.792844
6,0.1341,0.992208,0.715,0.690217,0.713164,0.755952,0.635,0.795,0.435612,0.790956




Model seed is: 94, total number of training (for each informative and uninform class) and test samples: 400, 6800 0.05 XLNet-L 94 2e-05 8


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7312,0.702227,0.5,0.0,0.333333,0.0,0.0,1.0,0.0,0.573731
2,0.7034,0.704024,0.5,0.0,0.333333,0.0,0.0,1.0,0.0,0.653603
3,0.6852,0.701278,0.5,0.0,0.333333,0.0,0.0,1.0,0.0,0.653544
4,0.7351,0.709201,0.5,0.0,0.333333,0.0,0.0,1.0,0.0,0.650069
5,0.6962,0.696441,0.5,0.0,0.333333,0.0,0.0,1.0,0.0,0.634394
6,0.706,0.694256,0.5,0.0,0.333333,0.0,0.0,1.0,0.0,0.646944


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))


Model seed is: 791, total number of training (for each informative and uninform class) and test samples: 400, 6800 0.05 XLNet-L 791 2e-05 8


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7139,0.770466,0.48875,0.651321,0.346738,0.494179,0.955,0.0225,-0.062298,0.589081
2,0.6601,0.777534,0.55875,0.687887,0.467611,0.532148,0.9725,0.145,0.209274,0.648487
3,0.5826,0.72579,0.63,0.687764,0.616888,0.594891,0.815,0.445,0.279861,0.703819
4,0.547,0.683071,0.67125,0.708749,0.665709,0.636183,0.8,0.5425,0.354453,0.728562
5,0.5466,0.946607,0.61,0.705104,0.564729,0.566869,0.9325,0.2875,0.287889,0.726875
6,0.3544,0.766991,0.65375,0.70247,0.64421,0.615819,0.8175,0.49,0.325448,0.73755


Model seed is: 5, total number of training (for each informative and uninform class) and test samples: 400, 6800 0.05 XLNet-L 5 2e-05 8


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7211,0.667721,0.6,0.594937,0.599937,0.602564,0.5875,0.6125,0.200063,0.628406
2,0.6756,0.650984,0.63375,0.670416,0.62916,0.609407,0.745,0.5225,0.274378,0.659319
3,0.5993,0.811166,0.59,0.688213,0.544836,0.555215,0.905,0.275,0.231781,0.715387
4,0.5004,0.701441,0.6475,0.694805,0.638823,0.612595,0.8025,0.4925,0.310286,0.727313
5,0.3953,0.728419,0.68375,0.721058,0.67799,0.64497,0.8175,0.55,0.381399,0.761763
6,0.2953,0.722369,0.69875,0.712753,0.698032,0.681093,0.7475,0.65,0.399403,0.764906


Model seed is: 6932, total number of training (for each informative and uninform class) and test samples: 400, 6800 0.05 XLNet-L 6932 2e-05 8


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7227,0.691323,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.672694
2,0.7227,0.652492,0.61375,0.579592,0.611183,0.635821,0.5325,0.695,0.230565,0.680119
3,0.6458,0.64156,0.63625,0.671928,0.631897,0.61191,0.745,0.5275,0.279184,0.685994
4,0.6795,0.63918,0.635,0.696466,0.619393,0.596085,0.8375,0.4325,0.295302,0.679231
5,0.5446,0.614509,0.68625,0.716384,0.682668,0.653608,0.7925,0.58,0.381206,0.7515
6,0.457,0.642402,0.69375,0.704463,0.693347,0.680653,0.73,0.6575,0.388522,0.752713


Model seed is: 1759, total number of training (for each informative and uninform class) and test samples: 400, 6800 0.05 XLNet-L 1759 2e-05 8


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7421,0.685692,0.55,0.659091,0.498663,0.530488,0.87,0.23,0.130145,0.600894
2,0.723,0.682589,0.5175,0.670085,0.386206,0.509091,0.98,0.055,0.092113,0.664331
3,0.7172,0.643284,0.64875,0.684624,0.644146,0.621181,0.7625,0.535,0.305511,0.701206
4,0.7327,0.722377,0.51125,0.671704,0.357857,0.505689,1.0,0.0225,0.106668,0.682869
5,0.6349,0.666641,0.6475,0.622995,0.646004,0.66954,0.5825,0.7125,0.297525,0.689338
6,0.6221,0.675297,0.64875,0.698821,0.638766,0.611632,0.815,0.4825,0.315448,0.728006


Model seed is: 323, total number of training (for each informative and uninform class) and test samples: 400, 6800 0.05 XLNet-L 323 2e-05 8


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7036,0.704548,0.5425,0.540201,0.542489,0.542929,0.5375,0.5475,0.085004,0.562375
2,0.7023,0.689562,0.55875,0.504909,0.553469,0.57508,0.45,0.6675,0.120382,0.528625
3,0.6998,0.712753,0.50125,0.661578,0.356917,0.500642,0.975,0.0275,0.007818,0.581788
4,0.7258,0.696198,0.5025,0.00995,0.338865,1.0,0.005,1.0,0.050063,0.622262
5,0.7027,0.679221,0.54625,0.670898,0.470258,0.526316,0.925,0.1675,0.14169,0.658194
6,0.6639,0.662202,0.6125,0.565826,0.60797,0.643312,0.505,0.72,0.230388,0.660831


Model seed is: 1694, total number of training (for each informative and uninform class) and test samples: 400, 6800 0.05 XLNet-L 1694 2e-05 8


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7035,0.677087,0.57125,0.564168,0.571137,0.573643,0.555,0.5875,0.142575,0.605144
2,0.6875,0.712533,0.5425,0.652751,0.491211,0.525994,0.86,0.225,0.110031,0.618575
3,0.6953,0.66678,0.58,0.590244,0.579737,0.57619,0.605,0.555,0.1602,0.629956
4,0.6654,0.650638,0.6175,0.640845,0.615877,0.603982,0.6825,0.5525,0.237011,0.676806
5,0.6383,0.646596,0.6575,0.6575,0.6575,0.6575,0.6575,0.6575,0.315,0.709094
6,0.5702,0.638067,0.65625,0.672229,0.655431,0.642369,0.705,0.6075,0.313996,0.729262


Model seed is: 9741, total number of training (for each informative and uninform class) and test samples: 400, 6800 0.05 XLNet-L 9741 2e-05 8


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7013,0.685447,0.5475,0.666052,0.482251,0.527778,0.9025,0.1925,0.134905,0.604837
2,0.6962,0.718419,0.46625,0.56999,0.433265,0.477234,0.7075,0.225,-0.077064,0.476106
3,0.6827,0.6939,0.5,0.0,0.333333,0.0,0.0,1.0,0.0,0.555362
4,0.7199,0.695227,0.5,0.0,0.333333,0.0,0.0,1.0,0.0,0.551828
5,0.7105,0.698084,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.543653
6,0.7256,0.696424,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.5531


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Model seed is: 200, total number of training (for each informative and uninform class) and test samples: 400, 6800 0.05 XLNet-L 200 2e-05 8


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7088,0.689095,0.5475,0.423567,0.525569,0.583333,0.3325,0.7625,0.105225,0.548906
2,0.715,0.681297,0.6025,0.614078,0.602142,0.596698,0.6325,0.5725,0.20537,0.623687
3,0.6772,0.652939,0.60875,0.680286,0.58813,0.57513,0.8325,0.385,0.243211,0.672613
4,0.5878,0.61421,0.6625,0.643799,0.661567,0.681564,0.61,0.715,0.326807,0.719156
5,0.4159,0.64859,0.6775,0.666667,0.677159,0.68984,0.645,0.71,0.355752,0.749638
6,0.3543,0.671474,0.68375,0.676884,0.683607,0.691906,0.6625,0.705,0.367832,0.751819


Model seed is: 999, total number of training (for each informative and uninform class) and test samples: 400, 6800 0.05 XLNet-L 999 2e-05 8


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7353,0.703242,0.5,0.0,0.333333,0.0,0.0,1.0,0.0,0.556481
2,0.7379,0.702501,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.499928
3,0.6947,0.712894,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.58235
4,0.7079,0.693556,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.480406
5,0.7205,0.726328,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.560037
6,0.7579,0.702606,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.566969


  _warn_prf(average, modifier, msg_start, len(result))




Model seed is: 94, total number of training (for each informative and uninform class) and test samples: 400, 6800 0.05 DEBERT-B 94 2e-05 8


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight', 'classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.693,0.689686,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.757988
2,0.5627,0.594436,0.7125,0.72619,0.711779,0.693182,0.7625,0.6625,0.427141,0.787069
3,0.4399,0.976176,0.7,0.758551,0.681256,0.63468,0.9425,0.4575,0.457397,0.81385
4,0.2406,1.154199,0.7375,0.709945,0.735109,0.79321,0.6425,0.8325,0.483813,0.808056
5,0.1461,1.211524,0.75625,0.755332,0.756247,0.758186,0.7525,0.76,0.512514,0.8141
6,0.0609,1.199613,0.7525,0.771363,0.750804,0.716738,0.835,0.67,0.512018,0.816781


Model seed is: 791, total number of training (for each informative and uninform class) and test samples: 400, 6800 0.05 DEBERT-B 791 2e-05 8


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight', 'classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6854,0.675661,0.6825,0.717778,0.67746,0.646,0.8075,0.5575,0.37697,0.748725
2,0.6917,0.625773,0.68625,0.730397,0.677605,0.640301,0.85,0.5225,0.394242,0.753425
3,0.4657,0.664333,0.6975,0.717949,0.695902,0.672489,0.77,0.625,0.399219,0.7742
4,0.4693,0.645788,0.7025,0.746269,0.693376,0.650558,0.875,0.53,0.431492,0.682687
5,0.3484,0.887037,0.695,0.746888,0.68162,0.638298,0.9,0.49,0.427591,0.778112
6,0.2303,0.82113,0.71875,0.736225,0.71751,0.693157,0.785,0.6525,0.441392,0.790094


Model seed is: 5, total number of training (for each informative and uninform class) and test samples: 400, 6800 0.05 DEBERT-B 5 2e-05 8


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight', 'classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6849,0.680803,0.58375,0.70188,0.50622,0.546722,0.98,0.1875,0.274648,0.748962
2,0.5169,0.589987,0.695,0.716279,0.693275,0.669565,0.77,0.62,0.394463,0.779919
3,0.4851,1.087429,0.71375,0.750816,0.707273,0.66474,0.8625,0.565,0.447774,0.798837
4,0.2314,1.188554,0.71,0.692308,0.709038,0.737288,0.6525,0.7675,0.422805,0.786294
5,0.0304,1.403185,0.7225,0.720403,0.722484,0.725888,0.715,0.73,0.44505,0.790631
6,0.0514,1.477059,0.7225,0.738824,0.721412,0.697778,0.785,0.66,0.448518,0.796244


Model seed is: 6932, total number of training (for each informative and uninform class) and test samples: 400, 6800 0.05 DEBERT-B 6932 2e-05 8


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight', 'classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.696,0.686087,0.54375,0.682884,0.434983,0.523302,0.9825,0.105,0.182453,0.752512
2,0.5698,0.715605,0.66125,0.72033,0.645427,0.613357,0.8725,0.45,0.355818,0.735762
3,0.4848,0.711433,0.6825,0.709382,0.67976,0.654008,0.775,0.59,0.371411,0.736437
4,0.3617,0.949098,0.6775,0.720779,0.669561,0.635496,0.8325,0.5225,0.373395,0.768038
5,0.3199,1.056395,0.695,0.698765,0.694952,0.690244,0.7075,0.6825,0.390122,0.764181
6,0.1424,1.136321,0.69,0.72009,0.686376,0.656379,0.7975,0.5825,0.389099,0.76735


Model seed is: 1759, total number of training (for each informative and uninform class) and test samples: 400, 6800 0.05 DEBERT-B 1759 2e-05 8


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight', 'classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6951,0.688589,0.5875,0.628378,0.582448,0.571721,0.6975,0.4775,0.179395,0.685891
2,0.6816,0.648645,0.66375,0.61406,0.658082,0.720539,0.535,0.7925,0.338929,0.732963
3,0.5305,0.573348,0.7275,0.704607,0.725853,0.769231,0.65,0.805,0.460566,0.796663
4,0.3773,0.623655,0.7125,0.709596,0.712471,0.716837,0.7025,0.7225,0.425085,0.795044
5,0.2571,0.946304,0.7275,0.730864,0.727457,0.721951,0.74,0.715,0.455142,0.806294
6,0.1918,1.098926,0.72125,0.729697,0.720978,0.708235,0.7525,0.69,0.443367,0.803494


Model seed is: 323, total number of training (for each informative and uninform class) and test samples: 400, 6800 0.05 DEBERT-B 323 2e-05 8


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight', 'classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6784,0.639193,0.63125,0.693666,0.615279,0.59325,0.835,0.4275,0.287449,0.732825
2,0.5517,0.617079,0.64625,0.726041,0.613461,0.592417,0.9375,0.355,0.359854,0.793944
3,0.2948,0.888627,0.655,0.732558,0.623321,0.598101,0.945,0.365,0.380547,0.802375
4,0.1924,1.085431,0.71,0.66474,0.704617,0.787671,0.575,0.845,0.4362,0.792406
5,0.0784,1.146665,0.72625,0.749714,0.723823,0.690526,0.82,0.6325,0.46067,0.799987
6,0.0387,1.251253,0.72625,0.746234,0.724542,0.695464,0.805,0.6475,0.458219,0.798156


Model seed is: 1694, total number of training (for each informative and uninform class) and test samples: 400, 6800 0.05 DEBERT-B 1694 2e-05 8


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight', 'classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6902,0.66739,0.63625,0.628352,0.636086,0.642298,0.615,0.6575,0.272746,0.684875
2,0.6595,0.651591,0.65,0.690265,0.643983,0.619048,0.78,0.52,0.310685,0.713419
3,0.5562,0.647373,0.6725,0.669192,0.672467,0.67602,0.6625,0.6825,0.345069,0.710444
4,0.4893,0.820653,0.66875,0.710383,0.661761,0.631068,0.8125,0.525,0.352377,0.713537
5,0.3127,1.115035,0.6625,0.639037,0.661068,0.686782,0.5975,0.7275,0.327782,0.727481
6,0.1745,1.139937,0.67125,0.687277,0.670384,0.655329,0.7225,0.62,0.344313,0.725781


Model seed is: 9741, total number of training (for each informative and uninform class) and test samples: 400, 6800 0.05 DEBERT-B 9741 2e-05 8


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight', 'classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6925,0.690038,0.525,0.124424,0.399262,0.794118,0.0675,0.9825,0.12393,0.721037
2,0.6201,0.695748,0.68875,0.726073,0.682863,0.64833,0.825,0.5525,0.392348,0.777981
3,0.5093,0.690407,0.7025,0.720657,0.701238,0.679204,0.7675,0.6375,0.408466,0.7838
4,0.4347,0.734735,0.7075,0.733485,0.704693,0.67364,0.805,0.61,0.423123,0.755719
5,0.2464,0.959836,0.72,0.723457,0.719956,0.714634,0.7325,0.7075,0.440138,0.792531
6,0.183,1.100042,0.7125,0.705128,0.71232,0.723684,0.6875,0.7375,0.425532,0.79005


Model seed is: 200, total number of training (for each informative and uninform class) and test samples: 400, 6800 0.05 DEBERT-B 200 2e-05 8


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight', 'classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6745,0.632227,0.6525,0.64899,0.652465,0.655612,0.6425,0.6625,0.305061,0.726456
2,0.5608,0.568492,0.69625,0.703297,0.696079,0.687351,0.72,0.6725,0.392944,0.773813
3,0.3848,0.734149,0.69125,0.645624,0.686046,0.757576,0.5625,0.82,0.395849,0.786706
4,0.1781,0.954448,0.70375,0.731597,0.700526,0.668737,0.8075,0.6,0.416567,0.787494
5,0.0757,1.115787,0.71625,0.726835,0.715823,0.700696,0.755,0.6775,0.433805,0.794231
6,0.0235,1.175251,0.70625,0.726426,0.704644,0.679739,0.78,0.6325,0.417062,0.793744


Model seed is: 999, total number of training (for each informative and uninform class) and test samples: 400, 6800 0.05 DEBERT-B 999 2e-05 8


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight', 'classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6806,0.596942,0.68,0.690073,0.679662,0.669014,0.7125,0.6475,0.360763,0.746463
2,0.6178,0.667248,0.67125,0.612666,0.663553,0.74552,0.52,0.8225,0.359335,0.759556
3,0.3971,0.720964,0.72,0.726161,0.719858,0.710526,0.7425,0.6975,0.440446,0.791719
4,0.3321,0.796752,0.7225,0.735084,0.721872,0.703196,0.77,0.675,0.447022,0.790162
5,0.0961,1.031741,0.72875,0.746199,0.727462,0.701099,0.7975,0.66,0.461887,0.796713
6,0.1802,1.132832,0.72125,0.721598,0.72125,0.720698,0.7225,0.72,0.442501,0.786775




Model seed is: 94, total number of training (for each informative and uninform class) and test samples: 200, 7000 0.025 ALBERT-L 94 2e-05 4


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7314,0.71365,0.49875,0.652212,0.37756,0.499336,0.94,0.0575,-0.005316,0.540812
2,0.7176,0.706938,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.597788
3,0.7153,0.724623,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.637044
4,0.7585,0.697416,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.655844
5,0.6811,0.69228,0.5,0.0,0.333333,0.0,0.0,1.0,0.0,0.659731
6,0.6821,0.688177,0.5325,0.200855,0.435233,0.691176,0.1175,0.9475,0.116537,0.661162


  _warn_prf(average, modifier, msg_start, len(result))


Model seed is: 791, total number of training (for each informative and uninform class) and test samples: 200, 7000 0.025 ALBERT-L 791 2e-05 4


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7104,0.719994,0.495,0.657627,0.347861,0.497436,0.97,0.02,-0.032026,0.508663
2,0.7049,0.707437,0.4975,0.651042,0.37686,0.49867,0.9375,0.0575,-0.010527,0.445319
3,0.7692,0.695961,0.5025,0.038647,0.351533,0.571429,0.02,0.985,0.019066,0.551619
4,0.7149,0.693647,0.49625,0.411679,0.485621,0.494737,0.3525,0.64,-0.007831,0.502337
5,0.7265,0.702761,0.50625,0.648264,0.410086,0.503458,0.91,0.1025,0.021191,0.550506
6,0.6944,0.691485,0.5375,0.588889,0.530159,0.53,0.6625,0.4125,0.07746,0.537606


Model seed is: 5, total number of training (for each informative and uninform class) and test samples: 200, 7000 0.025 ALBERT-L 5 2e-05 4


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7032,0.70769,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.519756
2,0.6924,0.696123,0.5125,0.470109,0.50936,0.514881,0.4325,0.5925,0.025326,0.53865
3,0.7283,0.673318,0.6,0.532164,0.591409,0.640845,0.455,0.745,0.208981,0.623706
4,0.7565,0.718016,0.505,0.665541,0.356809,0.502551,0.985,0.025,0.035714,0.537438
5,0.7351,0.711493,0.49375,0.661088,0.330544,0.496855,0.9875,0.0,-0.079305,0.532738
6,0.681,0.686952,0.5425,0.319703,0.487535,0.623188,0.215,0.87,0.112489,0.568331


Model seed is: 6932, total number of training (for each informative and uninform class) and test samples: 200, 7000 0.025 ALBERT-L 6932 2e-05 4


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6669,0.677075,0.57625,0.61521,0.571861,0.56341,0.6775,0.475,0.155726,0.605281
2,0.587,0.66367,0.59875,0.655949,0.587345,0.574109,0.765,0.4325,0.209415,0.638381
3,0.3632,0.802188,0.62375,0.686785,0.607868,0.588235,0.825,0.4225,0.270368,0.6679
4,0.5587,1.139201,0.615,0.567416,0.610284,0.647436,0.505,0.725,0.235777,0.656225
5,0.1615,1.39775,0.63,0.645084,0.62933,0.619816,0.6725,0.5875,0.260944,0.659281
6,0.5659,1.560517,0.63625,0.658851,0.634646,0.620309,0.7025,0.57,0.274924,0.654494


Model seed is: 1759, total number of training (for each informative and uninform class) and test samples: 200, 7000 0.025 ALBERT-L 1759 2e-05 4


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6868,0.776319,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.62735
2,0.6421,0.60503,0.695,0.736501,0.687242,0.648289,0.8525,0.5375,0.410919,0.773119
3,0.4622,1.154194,0.68625,0.61912,0.676191,0.787645,0.51,0.8625,0.39805,0.730144
4,0.3261,1.432198,0.72625,0.714472,0.725783,0.746594,0.685,0.7675,0.454048,0.765563
5,0.0009,1.768639,0.71625,0.736964,0.714479,0.686825,0.795,0.6375,0.437966,0.769537
6,0.0009,1.88568,0.71125,0.735395,0.708826,0.678647,0.8025,0.62,0.429717,0.765931


Model seed is: 323, total number of training (for each informative and uninform class) and test samples: 200, 7000 0.025 ALBERT-L 323 2e-05 4


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7192,0.722813,0.49625,0.004938,0.33385,0.2,0.0025,0.99,-0.047583,0.427575
2,0.6926,0.730806,0.45375,0.548087,0.428862,0.467372,0.6625,0.245,-0.101796,0.452062
3,0.7519,0.693194,0.50875,0.630292,0.449224,0.505279,0.8375,0.18,0.023226,0.4934
4,0.6585,0.696322,0.4975,0.664441,0.33222,0.498747,0.995,0.0,-0.050063,0.590206
5,0.7078,0.698322,0.50625,0.052758,0.35943,0.647059,0.0275,0.985,0.043338,0.520669
6,0.7439,0.700858,0.48625,0.481715,0.486211,0.486005,0.4775,0.495,-0.027504,0.506956


Model seed is: 1694, total number of training (for each informative and uninform class) and test samples: 200, 7000 0.025 ALBERT-L 1694 2e-05 4


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7489,0.694477,0.5275,0.42378,0.511678,0.542969,0.3475,0.7075,0.058953,0.472056
2,0.6926,0.699362,0.5025,0.014851,0.341038,0.75,0.0075,0.9975,0.035444,0.429369
3,0.6926,0.727794,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.501269
4,0.7152,0.693386,0.5,0.0,0.333333,0.0,0.0,1.0,0.0,0.515272
5,0.6772,0.688894,0.55875,0.327619,0.499623,0.688,0.215,0.9025,0.161805,0.645425
6,0.7022,0.675389,0.63625,0.628352,0.636086,0.642298,0.615,0.6575,0.272746,0.6779


  _warn_prf(average, modifier, msg_start, len(result))


Model seed is: 9741, total number of training (for each informative and uninform class) and test samples: 200, 7000 0.025 ALBERT-L 9741 2e-05 4


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6899,0.735189,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.509875
2,0.6782,0.661062,0.6025,0.693642,0.563903,0.564263,0.9,0.305,0.255062,0.701406
3,0.6677,0.687556,0.545,0.395349,0.51531,0.589109,0.2975,0.7925,0.10358,0.558262
4,0.6389,0.660291,0.62875,0.623574,0.62868,0.632391,0.615,0.6425,0.257597,0.6534
5,0.634,0.661278,0.61125,0.664509,0.6012,0.58444,0.77,0.4525,0.234641,0.65155
6,0.5475,0.639016,0.63375,0.698249,0.616215,0.593695,0.8475,0.42,0.295902,0.69695


Model seed is: 200, total number of training (for each informative and uninform class) and test samples: 200, 7000 0.025 ALBERT-L 200 2e-05 4


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6967,0.694764,0.50375,0.546286,0.49935,0.503158,0.5975,0.41,0.007635,0.562813
2,0.74,0.683655,0.5775,0.545699,0.57542,0.590116,0.5075,0.6475,0.156542,0.618919
3,0.6835,0.695356,0.5025,0.61657,0.454193,0.501567,0.8,0.205,0.006221,0.475875
4,0.7145,0.698735,0.51,0.62089,0.464155,0.506309,0.8025,0.2175,0.02466,0.494188
5,0.7329,0.706869,0.4925,0.650602,0.361831,0.496063,0.945,0.04,-0.03526,0.474231
6,0.7167,0.70328,0.5,0.66443,0.342019,0.5,0.99,0.01,0.0,0.560944


Model seed is: 999, total number of training (for each informative and uninform class) and test samples: 200, 7000 0.025 ALBERT-L 999 2e-05 4


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7312,0.704052,0.5,0.0,0.333333,0.0,0.0,1.0,0.0,0.515031
2,0.6904,0.699344,0.49625,0.65585,0.358228,0.498054,0.96,0.0325,-0.020063,0.471763
3,0.8065,0.719153,0.5,0.0,0.333333,0.0,0.0,1.0,0.0,0.609544
4,0.7634,0.698878,0.53,0.534653,0.529953,0.529412,0.54,0.52,0.060012,0.523919
5,0.6598,0.70849,0.4875,0.159836,0.395566,0.443182,0.0975,0.8775,-0.03995,0.45825
6,0.7463,0.701687,0.51875,0.202899,0.429113,0.590361,0.1225,0.915,0.061488,0.525925


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))




Model seed is: 94, total number of training (for each informative and uninform class) and test samples: 200, 7000 0.025 DISRoBERTa-B 94 2e-05 4


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6696,0.67899,0.615,0.69685,0.584727,0.574675,0.885,0.345,0.273268,0.725656
2,0.5959,0.582707,0.7,0.710145,0.699632,0.686916,0.735,0.665,0.400984,0.769344
3,0.5199,0.669769,0.69625,0.734426,0.689841,0.652427,0.84,0.5525,0.409802,0.778087
4,0.0595,0.864332,0.69875,0.709988,0.698297,0.684455,0.7375,0.66,0.398699,0.784119
5,0.0154,1.2362,0.69,0.729847,0.683106,0.646718,0.8375,0.5425,0.397699,0.778738
6,0.104,1.276052,0.695,0.727679,0.690544,0.657258,0.815,0.575,0.401742,0.777319


Model seed is: 791, total number of training (for each informative and uninform class) and test samples: 200, 7000 0.025 DISRoBERTa-B 791 2e-05 4


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6928,0.687124,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.711619
2,0.5887,0.628155,0.65375,0.616874,0.650512,0.690402,0.5575,0.75,0.313361,0.741125
3,0.4681,0.848225,0.685,0.683417,0.684992,0.686869,0.68,0.69,0.370019,0.756794
4,0.1153,1.352373,0.68375,0.718576,0.678832,0.647295,0.8075,0.56,0.379301,0.762044
5,0.1327,1.539172,0.67375,0.660598,0.673259,0.688347,0.635,0.7125,0.348548,0.752375
6,0.0091,1.605907,0.68125,0.695341,0.680567,0.665904,0.7275,0.635,0.364061,0.758838


Model seed is: 5, total number of training (for each informative and uninform class) and test samples: 200, 7000 0.025 DISRoBERTa-B 5 2e-05 4


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6941,0.683812,0.51,0.668919,0.363306,0.505102,0.99,0.03,0.071429,0.723994
2,0.5802,0.626611,0.69,0.706161,0.689059,0.671171,0.745,0.635,0.38232,0.7544
3,0.7118,1.300208,0.6475,0.705637,0.633192,0.605735,0.845,0.45,0.321113,0.750337
4,0.076,1.475476,0.67875,0.698002,0.677439,0.658537,0.7425,0.615,0.360442,0.751287
5,0.295,1.707565,0.66625,0.702341,0.66127,0.633803,0.7875,0.545,0.34273,0.744188
6,0.1519,1.719343,0.67375,0.696158,0.671966,0.651416,0.7475,0.6,0.351343,0.746969


Model seed is: 6932, total number of training (for each informative and uninform class) and test samples: 200, 7000 0.025 DISRoBERTa-B 6932 2e-05 4


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6885,0.676152,0.64375,0.609053,0.640922,0.674772,0.555,0.7325,0.292139,0.705438
2,0.4459,0.819703,0.595,0.677291,0.566833,0.562914,0.85,0.34,0.220885,0.706944
3,0.4038,0.939581,0.63625,0.675585,0.630823,0.609658,0.7575,0.515,0.280884,0.728325
4,0.3958,1.41273,0.66,0.59403,0.650778,0.737037,0.4975,0.8225,0.338369,0.735313
5,0.185,1.570221,0.67,0.657143,0.669535,0.683784,0.6325,0.7075,0.34096,0.730613
6,0.0044,1.631371,0.67,0.653543,0.669254,0.687845,0.6225,0.7175,0.341545,0.729006


Model seed is: 1759, total number of training (for each informative and uninform class) and test samples: 200, 7000 0.025 DISRoBERTa-B 1759 2e-05 4


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6771,0.667708,0.635,0.7154,0.603344,0.586262,0.9175,0.3525,0.327237,0.723475
2,0.5794,0.573814,0.71125,0.720677,0.710921,0.697892,0.745,0.6775,0.423466,0.784025
3,0.4467,0.696012,0.71875,0.689655,0.716256,0.769231,0.625,0.8125,0.445399,0.789631
4,0.5332,1.370027,0.71625,0.712294,0.716196,0.722365,0.7025,0.73,0.432664,0.783963
5,0.1183,1.54809,0.72375,0.739079,0.722793,0.700224,0.7825,0.665,0.450622,0.792994
6,0.0496,1.632788,0.72375,0.750846,0.720444,0.683778,0.8325,0.615,0.458476,0.795259


Model seed is: 323, total number of training (for each informative and uninform class) and test samples: 200, 7000 0.025 DISRoBERTa-B 323 2e-05 4


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6875,0.690178,0.5,0.0,0.333333,0.0,0.0,1.0,0.0,0.746544
2,0.6997,0.642561,0.705,0.693506,0.704585,0.721622,0.6675,0.7425,0.411158,0.789869
3,0.5157,0.883197,0.6325,0.722117,0.589841,0.580547,0.955,0.31,0.346776,0.783669
4,0.2585,0.806582,0.705,0.737194,0.700506,0.664659,0.8275,0.5825,0.422888,0.782175
5,0.2282,1.053887,0.69875,0.732519,0.693871,0.658683,0.825,0.5725,0.410812,0.781444
6,0.1984,1.149198,0.7025,0.738462,0.696767,0.658824,0.84,0.565,0.421241,0.782863


  _warn_prf(average, modifier, msg_start, len(result))


Model seed is: 1694, total number of training (for each informative and uninform class) and test samples: 200, 7000 0.025 DISRoBERTa-B 1694 2e-05 4


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7049,0.677093,0.50625,0.669456,0.347074,0.503145,1.0,0.0125,0.079305,0.725394
2,0.4745,0.736566,0.6525,0.715164,0.634826,0.605903,0.8725,0.4325,0.339644,0.739969
3,0.1658,0.90066,0.68125,0.696067,0.680491,0.665148,0.73,0.6325,0.364235,0.748562
4,0.1876,1.326179,0.68625,0.700119,0.685577,0.670481,0.7325,0.64,0.374104,0.746069
5,0.0563,1.595387,0.67625,0.654206,0.674929,0.702006,0.6125,0.74,0.355401,0.733806
6,0.043,1.629517,0.6925,0.691729,0.692498,0.693467,0.69,0.695,0.385005,0.737069


Model seed is: 9741, total number of training (for each informative and uninform class) and test samples: 200, 7000 0.025 DISRoBERTa-B 9741 2e-05 4


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6849,0.680479,0.67,0.697248,0.667305,0.644068,0.76,0.58,0.345646,0.708106
2,0.618,0.820782,0.5775,0.698752,0.49582,0.542936,0.98,0.175,0.261262,0.76085
3,0.3131,0.791197,0.69,0.718821,0.686708,0.657676,0.7925,0.5875,0.388246,0.769575
4,0.0284,1.164798,0.69,0.698297,0.689765,0.680095,0.7175,0.6625,0.380576,0.772581
5,0.1594,1.385129,0.68625,0.700834,0.685503,0.669704,0.735,0.6375,0.374283,0.773913
6,0.0757,1.444523,0.69,0.706161,0.689059,0.671171,0.745,0.635,0.38232,0.773919


Model seed is: 200, total number of training (for each informative and uninform class) and test samples: 200, 7000 0.025 DISRoBERTa-B 200 2e-05 4


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6922,0.681701,0.64,0.702479,0.623392,0.598592,0.85,0.43,0.308532,0.73875
2,0.5635,0.594838,0.67625,0.700578,0.674099,0.651613,0.7575,0.595,0.357248,0.755456
3,0.394,0.711788,0.695,0.680628,0.694381,0.714286,0.65,0.74,0.391589,0.763213
4,0.6276,1.089945,0.68375,0.699168,0.682917,0.666667,0.735,0.6325,0.369446,0.772294
5,0.3312,1.255428,0.69,0.705463,0.689143,0.671946,0.7425,0.6375,0.382112,0.774944
6,0.0904,1.318912,0.68625,0.700834,0.685503,0.669704,0.735,0.6375,0.374283,0.774263


Model seed is: 999, total number of training (for each informative and uninform class) and test samples: 200, 7000 0.025 DISRoBERTa-B 999 2e-05 4


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7175,0.687758,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.714769
2,0.6325,0.727266,0.55625,0.276986,0.478439,0.747253,0.17,0.9425,0.177161,0.726588
3,0.6198,0.756593,0.685,0.688889,0.684951,0.680488,0.6975,0.6725,0.370116,0.750812
4,0.3634,1.093897,0.69,0.71754,0.687025,0.658996,0.7875,0.5925,0.387438,0.754975
5,0.0083,1.432201,0.66875,0.699887,0.665146,0.639752,0.7725,0.565,0.345009,0.74805
6,0.2876,1.490528,0.675,0.68059,0.6749,0.669082,0.6925,0.6575,0.350215,0.746906




Model seed is: 94, total number of training (for each informative and uninform class) and test samples: 200, 7000 0.025 XLNet-B 94 2e-05 4


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6605,0.641652,0.63,0.611549,0.629163,0.643646,0.5825,0.6775,0.261181,0.659738
2,0.694,0.669872,0.53125,0.677558,0.409717,0.516383,0.985,0.0775,0.148791,0.722406
3,0.622,0.737617,0.6375,0.719536,0.603584,0.586751,0.93,0.345,0.339074,0.730312
4,0.507,0.632157,0.7125,0.730679,0.711184,0.687225,0.78,0.645,0.428927,0.773863
5,0.3725,0.692023,0.69875,0.723307,0.696358,0.66879,0.7875,0.61,0.403914,0.779175
6,0.2586,0.764567,0.69125,0.730643,0.684502,0.647969,0.8375,0.545,0.399994,0.778706


Model seed is: 791, total number of training (for each informative and uninform class) and test samples: 200, 7000 0.025 XLNet-B 791 2e-05 4


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6766,0.680355,0.54375,0.215054,0.446734,0.769231,0.125,0.9625,0.160128,0.618287
2,0.6363,0.64647,0.61375,0.670224,0.60208,0.58473,0.785,0.4425,0.242145,0.692906
3,0.3877,0.864564,0.635,0.674833,0.629439,0.608434,0.7575,0.5125,0.278487,0.7159
4,0.2411,1.604349,0.64125,0.712713,0.617588,0.594324,0.89,0.3925,0.325662,0.731656
5,0.4152,1.71744,0.66125,0.648508,0.660804,0.673854,0.625,0.6975,0.323351,0.7302
6,0.0202,1.840635,0.66625,0.687719,0.664665,0.646154,0.735,0.5975,0.335688,0.73305


Model seed is: 5, total number of training (for each informative and uninform class) and test samples: 200, 7000 0.025 XLNet-B 5 2e-05 4


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6764,0.707047,0.535,0.670796,0.439654,0.519178,0.9475,0.1225,0.123865,0.635712
2,0.64,0.610454,0.68,0.673469,0.679872,0.6875,0.66,0.7,0.360288,0.744244
3,0.4847,0.860454,0.67125,0.709392,0.665487,0.635644,0.8025,0.54,0.354947,0.7487
4,0.3778,1.166632,0.655,0.680556,0.652778,0.633621,0.735,0.575,0.314046,0.738538
5,0.3684,1.760553,0.64875,0.712385,0.630671,0.60312,0.87,0.4275,0.331747,0.740525
6,0.0383,1.801491,0.65125,0.706006,0.638717,0.6102,0.8375,0.465,0.325959,0.742169


Model seed is: 6932, total number of training (for each informative and uninform class) and test samples: 200, 7000 0.025 XLNet-B 6932 2e-05 4


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6996,0.680119,0.5925,0.605327,0.592069,0.586854,0.625,0.56,0.185392,0.637194
2,0.6741,0.715014,0.57125,0.674264,0.523604,0.543645,0.8875,0.255,0.183975,0.633625
3,0.5363,0.746543,0.615,0.608142,0.614882,0.619171,0.5975,0.6325,0.230141,0.672219
4,0.6829,0.787908,0.61375,0.544919,0.604707,0.663082,0.4625,0.765,0.238682,0.679206
5,0.3718,0.888247,0.6525,0.607345,0.647843,0.698052,0.5375,0.7675,0.313402,0.693975
6,0.2242,0.997248,0.64125,0.613728,0.639419,0.664723,0.57,0.7125,0.285413,0.692012


Model seed is: 1759, total number of training (for each informative and uninform class) and test samples: 200, 7000 0.025 XLNet-B 1759 2e-05 4


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7166,0.671915,0.61125,0.622114,0.610928,0.605201,0.64,0.5825,0.222869,0.590437
2,0.6395,0.656252,0.625,0.642005,0.624152,0.614155,0.6725,0.5775,0.251136,0.640594
3,0.563,0.812585,0.53375,0.142529,0.411179,0.885714,0.0775,0.99,0.165006,0.700075
4,0.6928,0.788956,0.6525,0.530405,0.627306,0.817708,0.3925,0.9125,0.357073,0.759331
5,0.2903,0.765372,0.70625,0.736842,0.702226,0.667343,0.8225,0.59,0.424122,0.778394
6,0.1133,0.863055,0.71125,0.739572,0.707794,0.673511,0.82,0.6025,0.432863,0.782181


Model seed is: 323, total number of training (for each informative and uninform class) and test samples: 200, 7000 0.025 XLNet-B 323 2e-05 4


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7213,0.684082,0.54625,0.582278,0.542849,0.539446,0.6325,0.46,0.093908,0.54205
2,0.7526,0.684677,0.6125,0.657837,0.605575,0.588933,0.745,0.48,0.233342,0.664106
3,0.7142,0.820714,0.5975,0.68616,0.562592,0.5623,0.88,0.315,0.236338,0.693544
4,0.558,0.928843,0.63,0.634568,0.629942,0.626829,0.6425,0.6175,0.260081,0.69345
5,0.517,1.205615,0.63875,0.648846,0.638451,0.631206,0.6675,0.61,0.27796,0.694106
6,0.346,1.378722,0.6475,0.642132,0.647421,0.652062,0.6325,0.6625,0.295133,0.700231


Model seed is: 1694, total number of training (for each informative and uninform class) and test samples: 200, 7000 0.025 XLNet-B 1694 2e-05 4


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7395,0.729957,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.595906
2,0.6253,0.652079,0.60375,0.675537,0.583355,0.571924,0.825,0.3825,0.231386,0.690638
3,0.3946,0.682043,0.6525,0.669048,0.651629,0.638636,0.7025,0.6025,0.306537,0.722769
4,0.5028,0.909933,0.67125,0.686532,0.670467,0.656036,0.72,0.6225,0.34414,0.734237
5,0.1271,1.059775,0.69,0.67624,0.689439,0.70765,0.6475,0.7325,0.38138,0.742144
6,0.1743,1.228134,0.67375,0.691124,0.672714,0.65618,0.73,0.6175,0.34972,0.738413


Model seed is: 9741, total number of training (for each informative and uninform class) and test samples: 200, 7000 0.025 XLNet-B 9741 2e-05 4


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7192,0.733857,0.58125,0.680648,0.536331,0.550077,0.8925,0.27,0.207636,0.656112
2,0.8072,0.812104,0.53625,0.679343,0.420936,0.519155,0.9825,0.09,0.160737,0.665687
3,0.5054,0.640481,0.6675,0.689252,0.665863,0.64693,0.7375,0.5975,0.338332,0.715531
4,0.3267,0.776319,0.66,0.699115,0.654155,0.626984,0.79,0.53,0.331397,0.728062
5,0.3728,0.98758,0.64,0.701863,0.623802,0.59894,0.8475,0.4325,0.307753,0.725381
6,0.1502,1.010283,0.65125,0.693069,0.644653,0.618861,0.7875,0.515,0.314398,0.731812


Model seed is: 200, total number of training (for each informative and uninform class) and test samples: 200, 7000 0.025 XLNet-B 200 2e-05 4


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.716,0.712975,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.498556
2,0.7378,0.677218,0.575,0.685185,0.51567,0.544118,0.925,0.225,0.210042,0.630175
3,0.6334,0.639955,0.63125,0.61738,0.630765,0.641509,0.595,0.6675,0.263193,0.684106
4,0.4698,0.773029,0.64125,0.713858,0.616561,0.593698,0.895,0.3875,0.327859,0.705375
5,0.3526,0.831604,0.675,0.691943,0.674014,0.657658,0.73,0.62,0.352137,0.720319
6,0.2653,0.948138,0.665,0.706783,0.658056,0.628405,0.8075,0.5225,0.344278,0.717394


Model seed is: 999, total number of training (for each informative and uninform class) and test samples: 200, 7000 0.025 XLNet-B 999 2e-05 4


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6569,0.700663,0.56375,0.393043,0.526278,0.645714,0.2825,0.845,0.15421,0.614219
2,0.6624,0.638211,0.645,0.654501,0.644731,0.637441,0.6725,0.6175,0.29044,0.692394
3,0.5213,0.731564,0.67125,0.652576,0.670297,0.691877,0.6175,0.725,0.344496,0.728987
4,0.297,0.812633,0.68625,0.698679,0.685715,0.672055,0.7275,0.645,0.373774,0.737194
5,0.0594,1.09488,0.67125,0.681212,0.670929,0.661176,0.7025,0.64,0.343171,0.735281
6,0.1089,1.347857,0.6725,0.70362,0.668849,0.642562,0.7775,0.5675,0.352868,0.736344




Model seed is: 94, total number of training (for each informative and uninform class) and test samples: 200, 7000 0.025 XLNet-L 94 2e-05 4


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7314,0.685234,0.5425,0.207792,0.443088,0.774194,0.12,0.965,0.158948,0.506444
2,0.7716,0.706669,0.5075,0.029557,0.349787,1.0,0.015,1.0,0.086929,0.482106
3,0.7264,0.694459,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.560609
4,0.8248,0.69429,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.610806
5,0.7093,0.692557,0.5075,0.029557,0.349787,1.0,0.015,1.0,0.086929,0.591969
6,0.7675,0.693804,0.51625,0.062954,0.368461,1.0,0.0325,1.0,0.128524,0.589419


  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))


Model seed is: 791, total number of training (for each informative and uninform class) and test samples: 200, 7000 0.025 XLNet-L 791 2e-05 4


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6336,0.677682,0.59,0.549451,0.586652,0.609756,0.5,0.68,0.182989,0.588975
2,0.7831,0.668716,0.58,0.498507,0.568609,0.618519,0.4175,0.7425,0.169184,0.625344
3,0.7966,0.715124,0.51875,0.67234,0.383229,0.509677,0.9875,0.05,0.107763,0.639787
4,0.6595,0.685672,0.60875,0.665955,0.596929,0.581006,0.78,0.4375,0.231502,0.671725
5,0.3898,1.008648,0.61625,0.583446,0.613855,0.637982,0.5375,0.695,0.235439,0.6627
6,0.3065,1.353065,0.60625,0.484452,0.582974,0.701422,0.37,0.8425,0.241113,0.661763


Model seed is: 5, total number of training (for each informative and uninform class) and test samples: 200, 7000 0.025 XLNet-L 5 2e-05 4


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7526,0.697899,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.495806
2,0.7511,0.735771,0.5,0.0,0.333333,0.0,0.0,1.0,0.0,0.527
3,0.7959,0.693277,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.527913
4,0.6906,0.693071,0.5,0.0,0.333333,0.0,0.0,1.0,0.0,0.6141
5,0.7399,0.700453,0.5,0.0,0.333333,0.0,0.0,1.0,0.0,0.639294
6,0.7198,0.693536,0.5,0.0,0.333333,0.0,0.0,1.0,0.0,0.630694


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))


Model seed is: 6932, total number of training (for each informative and uninform class) and test samples: 200, 7000 0.025 XLNet-L 6932 2e-05 4


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6867,0.696651,0.525,0.233871,0.444834,0.604167,0.145,0.905,0.076932,0.5585
2,0.6654,0.691111,0.5425,0.580275,0.538764,0.536017,0.6325,0.4525,0.086411,0.570594
3,0.7839,0.721725,0.53625,0.672551,0.439059,0.519782,0.9525,0.12,0.130861,0.6149
4,0.6637,0.681644,0.56625,0.396522,0.528993,0.651429,0.285,0.8475,0.160257,0.619594
5,0.5496,0.656974,0.6175,0.62954,0.617096,0.610329,0.65,0.585,0.235498,0.662088
6,0.7133,0.654386,0.635,0.667426,0.631497,0.612971,0.7325,0.5375,0.275285,0.672063


Model seed is: 1759, total number of training (for each informative and uninform class) and test samples: 200, 7000 0.025 XLNet-L 1759 2e-05 4


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7559,0.699568,0.50125,0.004988,0.336105,1.0,0.0025,1.0,0.035377,0.574084
2,0.7561,0.695228,0.51,0.096774,0.380291,0.617647,0.0525,0.9675,0.049572,0.564722
3,0.7792,0.817408,0.51625,0.110345,0.389078,0.685714,0.06,0.9725,0.079447,0.506088
4,0.7251,0.656348,0.6425,0.601671,0.638704,0.679245,0.54,0.745,0.291184,0.674044
5,0.6735,0.646215,0.65625,0.686431,0.653036,0.631027,0.7525,0.56,0.318456,0.6757
6,0.5624,0.627541,0.665,0.678657,0.664394,0.652074,0.7075,0.6225,0.331199,0.703231


Model seed is: 323, total number of training (for each informative and uninform class) and test samples: 200, 7000 0.025 XLNet-L 323 2e-05 4


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7468,0.774791,0.5,0.0,0.333333,0.0,0.0,1.0,0.0,0.508628
2,0.7209,0.693654,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.444044
3,0.7148,0.703479,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.443563
4,0.7352,0.694008,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.442459
5,0.7101,0.699188,0.5,0.0,0.333333,0.0,0.0,1.0,0.0,0.456156
6,0.6898,0.709905,0.5,0.0,0.333333,0.0,0.0,1.0,0.0,0.442738


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))


Model seed is: 1694, total number of training (for each informative and uninform class) and test samples: 200, 7000 0.025 XLNet-L 1694 2e-05 4


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7505,0.668141,0.625,0.546828,0.613499,0.69084,0.4525,0.7975,0.266353,0.61245
2,0.7146,0.690925,0.5725,0.674905,0.525409,0.544479,0.8875,0.2575,0.186712,0.664931
3,0.6392,0.703007,0.535,0.671958,0.436837,0.519074,0.9525,0.1175,0.127215,0.659969
4,0.5436,0.660806,0.6175,0.658482,0.611912,0.594758,0.7375,0.4975,0.242075,0.632581
5,0.5721,0.760307,0.58125,0.681256,0.535528,0.549923,0.895,0.2675,0.208703,0.685075
6,0.4106,0.728171,0.6025,0.676171,0.580804,0.570447,0.83,0.375,0.23021,0.692381


Model seed is: 9741, total number of training (for each informative and uninform class) and test samples: 200, 7000 0.025 XLNet-L 9741 2e-05 4


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7747,0.703357,0.52125,0.139326,0.403862,0.688889,0.0775,0.965,0.092229,0.572275
2,0.7273,0.698195,0.51875,0.672897,0.381366,0.509653,0.99,0.0475,0.112206,0.569575
3,0.6924,0.803741,0.5075,0.670017,0.349787,0.503778,1.0,0.015,0.086929,0.592825
4,0.5392,0.644817,0.64,0.644444,0.639944,0.636585,0.6525,0.6275,0.280088,0.680131
5,0.5117,0.738429,0.64875,0.711202,0.631518,0.603839,0.865,0.4325,0.329957,0.727438
6,0.354,0.741718,0.635,0.697095,0.618988,0.595745,0.84,0.43,0.296025,0.7203


Model seed is: 200, total number of training (for each informative and uninform class) and test samples: 200, 7000 0.025 XLNet-L 200 2e-05 4


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6819,0.692328,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.585025
2,0.761,0.68271,0.57375,0.459588,0.553839,0.627706,0.3625,0.785,0.162738,0.61825
3,0.7091,0.66062,0.60625,0.563107,0.602373,0.632399,0.5075,0.705,0.21677,0.662744
4,0.7729,0.750913,0.615,0.65,0.611111,0.595833,0.715,0.515,0.234743,0.663037
5,0.3113,1.148225,0.61875,0.510433,0.599127,0.713004,0.3975,0.84,0.26484,0.707031
6,0.2957,1.565016,0.6375,0.620419,0.636764,0.651099,0.5925,0.6825,0.276121,0.68625


Model seed is: 999, total number of training (for each informative and uninform class) and test samples: 200, 7000 0.025 XLNet-L 999 2e-05 4


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7163,0.686767,0.55,0.55774,0.549862,0.548309,0.5675,0.5325,0.100061,0.550937
2,0.7352,0.696938,0.52125,0.467316,0.516291,0.526646,0.42,0.6225,0.043399,0.48745
3,0.6825,0.740691,0.50875,0.668354,0.360683,0.504459,0.99,0.0275,0.064509,0.63895
4,0.6003,0.675724,0.59,0.623853,0.586652,0.576271,0.68,0.5,0.182989,0.6425
5,0.4815,0.671289,0.62375,0.570613,0.617898,0.664452,0.5,0.7475,0.255448,0.676219
6,0.3713,0.766293,0.6175,0.658482,0.611912,0.594758,0.7375,0.4975,0.242075,0.670856




Model seed is: 94, total number of training (for each informative and uninform class) and test samples: 200, 7000 0.025 DEBERT-B 94 2e-05 4


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight', 'classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6505,0.650411,0.66125,0.634278,0.659397,0.68915,0.5875,0.735,0.326066,0.70065
2,0.5373,0.623322,0.655,0.710692,0.641724,0.611913,0.8475,0.4625,0.335892,0.748469
3,0.2745,0.956109,0.6925,0.667568,0.690761,0.726471,0.6175,0.7675,0.389406,0.732662
4,0.0777,1.621733,0.675,0.701835,0.672346,0.648305,0.765,0.585,0.355812,0.724581
5,0.0029,1.819179,0.675,0.682927,0.674797,0.666667,0.7,0.65,0.350438,0.720919
6,0.0017,1.851291,0.66875,0.671623,0.668725,0.665848,0.6775,0.66,0.337552,0.722031


Model seed is: 791, total number of training (for each informative and uninform class) and test samples: 200, 7000 0.025 DEBERT-B 791 2e-05 4


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight', 'classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6416,0.640272,0.62625,0.699497,0.602641,0.584874,0.87,0.3825,0.289192,0.711769
2,0.6112,0.716108,0.61875,0.698318,0.590246,0.577741,0.8825,0.355,0.279558,0.729169
3,0.1791,1.23052,0.63,0.580737,0.62482,0.669935,0.5125,0.7475,0.267491,0.695944
4,0.1453,1.855749,0.64625,0.66823,0.644691,0.629139,0.7125,0.58,0.295102,0.713356
5,0.0079,1.991041,0.63625,0.632111,0.636204,0.639386,0.625,0.6475,0.272569,0.703694
6,0.0022,2.008166,0.64,0.643564,0.639964,0.637255,0.65,0.63,0.280056,0.708281


Model seed is: 5, total number of training (for each informative and uninform class) and test samples: 200, 7000 0.025 DEBERT-B 5 2e-05 4


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight', 'classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6931,0.693822,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.68145
2,0.7647,0.67535,0.64625,0.699894,0.634574,0.607735,0.825,0.4675,0.313198,0.717006
3,0.5685,0.969419,0.66,0.68,0.658667,0.642222,0.7225,0.5975,0.32253,0.715369
4,0.0238,1.612703,0.64375,0.698413,0.631649,0.605505,0.825,0.4625,0.308482,0.711606
5,0.2762,1.792676,0.64,0.704312,0.622124,0.597561,0.8575,0.4225,0.310962,0.712125
6,0.3376,1.782675,0.65375,0.705005,0.642972,0.6141,0.8275,0.48,0.327937,0.712006


Model seed is: 6932, total number of training (for each informative and uninform class) and test samples: 200, 7000 0.025 DEBERT-B 6932 2e-05 4


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight', 'classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6891,0.655666,0.625,0.659864,0.621018,0.603734,0.7275,0.5225,0.255425,0.699063
2,0.4798,0.81434,0.60875,0.692232,0.577677,0.570502,0.88,0.3375,0.258911,0.718944
3,0.3809,1.483018,0.63625,0.68676,0.626539,0.603025,0.7975,0.475,0.287882,0.728481
4,0.1937,1.924725,0.6475,0.520408,0.620876,0.81383,0.3825,0.9125,0.347878,0.708019
5,0.2297,1.750561,0.67,0.631285,0.666321,0.71519,0.565,0.775,0.347754,0.727656
6,0.0027,1.808084,0.6625,0.667488,0.662424,0.657767,0.6775,0.6475,0.325146,0.725637


Model seed is: 1759, total number of training (for each informative and uninform class) and test samples: 200, 7000 0.025 DEBERT-B 1759 2e-05 4


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight', 'classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6914,0.675258,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.707944
2,0.7119,0.644708,0.69,0.689223,0.689998,0.690955,0.6875,0.6925,0.380005,0.746269
3,0.5412,1.675772,0.5475,0.177273,0.432602,0.975,0.0975,0.9975,0.217945,0.698531
4,0.3071,1.137628,0.705,0.731207,0.702169,0.671548,0.8025,0.6075,0.418025,0.776425
5,0.1346,1.297882,0.69875,0.724571,0.696079,0.667368,0.7925,0.605,0.404677,0.784825
6,0.1249,1.358456,0.70125,0.731159,0.697506,0.664622,0.8125,0.59,0.412849,0.786006


Model seed is: 323, total number of training (for each informative and uninform class) and test samples: 200, 7000 0.025 DEBERT-B 323 2e-05 4


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight', 'classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6942,0.692037,0.61875,0.559885,0.611806,0.662116,0.485,0.7525,0.246482,0.696247
2,0.6967,0.664424,0.63625,0.540284,0.619677,0.733906,0.4275,0.845,0.299887,0.636038
3,0.5388,0.705666,0.65375,0.691193,0.648584,0.623742,0.775,0.5325,0.316961,0.731056
4,0.3868,1.293218,0.6625,0.71519,0.65054,0.618613,0.8475,0.4775,0.349827,0.741237
5,0.2657,1.578512,0.675,0.68059,0.6749,0.669082,0.6925,0.6575,0.350215,0.735694
6,0.1349,1.690853,0.665,0.661616,0.664966,0.668367,0.655,0.675,0.330066,0.7289


Model seed is: 1694, total number of training (for each informative and uninform class) and test samples: 200, 7000 0.025 DEBERT-B 1694 2e-05 4


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight', 'classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7003,0.694821,0.50125,0.667223,0.336105,0.500626,1.0,0.0025,0.035377,0.513994
2,0.6696,0.69314,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.630631
3,0.6976,0.692243,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.709975
4,0.7125,0.67008,0.66875,0.649934,0.66779,0.689076,0.615,0.7225,0.339467,0.698688
5,0.5507,0.709513,0.65,0.569231,0.637247,0.74,0.4625,0.8375,0.323616,0.729981
6,0.5047,0.69597,0.68,0.69378,0.679351,0.665138,0.725,0.635,0.361467,0.721069


Model seed is: 9741, total number of training (for each informative and uninform class) and test samples: 200, 7000 0.025 DEBERT-B 9741 2e-05 4


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight', 'classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7005,0.693614,0.5,0.0,0.333333,0.0,0.0,1.0,0.0,0.568409
2,0.6752,0.793106,0.50125,0.666667,0.338296,0.500627,0.9975,0.005,0.020451,0.606625
3,0.5784,0.619386,0.65875,0.643137,0.658096,0.673973,0.615,0.7025,0.318722,0.723369
4,0.2084,0.801985,0.70375,0.707046,0.703713,0.699267,0.715,0.6925,0.407603,0.7521
5,0.1067,1.301257,0.70125,0.700876,0.70125,0.701754,0.7,0.7025,0.402501,0.749725
6,0.0972,1.416816,0.69125,0.712456,0.689562,0.666667,0.765,0.6175,0.38673,0.752594


  _warn_prf(average, modifier, msg_start, len(result))


Model seed is: 200, total number of training (for each informative and uninform class) and test samples: 200, 7000 0.025 DEBERT-B 200 2e-05 4


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight', 'classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7094,0.695419,0.5,0.0,0.333333,0.0,0.0,1.0,0.0,0.606169
2,0.6509,0.725811,0.6275,0.697154,0.606694,0.587329,0.8575,0.3975,0.287188,0.706631
3,0.6192,0.886195,0.63625,0.648126,0.635835,0.627635,0.67,0.6025,0.273123,0.699588
4,0.552,1.095274,0.65125,0.68757,0.646472,0.622718,0.7675,0.535,0.311023,0.725544
5,0.2632,1.583933,0.66625,0.705623,0.660171,0.631164,0.8,0.5325,0.345075,0.723381
6,0.1122,1.669183,0.6625,0.707158,0.654464,0.624521,0.815,0.51,0.34126,0.727663


  _warn_prf(average, modifier, msg_start, len(result))


Model seed is: 999, total number of training (for each informative and uninform class) and test samples: 200, 7000 0.025 DEBERT-B 999 2e-05 4


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight', 'classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6652,0.830911,0.5,0.0,0.333333,0.0,0.0,1.0,0.0,0.658319
2,0.6224,0.791423,0.60625,0.470588,0.578578,0.717949,0.35,0.8625,0.247471,0.7063
3,0.4389,1.285354,0.65125,0.620408,0.648932,0.680597,0.57,0.7325,0.306575,0.718294
4,0.1309,1.603811,0.68,0.695238,0.679198,0.663636,0.73,0.63,0.361814,0.740537
5,0.0018,1.687209,0.69125,0.706302,0.690437,0.673469,0.7425,0.64,0.384525,0.745613
6,0.0013,1.741937,0.68125,0.698225,0.680238,0.662921,0.7375,0.625,0.364816,0.745812


  _warn_prf(average, modifier, msg_start, len(result))




Model seed is: 94, total number of training (for each informative and uninform class) and test samples: 80, 7120 0.01 ALBERT-L 94 2e-05 4


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7279,0.689865,0.52375,0.566553,0.51906,0.519833,0.6225,0.425,0.048454,0.536744
2,0.5108,0.674669,0.60875,0.644722,0.604698,0.590437,0.71,0.5075,0.222101,0.665431
3,0.1868,0.982278,0.615,0.635071,0.613832,0.603604,0.67,0.56,0.231404,0.67545
4,0.1924,1.298274,0.62125,0.653714,0.617892,0.602105,0.715,0.5275,0.246878,0.68205
5,0.0155,1.502687,0.62375,0.661417,0.619035,0.601227,0.735,0.5125,0.253864,0.687444
6,0.0881,1.543117,0.6275,0.667411,0.622058,0.602823,0.7475,0.5075,0.262677,0.685637


Model seed is: 791, total number of training (for each informative and uninform class) and test samples: 80, 7120 0.01 ALBERT-L 791 2e-05 4


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7165,0.679645,0.54125,0.676081,0.445107,0.52251,0.9575,0.125,0.14891,0.646925
2,0.6598,0.682052,0.5525,0.23176,0.458032,0.818182,0.135,0.97,0.190822,0.705712
3,0.3817,0.680971,0.66625,0.708197,0.659208,0.629126,0.81,0.5225,0.347157,0.69585
4,0.2227,0.824705,0.6425,0.52649,0.619671,0.779412,0.3975,0.8875,0.326939,0.7235
5,0.1338,0.950259,0.6525,0.599424,0.64629,0.707483,0.52,0.785,0.316309,0.723525
6,0.0326,1.077675,0.6575,0.586103,0.646996,0.740458,0.485,0.83,0.335605,0.720691


Model seed is: 5, total number of training (for each informative and uninform class) and test samples: 80, 7120 0.01 ALBERT-L 5 2e-05 4


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6885,0.662495,0.585,0.645299,0.57265,0.563433,0.755,0.415,0.180769,0.664413
2,0.6273,0.903331,0.5325,0.673077,0.426451,0.517473,0.9625,0.1025,0.127378,0.675319
3,0.2758,0.749523,0.635,0.681223,0.627161,0.604651,0.78,0.49,0.282124,0.687137
4,0.1207,0.95653,0.63625,0.653159,0.635383,0.624146,0.685,0.5875,0.273805,0.672862
5,0.0252,1.20005,0.6425,0.645161,0.64248,0.640394,0.65,0.635,0.285032,0.664669
6,0.0128,1.267151,0.64125,0.650426,0.641003,0.634204,0.6675,0.615,0.28289,0.669931


Model seed is: 6932, total number of training (for each informative and uninform class) and test samples: 80, 7120 0.01 ALBERT-L 6932 2e-05 4


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7595,0.692812,0.53625,0.53567,0.536249,0.536341,0.535,0.5375,0.0725,0.538212
2,0.7561,0.765845,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.488403
3,0.6686,0.750844,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.504519
4,0.7624,0.701315,0.49625,0.645558,0.387574,0.497965,0.9175,0.075,-0.013923,0.513506
5,0.6996,0.697115,0.50125,0.613746,0.455021,0.50079,0.7925,0.21,0.003076,0.521763
6,0.651,0.69541,0.51625,0.642659,0.447055,0.509517,0.87,0.1625,0.045988,0.525238


Model seed is: 1759, total number of training (for each informative and uninform class) and test samples: 80, 7120 0.01 ALBERT-L 1759 2e-05 4


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7263,0.685008,0.56875,0.4919,0.558654,0.598566,0.4175,0.72,0.144259,0.602506
2,0.7336,0.680326,0.5775,0.5,0.5671,0.612319,0.4225,0.7325,0.163031,0.615587
3,0.7334,0.699638,0.465,0.484337,0.464247,0.467442,0.5025,0.4275,-0.070198,0.524781
4,0.6779,0.680125,0.5675,0.649798,0.542219,0.545918,0.8025,0.3325,0.152946,0.657819
5,0.6475,0.661593,0.6275,0.575499,0.621825,0.668874,0.505,0.75,0.263016,0.672094
6,0.5611,0.661163,0.595,0.6639,0.577233,0.567376,0.8,0.39,0.208314,0.671494


Model seed is: 323, total number of training (for each informative and uninform class) and test samples: 80, 7120 0.01 ALBERT-L 323 2e-05 4


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7698,0.764554,0.5,0.0,0.333333,0.0,0.0,1.0,0.0,0.511125
2,0.7263,0.75355,0.5,0.0,0.333333,0.0,0.0,1.0,0.0,0.4401
3,0.8071,0.764421,0.50125,0.019656,0.342603,0.571429,0.01,0.9925,0.013422,0.530356
4,0.7582,0.740775,0.49875,0.038369,0.3497,0.470588,0.02,0.9775,-0.008668,0.532219
5,0.7739,0.814917,0.5,0.0,0.333333,0.0,0.0,1.0,0.0,0.487094
6,0.7451,0.752133,0.5025,0.00995,0.338865,1.0,0.005,1.0,0.050063,0.496725


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))


Model seed is: 1694, total number of training (for each informative and uninform class) and test samples: 80, 7120 0.01 ALBERT-L 1694 2e-05 4


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7349,0.683875,0.56125,0.636269,0.541757,0.543363,0.7675,0.355,0.134474,0.55525
2,0.5713,0.745389,0.575,0.647303,0.556356,0.553191,0.78,0.37,0.164458,0.614431
3,0.3097,0.928014,0.58375,0.648363,0.569205,0.561243,0.7675,0.4,0.180103,0.631719
4,0.2647,1.300197,0.58875,0.677134,0.555435,0.557351,0.8625,0.315,0.212116,0.647337
5,0.0757,1.635307,0.585,0.562005,0.583853,0.594972,0.5325,0.6375,0.170945,0.61635
6,0.0372,1.805274,0.58375,0.55894,0.582429,0.594366,0.5275,0.64,0.16857,0.611475


Model seed is: 9741, total number of training (for each informative and uninform class) and test samples: 80, 7120 0.01 ALBERT-L 9741 2e-05 4


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7657,0.681549,0.54,0.269841,0.467037,0.653846,0.17,0.91,0.11894,0.616925
2,0.6627,0.689222,0.54,0.679443,0.432642,0.52139,0.975,0.105,0.162255,0.611387
3,0.5802,0.673701,0.595,0.580311,0.594503,0.602151,0.56,0.63,0.190467,0.634138
4,0.301,0.764943,0.6025,0.626761,0.600813,0.590708,0.6675,0.5375,0.206755,0.642037
5,0.1023,0.973526,0.6025,0.589147,0.60208,0.609626,0.57,0.635,0.205434,0.62435
6,0.0846,1.085011,0.6025,0.658065,0.591719,0.577358,0.765,0.44,0.216767,0.628825


Model seed is: 200, total number of training (for each informative and uninform class) and test samples: 80, 7120 0.01 ALBERT-L 200 2e-05 4


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7292,0.697103,0.50125,0.63428,0.425195,0.500724,0.865,0.1375,0.003644,0.516975
2,0.6917,0.691229,0.53875,0.648236,0.489272,0.523883,0.85,0.2275,0.099026,0.533531
3,0.7291,0.690096,0.53875,0.592265,0.530665,0.530693,0.67,0.4075,0.080317,0.521287
4,0.709,0.694303,0.52,0.59408,0.503462,0.514652,0.7025,0.3375,0.042964,0.495794
5,0.6917,0.694079,0.51875,0.580153,0.508232,0.514507,0.665,0.3725,0.039215,0.516894
6,0.6711,0.694565,0.5225,0.596195,0.506048,0.516484,0.705,0.34,0.048335,0.514444


Model seed is: 999, total number of training (for each informative and uninform class) and test samples: 80, 7120 0.01 ALBERT-L 999 2e-05 4


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7159,0.677406,0.56375,0.38448,0.523314,0.652695,0.2725,0.855,0.156859,0.631319
2,0.6157,0.642535,0.645,0.691304,0.636829,0.611538,0.795,0.495,0.304003,0.701037
3,0.3403,0.62127,0.66875,0.668335,0.668749,0.669173,0.6675,0.67,0.337501,0.713706
4,0.1133,0.768244,0.66625,0.716861,0.655234,0.622468,0.845,0.4875,0.356029,0.718956
5,0.0476,0.887263,0.67,0.671642,0.669992,0.668317,0.675,0.665,0.340017,0.726475
6,0.0163,0.960106,0.66875,0.66242,0.668634,0.675325,0.65,0.6875,0.337738,0.7257




Model seed is: 94, total number of training (for each informative and uninform class) and test samples: 80, 7120 0.01 DISRoBERTa-B 94 2e-05 4


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6897,0.690986,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.699066
2,0.6777,0.684037,0.6125,0.448399,0.574874,0.777778,0.315,0.91,0.279946,0.718875
3,0.6651,0.65998,0.575,0.689781,0.507589,0.543103,0.945,0.205,0.223013,0.725353
4,0.5007,0.614748,0.66375,0.670747,0.663598,0.657074,0.685,0.6425,0.327796,0.727031
5,0.3287,0.614353,0.6775,0.689157,0.677046,0.665116,0.715,0.64,0.356003,0.727569
6,0.2973,0.629458,0.67375,0.694021,0.672312,0.653422,0.74,0.6075,0.350591,0.730187


Model seed is: 791, total number of training (for each informative and uninform class) and test samples: 80, 7120 0.01 DISRoBERTa-B 791 2e-05 4


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6941,0.69454,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.617381
2,0.6855,0.682909,0.50125,0.666109,0.340462,0.500629,0.995,0.0075,0.015861,0.663294
3,0.5753,0.672141,0.58125,0.680038,0.537126,0.550232,0.89,0.2725,0.206593,0.680875
4,0.4612,0.653293,0.6425,0.660333,0.641512,0.628959,0.695,0.59,0.286584,0.693037
5,0.3346,0.681386,0.64625,0.661078,0.645572,0.634483,0.69,0.6025,0.293626,0.699531
6,0.3218,0.690686,0.65625,0.652339,0.656206,0.659847,0.645,0.6675,0.312579,0.702356


Model seed is: 5, total number of training (for each informative and uninform class) and test samples: 80, 7120 0.01 DISRoBERTa-B 5 2e-05 4


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6974,0.686307,0.59125,0.675917,0.561309,0.559934,0.8525,0.33,0.214041,0.688594
2,0.6763,0.680273,0.505,0.668896,0.344349,0.502513,1.0,0.01,0.070888,0.71745
3,0.5893,0.647062,0.62875,0.694758,0.610537,0.589878,0.845,0.4125,0.285593,0.728719
4,0.4287,0.670573,0.6325,0.702429,0.611018,0.590136,0.8675,0.3975,0.300227,0.737875
5,0.2729,0.64418,0.66875,0.703247,0.664212,0.636917,0.785,0.5525,0.347009,0.741794
6,0.1968,0.67464,0.67125,0.705487,0.666746,0.638945,0.7875,0.555,0.35215,0.740631


Model seed is: 6932, total number of training (for each informative and uninform class) and test samples: 80, 7120 0.01 DISRoBERTa-B 6932 2e-05 4


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7049,0.696333,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.640838
2,0.6887,0.689136,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.682662
3,0.6546,0.676332,0.5925,0.678501,0.561093,0.560261,0.86,0.325,0.218973,0.688831
4,0.5517,0.688271,0.57875,0.672498,0.541152,0.550079,0.865,0.2925,0.192095,0.676269
5,0.363,0.678611,0.62375,0.643787,0.622556,0.611236,0.68,0.5675,0.249081,0.684319
6,0.2745,0.696963,0.62625,0.643623,0.62536,0.615034,0.675,0.5775,0.253709,0.688794


Model seed is: 1759, total number of training (for each informative and uninform class) and test samples: 80, 7120 0.01 DISRoBERTa-B 1759 2e-05 4


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.688,0.688954,0.56375,0.685869,0.486084,0.535865,0.9525,0.175,0.20274,0.669275
2,0.6693,0.679783,0.50125,0.666667,0.338296,0.500627,0.9975,0.005,0.020451,0.713719
3,0.5391,0.683945,0.56625,0.687106,0.490192,0.537377,0.9525,0.18,0.208657,0.728244
4,0.4224,0.623867,0.66875,0.684899,0.667878,0.653061,0.72,0.6175,0.339287,0.734775
5,0.2699,0.669357,0.67,0.7,0.666667,0.641667,0.77,0.57,0.347011,0.74545
6,0.1224,0.694394,0.685,0.699284,0.684288,0.66895,0.7325,0.6375,0.371681,0.745725


Model seed is: 323, total number of training (for each informative and uninform class) and test samples: 80, 7120 0.01 DISRoBERTa-B 323 2e-05 4


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6865,0.69229,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.698531
2,0.7014,0.690559,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.718094
3,0.6692,0.67921,0.55625,0.682184,0.473597,0.531381,0.9525,0.16,0.184465,0.7324
4,0.6213,0.659949,0.56875,0.679666,0.510005,0.54062,0.915,0.2225,0.190597,0.731269
5,0.5521,0.630057,0.64375,0.691224,0.635125,0.609943,0.7975,0.49,0.302139,0.738662
6,0.4704,0.626471,0.64,0.689655,0.630542,0.606061,0.8,0.48,0.29554,0.7401


Model seed is: 1694, total number of training (for each informative and uninform class) and test samples: 80, 7120 0.01 DISRoBERTa-B 1694 2e-05 4


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7246,0.695442,0.5,0.0,0.333333,0.0,0.0,1.0,0.0,0.642587
2,0.6748,0.686639,0.5675,0.672968,0.517296,0.541033,0.89,0.245,0.176659,0.673156
3,0.6279,0.659032,0.65875,0.656604,0.658737,0.660759,0.6525,0.665,0.317525,0.675506
4,0.4356,0.66934,0.6375,0.679912,0.631022,0.608696,0.77,0.505,0.285196,0.681963
5,0.2829,0.771176,0.6525,0.649874,0.65248,0.654822,0.645,0.66,0.305034,0.678494
6,0.1353,0.833378,0.64625,0.634839,0.645904,0.656,0.615,0.6775,0.293073,0.677569


  _warn_prf(average, modifier, msg_start, len(result))


Model seed is: 9741, total number of training (for each informative and uninform class) and test samples: 80, 7120 0.01 DISRoBERTa-B 9741 2e-05 4


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.716,0.692984,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.623644
2,0.6789,0.689102,0.62,0.680672,0.605768,0.586957,0.81,0.43,0.259463,0.673425
3,0.6875,0.68732,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.697975
4,0.6154,0.671277,0.59125,0.695247,0.537377,0.554235,0.9325,0.25,0.249697,0.692444
5,0.5407,0.662421,0.6075,0.686,0.581333,0.571667,0.8575,0.3575,0.248261,0.689431
6,0.4465,0.675845,0.615,0.690141,0.590945,0.577441,0.8575,0.3725,0.263003,0.689006


Model seed is: 200, total number of training (for each informative and uninform class) and test samples: 80, 7120 0.01 DISRoBERTa-B 200 2e-05 4


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7044,0.689232,0.56125,0.30495,0.492201,0.733333,0.1925,0.93,0.181388,0.662931
2,0.6707,0.6832,0.55875,0.292585,0.485984,0.737374,0.1825,0.935,0.178411,0.694575
3,0.6184,0.656543,0.63,0.698574,0.609805,0.589347,0.8575,0.4025,0.291974,0.687937
4,0.4025,0.641623,0.645,0.662708,0.644019,0.631222,0.6975,0.5925,0.291612,0.690006
5,0.3684,0.69957,0.63875,0.654719,0.637976,0.627002,0.685,0.5925,0.278695,0.689487
6,0.2643,0.738552,0.6475,0.675862,0.64478,0.625532,0.735,0.56,0.299624,0.688906


Model seed is: 999, total number of training (for each informative and uninform class) and test samples: 80, 7120 0.01 DISRoBERTa-B 999 2e-05 4


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7093,0.691075,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.658556
2,0.6796,0.685845,0.67625,0.697076,0.674713,0.654945,0.745,0.6075,0.35588,0.707956
3,0.6595,0.676181,0.55875,0.685103,0.474075,0.532594,0.96,0.1575,0.196932,0.720156
4,0.5647,0.640532,0.63625,0.690096,0.624927,0.601113,0.81,0.4625,0.290611,0.718675
5,0.4119,0.628193,0.66375,0.691867,0.660927,0.638478,0.755,0.5725,0.333094,0.715481
6,0.3635,0.633577,0.665,0.690531,0.662704,0.641631,0.7475,0.5825,0.334586,0.714531




Model seed is: 94, total number of training (for each informative and uninform class) and test samples: 80, 7120 0.01 XLNet-B 94 2e-05 4


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7015,0.733592,0.51875,0.186047,0.422216,0.60274,0.11,0.9275,0.065112,0.604944
2,0.6956,0.676653,0.58125,0.631463,0.573329,0.563851,0.7175,0.445,0.168892,0.58275
3,0.6326,0.674601,0.6025,0.646667,0.59619,0.582,0.7275,0.4775,0.211723,0.610413
4,0.677,0.664001,0.625,0.609375,0.624399,0.63587,0.585,0.665,0.250804,0.642206
5,0.5223,0.683445,0.57,0.654618,0.542541,0.54698,0.815,0.325,0.160602,0.610469
6,0.5101,0.681187,0.5775,0.653689,0.556011,0.553819,0.7975,0.3575,0.172606,0.609031


Model seed is: 791, total number of training (for each informative and uninform class) and test samples: 80, 7120 0.01 XLNet-B 791 2e-05 4


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7251,0.681101,0.5825,0.557029,0.581115,0.59322,0.525,0.64,0.166102,0.5904
2,0.7025,0.811903,0.54375,0.265594,0.467339,0.680412,0.165,0.9225,0.134031,0.645069
3,0.4896,0.70227,0.62,0.674518,0.609031,0.589888,0.7875,0.4525,0.254718,0.666275
4,0.4148,0.760712,0.62375,0.627014,0.623721,0.621622,0.6325,0.615,0.247538,0.669581
5,0.3519,0.90905,0.6275,0.670354,0.621097,0.60119,0.7575,0.4975,0.264082,0.670681
6,0.2031,0.910426,0.6225,0.643868,0.621136,0.609375,0.6825,0.5625,0.246783,0.667856


Model seed is: 5, total number of training (for each informative and uninform class) and test samples: 80, 7120 0.01 XLNet-B 5 2e-05 4


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7162,0.665481,0.58,0.647059,0.56427,0.557971,0.77,0.39,0.172976,0.620419
2,0.6827,0.702989,0.55,0.669725,0.481921,0.528986,0.9125,0.1875,0.145191,0.646844
3,0.545,0.731999,0.57125,0.68093,0.513798,0.542222,0.915,0.2275,0.196231,0.658112
4,0.5016,0.743833,0.57875,0.679962,0.531937,0.548239,0.895,0.2625,0.203341,0.665488
5,0.3673,0.708663,0.62,0.674518,0.609031,0.589888,0.7875,0.4525,0.254718,0.6843
6,0.3765,0.808018,0.58875,0.690499,0.538918,0.553544,0.9175,0.26,0.235582,0.678575


Model seed is: 6932, total number of training (for each informative and uninform class) and test samples: 80, 7120 0.01 XLNet-B 6932 2e-05 4


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6903,0.698911,0.4825,0.475949,0.482419,0.482051,0.47,0.495,-0.035011,0.479994
2,0.6758,0.690731,0.50125,0.659847,0.36271,0.500647,0.9675,0.035,0.006922,0.573331
3,0.6599,0.688983,0.55875,0.404722,0.527088,0.621762,0.3,0.8175,0.137317,0.583013
4,0.6797,0.700795,0.53375,0.667261,0.444278,0.518724,0.935,0.1325,0.113131,0.625875
5,0.5982,0.679161,0.575,0.662028,0.544819,0.549505,0.8325,0.3175,0.17499,0.639012
6,0.5312,0.674779,0.58625,0.660513,0.565456,0.56,0.805,0.3675,0.191833,0.644381


Model seed is: 1759, total number of training (for each informative and uninform class) and test samples: 80, 7120 0.01 XLNet-B 1759 2e-05 4


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6929,0.685991,0.55125,0.53316,0.550575,0.555556,0.5125,0.59,0.102809,0.532216
2,0.6457,0.679398,0.55375,0.507586,0.549793,0.566154,0.46,0.6475,0.109441,0.587888
3,0.6574,0.672828,0.57125,0.625137,0.562203,0.55534,0.715,0.4275,0.148781,0.598294
4,0.56,0.701384,0.58,0.661972,0.553758,0.553872,0.8225,0.3375,0.182959,0.61785
5,0.5546,0.69244,0.59125,0.632171,0.586128,0.574642,0.7025,0.48,0.187192,0.6347
6,0.5657,0.687544,0.61125,0.628435,0.610417,0.601831,0.6575,0.565,0.223458,0.6383


Model seed is: 323, total number of training (for each informative and uninform class) and test samples: 80, 7120 0.01 XLNet-B 323 2e-05 4


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6763,0.678934,0.57625,0.647975,0.557896,0.554174,0.78,0.3725,0.166994,0.584556
2,0.6333,0.694614,0.57875,0.493233,0.566403,0.618868,0.41,0.7475,0.167317,0.613338
3,0.4782,0.766324,0.61,0.615764,0.609912,0.606796,0.625,0.595,0.220099,0.628812
4,0.4587,0.80175,0.61375,0.589641,0.612412,0.628895,0.555,0.6725,0.229087,0.641531
5,0.3819,0.793351,0.62625,0.641056,0.625613,0.616628,0.6675,0.585,0.253364,0.662744
6,0.2469,0.80728,0.63375,0.650775,0.632878,0.621868,0.6825,0.585,0.268781,0.667294


Model seed is: 1694, total number of training (for each informative and uninform class) and test samples: 80, 7120 0.01 XLNet-B 1694 2e-05 4


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7609,0.829074,0.5325,0.664875,0.446074,0.518156,0.9275,0.1375,0.106017,0.640381
2,0.6454,0.674761,0.6125,0.545455,0.603882,0.659574,0.465,0.76,0.235479,0.658931
3,0.4301,0.727462,0.62125,0.664452,0.614866,0.596421,0.75,0.4925,0.250963,0.6758
4,0.4051,0.792533,0.62125,0.67101,0.612383,0.59309,0.7725,0.47,0.25442,0.681925
5,0.2712,0.761212,0.63375,0.630517,0.633722,0.636132,0.625,0.6425,0.267541,0.68785
6,0.2211,0.78597,0.64,0.652174,0.639558,0.630841,0.675,0.605,0.280689,0.689506


Model seed is: 9741, total number of training (for each informative and uninform class) and test samples: 80, 7120 0.01 XLNet-B 9741 2e-05 4


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7011,0.685107,0.55625,0.52349,0.554143,0.565217,0.4875,0.625,0.113579,0.563575
2,0.7378,0.68802,0.54625,0.447489,0.531273,0.571984,0.3675,0.725,0.099046,0.541806
3,0.6741,0.68795,0.5125,0.632768,0.453931,0.507553,0.84,0.185,0.033085,0.561319
4,0.6303,0.693798,0.54,0.30303,0.479873,0.625,0.2,0.88,0.109109,0.540931
5,0.623,0.691731,0.54125,0.410915,0.517638,0.573991,0.32,0.7625,0.091997,0.542312
6,0.6495,0.688845,0.54375,0.562874,0.542875,0.54023,0.5875,0.5,0.087837,0.552456


Model seed is: 200, total number of training (for each informative and uninform class) and test samples: 80, 7120 0.01 XLNet-B 200 2e-05 4


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.8281,0.751875,0.50375,0.667782,0.343768,0.501887,0.9975,0.01,0.047583,0.556319
2,0.6713,0.675501,0.5825,0.450658,0.556982,0.658654,0.3425,0.8225,0.188084,0.593744
3,0.565,0.667893,0.6175,0.68125,0.601562,0.583929,0.8175,0.4175,0.256406,0.62205
4,0.4975,0.658969,0.63125,0.608234,0.629973,0.648725,0.5725,0.69,0.264331,0.634562
5,0.4406,0.675049,0.6225,0.671739,0.613811,0.594231,0.7725,0.4725,0.25683,0.65075
6,0.478,0.685234,0.62875,0.683706,0.617193,0.595547,0.8025,0.455,0.274614,0.658256


Model seed is: 999, total number of training (for each informative and uninform class) and test samples: 80, 7120 0.01 XLNet-B 999 2e-05 4


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.8094,0.702232,0.51,0.105023,0.383837,0.605263,0.0575,0.9625,0.047013,0.591938
2,0.6947,0.700398,0.53,0.675862,0.410658,0.515789,0.98,0.08,0.137649,0.613962
3,0.5499,0.679269,0.5575,0.40604,0.526725,0.617347,0.3025,0.8125,0.133694,0.61465
4,0.4368,0.673671,0.6125,0.672304,0.599149,0.582418,0.795,0.43,0.241674,0.624506
5,0.4205,0.666885,0.60125,0.589447,0.60092,0.607427,0.5725,0.63,0.202836,0.651563
6,0.3888,0.669812,0.60625,0.596671,0.606028,0.611549,0.5825,0.63,0.21274,0.656763




Model seed is: 94, total number of training (for each informative and uninform class) and test samples: 80, 7120 0.01 XLNet-L 94 2e-05 4


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7232,0.689496,0.5175,0.089623,0.380696,0.791667,0.0475,0.9875,0.102587,0.594256
2,0.7096,0.698495,0.51375,0.664366,0.39114,0.507246,0.9625,0.065,0.062356,0.572362
3,0.6826,0.679966,0.55,0.666048,0.488197,0.529499,0.8975,0.2025,0.13908,0.644081
4,0.6314,0.672742,0.56625,0.632025,0.551934,0.548803,0.745,0.3875,0.141876,0.640825
5,0.6458,0.68448,0.57375,0.449111,0.550753,0.634703,0.3475,0.8,0.165402,0.60495
6,0.5822,0.717878,0.57,0.650407,0.545982,0.547945,0.8,0.34,0.157672,0.62095


Model seed is: 791, total number of training (for each informative and uninform class) and test samples: 80, 7120 0.01 XLNet-L 791 2e-05 4


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7059,0.694993,0.5225,0.578366,0.513967,0.517787,0.655,0.39,0.046668,0.514919
2,0.8039,0.696201,0.53,0.655046,0.458895,0.517391,0.8925,0.1675,0.087114,0.556575
3,0.7198,0.705133,0.58875,0.588235,0.588749,0.588972,0.5875,0.59,0.177501,0.629806
4,0.6208,0.729632,0.5675,0.656746,0.536143,0.544408,0.8275,0.3075,0.158049,0.634131
5,0.5243,0.749185,0.5725,0.67052,0.53099,0.545455,0.87,0.275,0.18041,0.651481
6,0.4117,0.770034,0.5775,0.675,0.535714,0.548438,0.8775,0.2775,0.19375,0.653906


Model seed is: 5, total number of training (for each informative and uninform class) and test samples: 80, 7120 0.01 XLNet-L 5 2e-05 4


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6729,0.694041,0.52625,0.657633,0.444435,0.514851,0.91,0.1425,0.081897,0.522144
2,0.7616,0.679245,0.56,0.660232,0.518059,0.537736,0.855,0.265,0.148625,0.573006
3,0.608,0.686649,0.55125,0.662911,0.495941,0.530827,0.8825,0.22,0.136838,0.587269
4,0.5198,0.670502,0.59,0.670683,0.563818,0.560403,0.835,0.345,0.206488,0.637331
5,0.4675,0.666392,0.61125,0.659365,0.603336,0.586745,0.7525,0.47,0.231948,0.673375
6,0.4539,0.670322,0.6075,0.66453,0.595819,0.580224,0.7775,0.4375,0.22862,0.675569


Model seed is: 6932, total number of training (for each informative and uninform class) and test samples: 80, 7120 0.01 XLNet-L 6932 2e-05 4


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7136,0.695989,0.51375,0.26465,0.450719,0.542636,0.175,0.8525,0.037388,0.496425
2,0.7277,0.697038,0.50125,0.664987,0.344719,0.500632,0.99,0.0125,0.011852,0.553106
3,0.6954,0.686474,0.5425,0.595133,0.534635,0.53373,0.6725,0.4125,0.088027,0.608081
4,0.7491,0.713082,0.5025,0.667224,0.341038,0.501256,0.9975,0.0075,0.035444,0.54235
5,0.6606,0.689427,0.545,0.645914,0.504775,0.528662,0.83,0.26,0.109536,0.598406
6,0.6973,0.686981,0.54625,0.551298,0.546193,0.545232,0.5575,0.535,0.092523,0.607725


Model seed is: 1759, total number of training (for each informative and uninform class) and test samples: 80, 7120 0.01 XLNet-L 1759 2e-05 4


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6683,0.703675,0.50375,0.047962,0.356187,0.588235,0.025,0.9825,0.026003,0.492453
2,0.8041,0.793678,0.51375,0.332762,0.475132,0.530055,0.2425,0.785,0.032736,0.544687
3,0.7772,0.705911,0.49625,0.056206,0.356321,0.444444,0.03,0.9625,-0.020766,0.541269
4,0.6334,0.697506,0.4975,0.106667,0.378551,0.48,0.06,0.935,-0.010328,0.538828
5,0.7605,0.69158,0.545,0.65725,0.490335,0.52719,0.8725,0.2175,0.119106,0.552672
6,0.7078,0.703515,0.55875,0.642351,0.533247,0.540034,0.7925,0.325,0.13292,0.569519


Model seed is: 323, total number of training (for each informative and uninform class) and test samples: 80, 7120 0.01 XLNet-L 323 2e-05 4


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7449,0.707631,0.50375,0.667225,0.345898,0.501892,0.995,0.0125,0.040266,0.510719
2,0.7209,0.688935,0.52375,0.14382,0.406975,0.711111,0.08,0.9675,0.10308,0.609994
3,0.7545,0.690624,0.5175,0.134529,0.40002,0.652174,0.075,0.96,0.075173,0.586631
4,0.675,0.710934,0.4975,0.00495,0.334415,0.25,0.0025,0.9925,-0.035444,0.625387
5,0.7823,0.690328,0.5175,0.134529,0.40002,0.652174,0.075,0.96,0.075173,0.618169
6,0.6851,0.690082,0.525,0.166667,0.417249,0.678571,0.095,0.955,0.097983,0.612063


Model seed is: 1694, total number of training (for each informative and uninform class) and test samples: 80, 7120 0.01 XLNet-L 1694 2e-05 4


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.8383,0.68896,0.545,0.47093,0.535904,0.5625,0.405,0.685,0.09375,0.562887
2,0.6638,0.683333,0.545,0.372414,0.507776,0.6,0.27,0.82,0.107763,0.588863
3,0.6905,0.673492,0.59375,0.604141,0.59347,0.589074,0.62,0.5675,0.187759,0.622106
4,0.6011,0.668449,0.595,0.64859,0.585357,0.572797,0.7475,0.4425,0.199506,0.634269
5,0.4851,0.672463,0.61375,0.657048,0.607494,0.590818,0.74,0.4875,0.235119,0.642781
6,0.4677,0.691256,0.60875,0.666667,0.596571,0.580705,0.7825,0.435,0.231955,0.644663


Model seed is: 9741, total number of training (for each informative and uninform class) and test samples: 80, 7120 0.01 XLNet-L 9741 2e-05 4


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7238,0.6876,0.54875,0.550436,0.548744,0.548387,0.5525,0.545,0.097503,0.553812
2,0.7545,0.694188,0.49625,0.64926,0.377845,0.497997,0.9325,0.06,-0.01535,0.504188
3,0.6957,0.691949,0.50125,0.645963,0.401205,0.500688,0.91,0.0925,0.004341,0.520813
4,0.7019,0.701917,0.50875,0.053012,0.360683,0.733333,0.0275,0.99,0.064509,0.530253
5,0.6937,0.695153,0.51625,0.089412,0.380025,0.76,0.0475,0.985,0.093395,0.551894
6,0.7295,0.693938,0.50625,0.669456,0.347074,0.503145,1.0,0.0125,0.079305,0.551269


Model seed is: 200, total number of training (for each informative and uninform class) and test samples: 80, 7120 0.01 XLNet-L 200 2e-05 4


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7558,0.706191,0.515,0.17094,0.414092,0.588235,0.1,0.93,0.053786,0.445578
2,0.7436,0.693714,0.50125,0.666109,0.340462,0.500629,0.995,0.0075,0.015861,0.525697
3,0.7976,0.712173,0.51375,0.48883,0.512592,0.515235,0.465,0.5625,0.027632,0.521962
4,0.7883,0.697406,0.49625,0.66106,0.340262,0.498099,0.9825,0.01,-0.032202,0.559269
5,0.7631,0.69874,0.49875,0.664996,0.334979,0.499373,0.995,0.0025,-0.020451,0.566084
6,0.7045,0.700531,0.49875,0.665555,0.332777,0.499374,0.9975,0.0,-0.035377,0.54955


Model seed is: 999, total number of training (for each informative and uninform class) and test samples: 80, 7120 0.01 XLNet-L 999 2e-05 4


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.766,0.698318,0.5275,0.357143,0.491813,0.558511,0.2625,0.7925,0.064859,0.508219
2,0.6715,0.77356,0.50375,0.667225,0.345898,0.501892,0.995,0.0125,0.040266,0.531675
3,0.7005,0.742682,0.55125,0.680321,0.46385,0.528354,0.955,0.1475,0.173768,0.560881
4,0.555,0.814691,0.53875,0.67083,0.450237,0.521498,0.94,0.1375,0.129891,0.545419
5,0.5131,0.797183,0.56,0.659574,0.518833,0.537855,0.8525,0.2675,0.147959,0.618594
6,0.5826,0.782808,0.56625,0.664734,0.525288,0.541732,0.86,0.2725,0.163737,0.628575




Model seed is: 94, total number of training (for each informative and uninform class) and test samples: 80, 7120 0.01 DEBERT-B 94 2e-05 4


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight', 'classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7024,0.694206,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.612441
2,0.6884,0.69359,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.597969
3,0.6966,0.693053,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.600569
4,0.6779,0.691576,0.5125,0.664948,0.385227,0.506545,0.9675,0.0575,0.060298,0.602644
5,0.6454,0.799807,0.5225,0.204167,0.431548,0.6125,0.1225,0.9225,0.075,0.582281
6,0.4665,0.71928,0.5625,0.480712,0.551371,0.591241,0.405,0.72,0.131705,0.594812


Model seed is: 791, total number of training (for each informative and uninform class) and test samples: 80, 7120 0.01 DEBERT-B 791 2e-05 4


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight', 'classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6961,0.694579,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.580409
2,0.6939,0.693299,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.626631
3,0.6429,0.763428,0.53375,0.671366,0.434608,0.518367,0.9525,0.115,0.123527,0.611225
4,0.5926,0.772705,0.57625,0.649431,0.556943,0.553792,0.785,0.3675,0.167826,0.648925
5,0.1982,0.935342,0.59875,0.467662,0.572848,0.694581,0.3525,0.845,0.22693,0.65165
6,0.168,0.932158,0.605,0.564738,0.601591,0.628834,0.5125,0.6975,0.213689,0.655219


Model seed is: 5, total number of training (for each informative and uninform class) and test samples: 80, 7120 0.01 DEBERT-B 5 2e-05 4


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight', 'classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6961,0.695446,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.613694
2,0.6901,0.694394,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.680075
3,0.6439,0.681927,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.693456
4,0.5659,0.803414,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.706113
5,0.4189,1.171812,0.5125,0.672269,0.360525,0.506329,1.0,0.025,0.112509,0.701081
6,0.4929,0.929685,0.60625,0.699714,0.564013,0.565485,0.9175,0.295,0.271524,0.711525


Model seed is: 6932, total number of training (for each informative and uninform class) and test samples: 80, 7120 0.01 DEBERT-B 6932 2e-05 4


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight', 'classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6946,0.693994,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.588512
2,0.5795,0.814098,0.525,0.170306,0.418778,0.672414,0.0975,0.9525,0.096408,0.63975
3,0.3936,0.791961,0.635,0.668934,0.631125,0.612033,0.7375,0.5325,0.275859,0.67715
4,0.2588,1.420149,0.6225,0.619647,0.622479,0.624365,0.615,0.63,0.245028,0.668175
5,0.2216,1.664661,0.64,0.676404,0.635385,0.614286,0.7525,0.5275,0.287368,0.674512
6,0.0061,1.693603,0.63375,0.665906,0.630325,0.612159,0.73,0.5375,0.272598,0.672


Model seed is: 1759, total number of training (for each informative and uninform class) and test samples: 80, 7120 0.01 DEBERT-B 1759 2e-05 4


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight', 'classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6968,0.692971,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.590706
2,0.6885,0.689562,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.682691
3,0.4237,0.740858,0.63625,0.670442,0.632292,0.612836,0.74,0.5325,0.278563,0.698519
4,0.1306,1.196396,0.64375,0.60251,0.639874,0.681388,0.54,0.7475,0.293897,0.702913
5,0.176,1.562988,0.61,0.681633,0.589203,0.575862,0.835,0.385,0.246353,0.700113
6,0.0406,1.508,0.63125,0.673311,0.625034,0.604374,0.76,0.5025,0.271661,0.7031


Model seed is: 323, total number of training (for each informative and uninform class) and test samples: 80, 7120 0.01 DEBERT-B 323 2e-05 4


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight', 'classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6938,0.692984,0.5,0.0,0.333333,0.0,0.0,1.0,0.0,0.620303
2,0.6974,0.692564,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.672056
3,0.6883,0.660598,0.64625,0.648447,0.646236,0.644444,0.6525,0.64,0.292523,0.680206
4,0.4018,0.702592,0.65125,0.642766,0.651053,0.658793,0.6275,0.675,0.302842,0.6953
5,0.2503,0.991675,0.66375,0.670747,0.663598,0.657074,0.685,0.6425,0.327796,0.711537
6,0.1342,1.18266,0.65375,0.642581,0.653412,0.664,0.6225,0.685,0.308102,0.706119


  _warn_prf(average, modifier, msg_start, len(result))


Model seed is: 1694, total number of training (for each informative and uninform class) and test samples: 80, 7120 0.01 DEBERT-B 1694 2e-05 4


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight', 'classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7076,0.700004,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.650025
2,0.6159,0.655639,0.62375,0.668137,0.616897,0.597633,0.7575,0.49,0.256861,0.672312
3,0.3353,0.764693,0.63875,0.650544,0.638338,0.629977,0.6725,0.605,0.278134,0.682306
4,0.0953,1.318977,0.62125,0.701478,0.591765,0.578862,0.89,0.3525,0.287573,0.69785
5,0.0142,1.356564,0.655,0.66586,0.654635,0.64554,0.6875,0.6225,0.310657,0.699788
6,0.0076,1.438101,0.6575,0.665854,0.657286,0.65,0.6825,0.6325,0.315394,0.698669


Model seed is: 9741, total number of training (for each informative and uninform class) and test samples: 80, 7120 0.01 DEBERT-B 9741 2e-05 4


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight', 'classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.6936,0.698706,0.5,0.0,0.333333,0.0,0.0,1.0,0.0,0.586881
2,0.7177,0.697291,0.5,0.0,0.333333,0.0,0.0,1.0,0.0,0.624141
3,0.6246,0.752996,0.5125,0.173729,0.413992,0.569444,0.1025,0.9225,0.043679,0.613562
4,0.3109,0.786488,0.615,0.660044,0.60812,0.590909,0.7475,0.4825,0.238528,0.64825
5,0.2064,1.207592,0.6075,0.632319,0.605703,0.594714,0.675,0.54,0.216986,0.650313
6,0.1298,1.38368,0.605,0.633411,0.602613,0.590909,0.6825,0.5275,0.212569,0.648294


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Model seed is: 200, total number of training (for each informative and uninform class) and test samples: 80, 7120 0.01 DEBERT-B 200 2e-05 4


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight', 'classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7053,0.695382,0.5,0.0,0.333333,0.0,0.0,1.0,0.0,0.639803
2,0.7015,0.694035,0.5,0.0,0.333333,0.0,0.0,1.0,0.0,0.662591
3,0.6298,0.645432,0.635,0.609626,0.633451,0.655172,0.57,0.7,0.272311,0.690519
4,0.3761,0.731014,0.64375,0.656212,0.643281,0.634033,0.68,0.6075,0.288259,0.677494
5,0.344,0.93956,0.6425,0.647783,0.64242,0.63835,0.6575,0.6275,0.285128,0.6808
6,0.1478,1.029387,0.65,0.667458,0.649033,0.635747,0.7025,0.5975,0.301668,0.681075


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Model seed is: 999, total number of training (for each informative and uninform class) and test samples: 80, 7120 0.01 DEBERT-B 999 2e-05 4


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight', 'classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Macro F1,Precision,Recall,Specificity,Mcc,Auc
1,0.7216,0.696636,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.581244
2,0.6888,0.694256,0.5,0.666667,0.333333,0.5,1.0,0.0,0.0,0.623931
3,0.69,0.689817,0.6075,0.668076,0.593977,0.578755,0.79,0.425,0.230933,0.6461
4,0.5849,0.671983,0.59,0.677165,0.557761,0.558442,0.86,0.32,0.213862,0.674447
5,0.5365,0.898099,0.62375,0.531882,0.608679,0.703704,0.4275,0.82,0.269094,0.695794
6,0.2883,0.811777,0.63875,0.629012,0.638501,0.646438,0.6125,0.665,0.277883,0.698738


Unnamed: 0,category,ratio,model,seed,accuracy,f1-score,macro_f1,precision,recall,specificity,mcc,AUC
0,oral-care,0.10,ALBERT-L,94,0.528438,0.691222,0.346933,0.528390,0.999113,0.001325,0.006609,0.621835
1,oral-care,0.10,ALBERT-L,791,0.678906,0.656182,0.677497,0.755393,0.580006,0.789665,0.375835,0.753581
2,oral-care,0.10,ALBERT-L,5,0.738906,0.761864,0.736457,0.735149,0.790594,0.681020,0.475292,0.783641
3,oral-care,0.10,ALBERT-L,6932,0.749219,0.759623,0.748748,0.769417,0.750074,0.748261,0.497761,0.814214
4,oral-care,0.10,ALBERT-L,1759,0.637656,0.688265,0.627848,0.630853,0.757172,0.503809,0.270467,0.667575
...,...,...,...,...,...,...,...,...,...,...,...,...
195,oral-care,0.01,DEBERT-B,323,0.649719,0.655048,0.649635,0.678704,0.632986,0.668245,0.300901,0.713786
196,oral-care,0.01,DEBERT-B,1694,0.652247,0.671269,0.651079,0.666843,0.675755,0.626221,0.302222,0.702237
197,oral-care,0.01,DEBERT-B,9741,0.634972,0.670220,0.630754,0.637923,0.705961,0.556378,0.265547,0.685477
198,oral-care,0.01,DEBERT-B,200,0.653090,0.681783,0.650246,0.658045,0.707298,0.593075,0.302531,0.701000


TypeError: Could not convert ALBERT-LALBERT-LALBERT-LALBERT-LALBERT-LALBERT-LALBERT-LALBERT-LALBERT-LALBERT-LDISRoBERTa-BDISRoBERTa-BDISRoBERTa-BDISRoBERTa-BDISRoBERTa-BDISRoBERTa-BDISRoBERTa-BDISRoBERTa-BDISRoBERTa-BDISRoBERTa-BXLNet-BXLNet-BXLNet-BXLNet-BXLNet-BXLNet-BXLNet-BXLNet-BXLNet-BXLNet-BXLNet-LXLNet-LXLNet-LXLNet-LXLNet-LXLNet-LXLNet-LXLNet-LXLNet-LXLNet-LDEBERT-BDEBERT-BDEBERT-BDEBERT-BDEBERT-BDEBERT-BDEBERT-BDEBERT-BDEBERT-BDEBERT-BALBERT-LALBERT-LALBERT-LALBERT-LALBERT-LALBERT-LALBERT-LALBERT-LALBERT-LALBERT-LDISRoBERTa-BDISRoBERTa-BDISRoBERTa-BDISRoBERTa-BDISRoBERTa-BDISRoBERTa-BDISRoBERTa-BDISRoBERTa-BDISRoBERTa-BDISRoBERTa-BXLNet-BXLNet-BXLNet-BXLNet-BXLNet-BXLNet-BXLNet-BXLNet-BXLNet-BXLNet-BXLNet-LXLNet-LXLNet-LXLNet-LXLNet-LXLNet-LXLNet-LXLNet-LXLNet-LXLNet-LDEBERT-BDEBERT-BDEBERT-BDEBERT-BDEBERT-BDEBERT-BDEBERT-BDEBERT-BDEBERT-BDEBERT-BALBERT-LALBERT-LALBERT-LALBERT-LALBERT-LALBERT-LALBERT-LALBERT-LALBERT-LALBERT-LDISRoBERTa-BDISRoBERTa-BDISRoBERTa-BDISRoBERTa-BDISRoBERTa-BDISRoBERTa-BDISRoBERTa-BDISRoBERTa-BDISRoBERTa-BDISRoBERTa-BXLNet-BXLNet-BXLNet-BXLNet-BXLNet-BXLNet-BXLNet-BXLNet-BXLNet-BXLNet-BXLNet-LXLNet-LXLNet-LXLNet-LXLNet-LXLNet-LXLNet-LXLNet-LXLNet-LXLNet-LDEBERT-BDEBERT-BDEBERT-BDEBERT-BDEBERT-BDEBERT-BDEBERT-BDEBERT-BDEBERT-BDEBERT-BALBERT-LALBERT-LALBERT-LALBERT-LALBERT-LALBERT-LALBERT-LALBERT-LALBERT-LALBERT-LDISRoBERTa-BDISRoBERTa-BDISRoBERTa-BDISRoBERTa-BDISRoBERTa-BDISRoBERTa-BDISRoBERTa-BDISRoBERTa-BDISRoBERTa-BDISRoBERTa-BXLNet-BXLNet-BXLNet-BXLNet-BXLNet-BXLNet-BXLNet-BXLNet-BXLNet-BXLNet-BXLNet-LXLNet-LXLNet-LXLNet-LXLNet-LXLNet-LXLNet-LXLNet-LXLNet-LXLNet-LDEBERT-BDEBERT-BDEBERT-BDEBERT-BDEBERT-BDEBERT-BDEBERT-BDEBERT-BDEBERT-BDEBERT-B to numeric

In [None]:

# Destination in Google Drive for the pickled results dictionary.
# (Plain string literal: the path has no placeholders, so no f-prefix is needed.)
file_path = '/content/drive/MyDrive/paper1/temp/Sample_Efficiency/all_results_sample_efficiency_after_revision_3'
# Make sure the target folder exists; open(..., 'wb') raises FileNotFoundError
# when the parent directory is missing, which would discard the results.
os.makedirs(os.path.dirname(file_path), exist_ok=True)
# Serialize and save the dictionary using the highest pickle protocol available.
with open(file_path, 'wb') as handle:
    pickle.dump(all_datasets_ratio_models_seed_results, handle, protocol=pickle.HIGHEST_PROTOCOL)