In [None]:
# Mount Google Drive (Colab only) so that the '/content/drive/MyDrive/...'
# dataset and checkpoint paths used later in this notebook can be resolved.
# Skip this cell if the data is available from another location.

from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# !pip install transformers[torch]==4.31.0
!pip install transformers
!pip install evaluate


Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Collecting datasets>=2.0.0 (from evaluate)
  Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)
Collecting dill (from evaluate)
  Downloading dill-0.3.9-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from evaluate)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess (from evaluate)
  Downloading multiprocess-0.70.17-py310-none-any.whl.metadata (7.2 kB)
Collecting dill (from evaluate)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting multiprocess (from evaluate)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec>=2021.05.0 (from fsspec[http]>=2021.05.0->evaluate)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading evaluate-0.4.3-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m [

In [None]:
# Note: Some libraries or dependencies used in this code may become deprecated over time.
# If you encounter deprecated libraries, replace them with suitable alternatives and
# update the corresponding sections of the code that rely on the deprecated classes or methods accordingly.

import os
import json
import gzip
import pandas as pd
import collections
import nltk
from google.colab import files

from urllib.request import urlopen

import random
import numpy as np
from tqdm import tqdm_notebook as tqdm
from collections import defaultdict
import pickle
from IPython.display import clear_output
import torch
from transformers import RobertaTokenizerFast
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support,  recall_score, precision_score, f1_score, roc_auc_score
from sklearn.model_selection import train_test_split,KFold, StratifiedKFold
from transformers import  Trainer, TrainingArguments, EarlyStoppingCallback, AutoTokenizer, AutoModelForSequenceClassification
# from datasets import load_metric
import evaluate


In [None]:
# Report how many CUDA devices are visible and the name of the first one.
# Nothing is printed when no GPU is available.
if torch.cuda.is_available():
    gpu_count = torch.cuda.device_count()
    print(gpu_count)
    first_gpu_name = torch.cuda.get_device_name(0)
    print(first_gpu_name)

1
Tesla T4


# Training

In [None]:

def set_seed(SEED):
    """Seed every random number generator used in this notebook.

    The same value is applied, in order, to Python's `random`, NumPy,
    PyTorch (CPU) and PyTorch (all CUDA devices); cuDNN is then switched
    to deterministic mode.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(SEED)
    torch.backends.cudnn.deterministic = True



class AmazonDataset(torch.utils.data.Dataset):
    """Torch dataset over tokenizer encodings with optional labels.

    Args:
        encodings: Mapping from feature name to a per-sample indexable
            sequence (e.g. the result of a tokenizer call). It must contain
            the key "input_ids", which defines the dataset length.
        labels: Optional per-sample labels (list, NumPy array, pandas Series
            values, ...). When omitted or empty, items carry no 'labels' entry.
    """

    def __init__(self, encodings, labels= None):
        self.encodings = encodings
        self.labels = labels

    def _has_labels(self):
        """Return True when a non-empty label container was supplied."""
        # An explicit None/length test is used instead of `if self.labels:`
        # because truthiness of a multi-element array/Series/tensor raises.
        return self.labels is not None and len(self.labels) > 0

    def __getitem__(self, idx):
        """Return the features (and label, if available) of sample `idx` as tensors."""
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        if self._has_labels():
            item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        """Number of samples, taken from the "input_ids" feature."""
        return len(self.encodings["input_ids"])




def prepare_auxilary_datasets(dataset):
    """Tokenize an auxiliary (cross-domain) frame into a labelled `AmazonDataset`.

    Sentences are read from the "sentence" column. Labels come from the
    "label" column when it exists and from "helpful" otherwise. Encoding
    uses the notebook-level `tokenizer`, truncating/padding to at most
    40 tokens.
    """
    sentences = dataset["sentence"]
    label_column = "label" if "label" in dataset.columns.values else "helpful"
    targets = dataset[label_column]

    encoded = tokenizer(sentences.tolist(), truncation=True, padding=True, max_length=40)
    return AmazonDataset(encoded, targets.tolist())


def compute_metrics(eval_pred):
    """Compute binary classification metrics from two-class logits.

    Args:
        eval_pred: Pair `(logits, labels)`; `logits` is indexed as a 2-D
            array with one column per class (column 1 = positive class).

    Returns:
        dict with the keys 'accuracy', 'f1', 'precision', 'recall' and 'AUC'.
    """
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=1)

    # AUC needs a score that orders samples by P(class 1). With a two-logit
    # softmax head that probability is sigmoid(logit_1 - logit_0), so the
    # logit margin is the decision value; the raw class-1 logit alone is not
    # a monotone function of the probability and can rank samples differently.
    positive_margin = logits[:, 1] - logits[:, 0]

    return {
        'accuracy': accuracy_score(y_true=labels, y_pred=predictions),
        'f1': f1_score(y_true=labels, y_pred=predictions),
        'precision': precision_score(y_true=labels, y_pred=predictions),
        'recall': recall_score(y_true=labels, y_pred=predictions),
        'AUC': roc_auc_score(y_true=labels, y_score=positive_margin)
    }




def return_training_args():
    """Build the default `TrainingArguments` used as the starting point of a run.

    No evaluation strategy is configured and checkpoints are never saved
    (`save_strategy="no"`); training loss is logged every 100 steps.
    """
    default_kwargs = dict(
        output_dir="test-amazon",
        # evaluation_strategy = "epoch" is deliberately left unset: there is no
        # evaluation during training when doing simple K-fold cross validation.
        learning_rate=3e-5,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=64,
        max_steps=1500,
        weight_decay=0.01,
        load_best_model_at_end=False,
        logging_strategy="steps",
        logging_steps=100,
        save_strategy="no",
    )
    return TrainingArguments(**default_kwargs)




In [None]:
def data_exclude_func(dataset, setting, rate, need_frequency=None):
    """Split `dataset` into main, excluded and garbage parts by need frequency.

    A need is "held out" when its occurrence count is `<= rate`
    (`setting == 'LF_testing'`) or `>= rate` (`setting == 'HF_testing'`);
    every other need is "forbidden". Sentences listed under a held-out need
    are removed from the main part. Of those removed sentences, the ones that
    also mention a forbidden need in any of the columns `Need_ID_1` ..
    `Need_ID_10` are returned as garbage, the rest as excluded.

    Args:
        dataset: DataFrame with a 'Sentence_ID' column and the columns
            'Need_ID_1' .. 'Need_ID_10'.
        setting: 'LF_testing' or 'HF_testing'. Any other value holds out
            nothing, so the whole dataset is returned as the main part.
        rate: Frequency threshold compared against each need's count.
        need_frequency: Optional mapping `need_id -> entry`, where `entry[0]`
            is the occurrence count and `entry[1]` an iterable of sentence
            IDs. Defaults to the notebook-level `frequency` dictionary.

    Returns:
        Tuple `(df_main, df_excluded, df_garbage)` of row subsets of `dataset`.
    """
    need_columns = [f'Need_ID_{i}' for i in range(1, 11)]

    if setting == 'LF_testing':
        def is_held_out(count):
            return count <= rate
    elif setting == 'HF_testing':
        def is_held_out(count):
            return count >= rate
    else:
        is_held_out = None

    held_out_sentence_ids = set()
    forbidden_need_ids = []
    if is_held_out is not None:
        # Resolve the global lazily so that passing `need_frequency` makes the
        # function independent of notebook state.
        freq_map = frequency if need_frequency is None else need_frequency
        for need_id, entry in freq_map.items():
            if is_held_out(entry[0]):
                held_out_sentence_ids.update(entry[1])
            else:
                forbidden_need_ids.append(need_id)

    exclude_ids = list(held_out_sentence_ids)
    is_excluded = dataset['Sentence_ID'].isin(exclude_ids)
    df_excluded_primary = dataset[is_excluded]
    # Rows whose sentence id is not held out stay in the main part.
    df_main = dataset[~is_excluded]

    # A held-out sentence that also mentions a forbidden need is "garbage".
    condition = df_excluded_primary[need_columns].isin(forbidden_need_ids).any(axis=1)
    df_garbage = df_excluded_primary[condition]
    df_excluded = df_excluded_primary[~condition]

    return df_main, df_excluded, df_garbage



##################### testing data_exclude_func  ###################

# Sentence-level dataset; data_exclude_func reads its 'Sentence_ID' and 'Need_ID_*' columns.
data_set_1 = pd.read_csv('/content/drive/MyDrive/Fairness paper datasets/8000_sentences_with_need_IDs.csv')

# Load the pre-computed dictionary (derived from `data_set_1`) with the population count of
# each customer need. data_exclude_func reads it as: need ID -> (count, sentence IDs, ...).
# NOTE(review): this is a pickle file; only load it from a trusted location.
frequency = pd.read_pickle('/content/drive/MyDrive/paper1/datasets/dictionary frequency of occurence of needs')

# Each entry is (ratio, max_step): the frequency threshold passed to
# data_exclude_func and the number of training steps used for that threshold.
ratios_maxstep = {
    # Setting A -- LF_testing: Frequent Training
    'LF_testing': [
        (5, 2400),
        (10, 2400),
        (20, 2400),
        (30, 2200),
        (40, 2200),
        (50, 2000),
        (75, 1600),
        (175, 1400),
        (190, 1200),
        (200, 1000),
        (210, 800),
        (300, 500),
        (562, 500),
    ],
    # Setting B -- HF_testing: Infrequent Training
    'HF_testing': [
        (350, 2200),
        (320, 2000),
        (290, 1800),
        (210, 1600),
        (207, 1600),
        (205, 1500),
        (195, 1500),
        (175, 1200),
        (110, 1000),
        (75, 800),
        (58, 600),
        (43, 500),
    ],
}


# Print, for every setting and ratio, the estimated number of training samples
# and the sizes of the excluded and garbage test sets. This gives a first
# estimate of the training time required.
# Output format per line: "<ratio>: <training size>, <excluded size>, <garbage size>"

for setting, rate_list in ratios_maxstep.items():
    print(f'################################### {setting} ###############################')
    for threshold, _max_step in rate_list:
        main_part, excluded_part, garbage_part = data_exclude_func(data_set_1, setting, rate=threshold)
        n_label_1 = main_part[main_part.label == 1].shape[0]
        n_label_0 = main_part[main_part.label == 0].shape[0]
        # When label 1 is the minority, the main part is later balanced to
        # 2 * n_label_1 samples; 80% of the (balanced) main part is used for training.
        usable = 2 * n_label_1 if n_label_1 < n_label_0 else n_label_1 + n_label_0
        training_size = 0.8 * (usable)
        test_size_exc = excluded_part.shape[0]
        test_size_gb = garbage_part.shape[0]
        print(f'{threshold}: {training_size}, {test_size_exc}, {test_size_gb}')


################################### LF_testing ###############################
5: 6360.0, 36, 14
10: 6287.200000000001, 100, 41
20: 6238.400000000001, 141, 61
30: 5952.0, 329, 132
40: 5739.200000000001, 406, 188
50: 5278.400000000001, 604, 278
75: 4372.8, 1027, 421
175: 3384.0, 1628, 438
190: 2652.8, 2098, 425
200: 2209.6, 2380, 420
210: 1756.8000000000002, 2740, 343
300: 1038.4, 3282, 250
562: 702.4000000000001, 3618, 124
################################### HF_testing ###############################
350: 5788.8, 439, 124
320: 5251.200000000001, 649, 250
290: 4795.2, 898, 286
210: 4384.0, 1098, 343
207: 4096.0, 1207, 414
205: 3808.0, 1381, 420
195: 3606.4, 1545, 382
175: 2604.8, 2115, 438
110: 2222.4, 2378, 414
75: 1643.2, 2733, 421
58: 1027.2, 3195, 344
43: 702.4000000000001, 3548, 194


In [None]:
# Need-frequency dictionary consumed by data_exclude_func.
frequency = pd.read_pickle('/content/drive/MyDrive/paper1/datasets/dictionary frequency of occurence of needs')

# This experiment is specific to the Oral-Care dataset (data_set_1).
# Datasets 2-5 are only needed to evaluate cross-domain performance;
# comment them out (here and in the loop below) if that is not required.
data_set_1 = pd.read_csv('/content/drive/MyDrive/Fairness paper datasets/8000_sentences_with_need_IDs.csv')
data_set_2 = pd.read_csv('/content/drive/MyDrive/Fairness paper datasets/Electronics.csv')
data_set_3 = pd.read_csv('/content/drive/MyDrive/Fairness paper datasets/baby.csv')
data_set_4 = pd.read_csv('/content/drive/MyDrive/Fairness paper datasets/Pet_supplies.csv')
data_set_5 = pd.read_csv('/content/drive/MyDrive/Fairness paper datasets/Sport_outdoors.csv')

# The tokenizer expects plain strings, so force the 'sentence' column to `str`.
for _frame in (data_set_1, data_set_2, data_set_3, data_set_4, data_set_5):
    _frame['sentence'] = _frame['sentence'].astype(str)

In [None]:
# Short model tag -> checkpoint identifier passed to `from_pretrained`.
# The tags are also used as keys in the result dictionaries and tables.
models = {
    'albert_base': "albert-base-v2",
    'distilbert': "distilbert-base-uncased",
    'bertB': "bert-base-uncased",
    'bertL': "bert-large-uncased",
    'robertB': "roberta-base",
    'robertL': "roberta-large",
    'allenaiB': "allenai/reviews_roberta_base",

    'ALBERT-L': "albert/albert-large-v2",
    'DISRoBERTa-B': "distilbert/distilroberta-base",
    'XLNet-B': "xlnet/xlnet-base-cased",
    'XLNet-L': "xlnet/xlnet-large-cased",
    'DEBERT-B': "microsoft/deberta-base",
    'DEBERT-L': "microsoft/deberta-large",
    'XLM-B': "FacebookAI/xlm-roberta-base",
    'XLM-L': "FacebookAI/xlm-roberta-large",
}

# Full training with the following settings requires over 100 hours on an A100 GPU.
# Each entry is (ratio, max_step): the frequency threshold passed to
# data_exclude_func and the number of training steps used for that threshold.
ratios_maxstep = {
    # LF_testing: Frequent Training
    'LF_testing': [
        (5, 2400), (10, 2400), (20, 2400),
        (30, 2200), (40, 2200), (50, 1900), (300, 600), (562, 500),
        (75, 1600), (175, 1400), (190, 1200), (200, 1000), (210, 800),
    ],

    # HF_testing: Infrequent Training
    'HF_testing': [
        (350, 2200), (320, 2000),
        (290, 1800), (210, 1600), (58, 500),

        (207, 1600), (205, 1500),
        (195, 1500), (175, 1200), (110, 1000),

        (75, 800), (43, 500),
    ],
}


# One training run is performed per seed (values were randomly selected).
seeds = [9741, 1694, 6932, 94, 791, 5, 1759, 323, 200, 999]

# Nested results: ratio -> model tag -> seed -> per-dataset predictions.
all_datasets_ratio_models_seed_results = {}

# Flat per-run result rows, filled by the training loop:
# main test split, excluded set, garbage set, dropped uninformative samples.
results, results_exclude, results_garbage, results_uninf_exc = [], [], [], []


# Held-out setting to run; must be one of the keys of `ratios_maxstep`.
setting_ratio = 'HF_testing'

# Per-model deviations from the defaults produced by return_training_args().
# Models that are not listed here train with the unmodified defaults.
model_overrides = {
    'bertL':        {'learning_rate': 2e-5},
    'robertL':      {'learning_rate': 1.25e-5},
    'ALBERT-L':     {'learning_rate': 8e-6},
    'DISRoBERTa-B': {'learning_rate': 2e-5},
    'XLNet-B':      {'learning_rate': 2e-5},
    'XLNet-L':      {'learning_rate': 1e-5, 'per_device_train_batch_size': 32},
    'DEBERT-B':     {'learning_rate': 3e-5, 'per_device_train_batch_size': 16},
    'DEBERT-L':     {'learning_rate': 1e-5},
    'XLM-B':        {'learning_rate': 2e-5},
    'XLM-L':        {'learning_rate': 8e-6},
}

for ratio_max_step in ratios_maxstep[setting_ratio]:
    ratio = ratio_max_step[0]
    base_max_steps = ratio_max_step[1]

    # The split depends only on the setting and the ratio (not on the model
    # or the seed), so it is computed once per ratio.
    df_main_full, df_exclude, df_garbage = data_exclude_func(data_set_1,
                                                             setting= setting_ratio,
                                                             rate= ratio
                                                             )

    # The balancing step down-samples the uninformative class (label 0) to the
    # size of the informative class (label 1). For ratios 5, 10 and 20 of the
    # LF_testing setting the uninformative part is already smaller than the
    # informative part, so balancing is skipped there to keep the code consistent.
    n_informative = df_main_full[df_main_full.label == 1].shape[0]
    n_uninformative = df_main_full[df_main_full.label == 0].shape[0]
    balance = n_informative < n_uninformative

    all_datasets_ratio_models_seed_results.update({ratio:{}})
    for model_name, model_address in models.items():
        # `tokenizer` must stay a notebook-level name: prepare_auxilary_datasets reads it.
        tokenizer = AutoTokenizer.from_pretrained(model_address)
        all_datasets_ratio_models_seed_results[ratio].update({model_name:{}})

        # Start from fresh arguments for every model so that an override applied
        # to one model (learning rate, batch size, step budget) can never leak
        # into the models that follow it.
        training_args = return_training_args()
        max_stepp = base_max_steps
        if model_name == 'XLNet-L':
            # This model trains with batch size 32 and 3/4 of the ratio's step budget.
            max_stepp = int(base_max_steps * 3 / 4)
        training_args.max_steps = max_stepp
        training_args.logging_steps = 50 if base_max_steps < 700 else 100
        for arg_name, arg_value in model_overrides.get(model_name, {}).items():
            setattr(training_args, arg_name, arg_value)

        # These encodings depend on the tokenizer but not on the seed, so they
        # are built once per model. The excluded/garbage sets carry no labels;
        # their recall is computed manually after prediction.
        test_exclude_encodings = tokenizer(df_exclude.sentence.tolist(), truncation=True, padding=True, max_length=40)
        test_garbage_encodings = tokenizer(df_garbage.sentence.tolist(), truncation=True, padding=True, max_length=40)
        test_exclude_dataset = AmazonDataset(test_exclude_encodings)
        test_garbage_dataset = AmazonDataset(test_garbage_encodings)

        # Cross-domain test sets. If you comment out datasets 2-5, remove
        # their entries here as well.
        auxiliary_datasets = {
            "electronics":    prepare_auxilary_datasets(data_set_2),
            "baby":           prepare_auxilary_datasets(data_set_3),
            "pet-supplies":   prepare_auxilary_datasets(data_set_4),
            "Sport-outdoors": prepare_auxilary_datasets(data_set_5),
        }

        for seed in seeds:
            set_seed(seed)
            training_args.seed = seed
            print(ratio, model_name, training_args.seed , training_args.learning_rate, training_args.max_steps)

            df_main = df_main_full
            if balance:
                # Randomly move the surplus uninformative samples out of the main
                # part; they are kept as an extra (all label 0) test set.
                drop_uninf = n_uninformative - n_informative
                df_uninf_exc = df_main[df_main.label == 0].sample(n= int(drop_uninf), random_state = seed)
                df_main = df_main.drop(df_uninf_exc.index)
                test_uninf_exc_encodings = tokenizer(df_uninf_exc.sentence.tolist(), truncation=True, padding=True, max_length=40)
                test_uninf_dataset = AmazonDataset(test_uninf_exc_encodings)

            # 80/20 train/test split of the (balanced) main part.
            df_train = df_main.sample(frac = 0.8, random_state = seed)
            df_test = df_main.drop(df_train.index)

            # Encoding
            train_main_encodings = tokenizer(df_train.sentence.tolist(), truncation=True, padding=True, max_length=40)
            test_main_encodings = tokenizer(df_test.sentence.tolist(), truncation=True, padding=True, max_length=40)

            train_dataset = AmazonDataset(train_main_encodings, df_train.label.tolist())
            test_main_dataset = AmazonDataset(test_main_encodings, df_test.label.tolist())

            print(f'Model seed is: {training_args.seed}, total, training informative and uninf samples: {df_main.shape[0]} ,\
             {df_train[df_train.label == 1].shape[0]}, {df_train[df_train.label == 0].shape[0]}')
            model = AutoModelForSequenceClassification.from_pretrained(model_address)
            # No eval_dataset and no early stopping: every run trains for a fixed
            # number of steps and is evaluated only after training has finished.
            trainer = Trainer(
                model=model,
                args=training_args,
                train_dataset=train_dataset,
                compute_metrics=compute_metrics
            )

            trainoutput = trainer.train()
            tr_loss = trainoutput.training_loss
            pred = trainer.predict(test_main_dataset)
            results.append(["skin-care", ratio, model_name, seed, pred.metrics['test_accuracy'], pred.metrics['test_f1'], pred.metrics['test_precision'], pred.metrics['test_recall'], pred.metrics['test_AUC']])

            # Raw predictions for the excluded (unseen needs) data.
            raw_pred, _, _ = trainer.predict(test_exclude_dataset)
            y_prediction = np.argmax(raw_pred, axis=1)
            recall_exc = recall_score(y_true= df_exclude.label, y_pred= y_prediction)
            results_exclude.append(["skin-care-exc", ratio, model_name, seed, y_prediction.shape[0], recall_exc])

            # Raw predictions for the garbage data.
            raw_pred_garbage, _, _ = trainer.predict( test_garbage_dataset)
            y_prediction_garbage = np.argmax(raw_pred_garbage, axis=1)
            recall_garbage = recall_score(y_true= df_garbage.label, y_pred= y_prediction_garbage)
            results_garbage.append(["skin-care-garbage", ratio, model_name, seed, y_prediction_garbage.shape[0], recall_garbage])

            if balance:
                # Raw predictions for the uninformative samples dropped by balancing;
                # recall is measured on label 0 because the whole set is label 0.
                raw_pred_uninf_exc, _, _ = trainer.predict( test_uninf_dataset)
                y_prediction_uninf_exc = np.argmax(raw_pred_uninf_exc, axis=1)
                recall_uninf_exc = recall_score(y_true= df_uninf_exc.label, y_pred= y_prediction_uninf_exc, pos_label= 0)
                results_uninf_exc.append(["skin-care-uninf_exc", ratio, model_name, seed, y_prediction_uninf_exc.shape[0], recall_uninf_exc])

            # Collect everything produced by this run under ratio -> model -> seed.
            seed_results = {
                "skin-care": [df_train.Sentence_ID.values, df_test.Sentence_ID.values, pred, tr_loss, 0],
            }
            for aux_name, aux_dataset in auxiliary_datasets.items():
                seed_results[aux_name] = trainer.predict(aux_dataset)
            seed_results["skin-care-EXC"] = [df_exclude.Sentence_ID.values, raw_pred]
            seed_results["skin-care-GB"] = [df_garbage.Sentence_ID.values, raw_pred_garbage]
            seed_results["skin-care-uninf_exc"] = [df_uninf_exc.Sentence_ID.values, raw_pred_uninf_exc] if balance  else []
            all_datasets_ratio_models_seed_results[ratio][model_name].update({seed: seed_results})

            print(f'for ratio of {ratio}, model of {model_name}, seed of {seed} performance is:\n', recall_exc," and ",pred.metrics)

    print('number of training samples: ', df_train.shape)

    # Checkpointing: after every ratio, save all results gathered so far to
    # Google Drive (the file for a ratio also contains the earlier ratios).
    file_path = f'/content/drive/MyDrive/paper1/all_results_ratio_{ratio}'
    with open(file_path, 'wb') as handle:
         pickle.dump(all_datasets_ratio_models_seed_results, handle, protocol=pickle.HIGHEST_PROTOCOL)

def _display_with_category_means(frame):
    """Show `frame`, then the per-category mean of its numeric columns."""
    display(frame)
    display(frame.groupby(frame.category).mean(numeric_only =True))


# Main test split of the in-domain dataset.
df_result = pd.DataFrame(results, columns = ['category', 'ratio' ,'model', 'seed', 'accuracy','f1-score', 'precision', 'recall', 'AUC'])
_display_with_category_means(df_result)

# Excluded set (NOS = number of samples that were predicted).
df_result_exc = pd.DataFrame(results_exclude, columns = ['category', 'ratio' ,'model', 'seed', 'NOS', 'recall'])
_display_with_category_means(df_result_exc)

# Garbage set.
df_result_gb = pd.DataFrame(results_garbage, columns = ['category', 'ratio' ,'model', 'seed', 'NOS', 'recall'])
_display_with_category_means(df_result_gb)

# One table per dataset that has full metrics.
# If you comment out datasets 2-5, make sure to also remove their corresponding keys
# from the `tables` dictionary.
tables = {"skin-care": results,"electronics": [],"baby": [],
          "pet-supplies": [],"Sport-outdoors": []}

# These entries hold raw predictions only (no metrics) and are reported above.
raw_prediction_keys = ("skin-care-EXC", "skin-care-GB", "skin-care-uninf_exc")

for rto, per_model in all_datasets_ratio_models_seed_results.items():
    for model_name, per_seed in per_model.items():
        for seed, per_dataset in per_seed.items():
            for dataset, v in per_dataset.items():
                if dataset == "skin-care":
                    # In-domain rows were already collected in `df_result`.
                    tables["skin-care"] =  df_result
                    continue
                if dataset in raw_prediction_keys:
                    continue
                metrics = v.metrics
                tables[dataset].append([dataset, rto, model_name, seed, metrics['test_accuracy'],
                                        metrics['test_f1'], metrics['test_precision'],
                                        metrics['test_recall'], metrics['test_AUC']])

df_total_list = [
    pd.DataFrame(rows, columns = ['category','ratio' ,'model', 'seed', 'accuracy','f1-score', 'precision', 'recall', 'AUC'])
    for rows in tables.values()
]
df = pd.concat(df_total_list).drop_duplicates().reset_index(drop=True)
display(df.groupby(df.category).mean(numeric_only =True))




The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/685 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/760k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.31M [00:00<?, ?B/s]

207 ALBERT-L 9741 8e-06 1600
Model seed is: 9741, total, training informative and uninf samples: 5120 ,             2078, 2018


model.safetensors:   0%|          | 0.00/71.5M [00:00<?, ?B/s]

Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6515
200,0.5741
300,0.4954
400,0.4785
500,0.4292
600,0.3586
700,0.316
800,0.3025
900,0.2216
1000,0.1895


for ratio of 207, model of ALBERT-L, seed of 9741 performance is:
 0.5343827671913836  and  {'test_loss': 1.0510976314544678, 'test_accuracy': 0.76953125, 'test_f1': 0.759674134419552, 'test_precision': 0.746, 'test_recall': 0.7738589211618258, 'test_AUC': 0.8471658679242394, 'test_runtime': 1.8831, 'test_samples_per_second': 543.777, 'test_steps_per_second': 8.497}
207 ALBERT-L 1694 8e-06 1600
Model seed is: 1694, total, training informative and uninf samples: 5120 ,             2031, 2065


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6654
200,0.5904
300,0.5302
400,0.4891
500,0.4748
600,0.3655
700,0.3882
800,0.3491
900,0.2507
1000,0.274


for ratio of 207, model of ALBERT-L, seed of 1694 performance is:
 0.5020712510356256  and  {'test_loss': 0.9064054489135742, 'test_accuracy': 0.7900390625, 'test_f1': 0.7906523855890945, 'test_precision': 0.8152610441767069, 'test_recall': 0.7674858223062382, 'test_AUC': 0.8581848733077467, 'test_runtime': 1.8847, 'test_samples_per_second': 543.317, 'test_steps_per_second': 8.489}
207 ALBERT-L 6932 8e-06 1600
Model seed is: 6932, total, training informative and uninf samples: 5120 ,             2048, 2048


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6196
200,0.5682
300,0.5013
400,0.4398
500,0.394
600,0.3072
700,0.2936
800,0.2518
900,0.1753
1000,0.1474


for ratio of 207, model of ALBERT-L, seed of 6932 performance is:
 0.5658657829328915  and  {'test_loss': 0.9417530298233032, 'test_accuracy': 0.8017578125, 'test_f1': 0.8093896713615024, 'test_precision': 0.779385171790235, 'test_recall': 0.841796875, 'test_AUC': 0.8553123474121094, 'test_runtime': 1.8829, 'test_samples_per_second': 543.835, 'test_steps_per_second': 8.497}
207 ALBERT-L 94 8e-06 1600
Model seed is: 94, total, training informative and uninf samples: 5120 ,             2059, 2037


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6607
200,0.5746
300,0.5005
400,0.4489
500,0.4358
600,0.3509
700,0.3492
800,0.312
900,0.2387
1000,0.2286


for ratio of 207, model of ALBERT-L, seed of 94 performance is:
 0.5492957746478874  and  {'test_loss': 0.9626073837280273, 'test_accuracy': 0.7919921875, 'test_f1': 0.7893175074183977, 'test_precision': 0.7823529411764706, 'test_recall': 0.7964071856287425, 'test_AUC': 0.8576575338806135, 'test_runtime': 1.8768, 'test_samples_per_second': 545.617, 'test_steps_per_second': 8.525}
207 ALBERT-L 791 8e-06 1600
Model seed is: 791, total, training informative and uninf samples: 5120 ,             2024, 2072


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6354
200,0.5732
300,0.5061
400,0.4563
500,0.4417
600,0.3539
700,0.3223
800,0.2742
900,0.2252
1000,0.2194


for ratio of 207, model of ALBERT-L, seed of 791 performance is:
 0.5385252692626347  and  {'test_loss': 0.8540312647819519, 'test_accuracy': 0.802734375, 'test_f1': 0.8183453237410071, 'test_precision': 0.7899305555555556, 'test_recall': 0.8488805970149254, 'test_AUC': 0.8530324810374357, 'test_runtime': 1.8754, 'test_samples_per_second': 546.026, 'test_steps_per_second': 8.532}
207 ALBERT-L 5 8e-06 1600
Model seed is: 5, total, training informative and uninf samples: 5120 ,             2073, 2023


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.623
200,0.5628
300,0.5216
400,0.4554
500,0.4378
600,0.3641
700,0.3367
800,0.3151
900,0.2574
1000,0.2222


for ratio of 207, model of ALBERT-L, seed of 5 performance is:
 0.5418392709196355  and  {'test_loss': 0.8524534702301025, 'test_accuracy': 0.80859375, 'test_f1': 0.8032128514056224, 'test_precision': 0.7858546168958742, 'test_recall': 0.8213552361396304, 'test_AUC': 0.8542476837247008, 'test_runtime': 1.8779, 'test_samples_per_second': 545.29, 'test_steps_per_second': 8.52}
207 ALBERT-L 1759 8e-06 1600
Model seed is: 1759, total, training informative and uninf samples: 5120 ,             2061, 2035


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6308
200,0.5669
300,0.5141
400,0.4461
500,0.4417
600,0.3509
700,0.3555
800,0.3023
900,0.2342
1000,0.2463


for ratio of 207, model of ALBERT-L, seed of 1759 performance is:
 0.5012427506213754  and  {'test_loss': 0.9717439413070679, 'test_accuracy': 0.7802734375, 'test_f1': 0.7756729810568296, 'test_precision': 0.7718253968253969, 'test_recall': 0.779559118236473, 'test_AUC': 0.8401755892737857, 'test_runtime': 1.8773, 'test_samples_per_second': 545.457, 'test_steps_per_second': 8.523}
207 ALBERT-L 323 8e-06 1600
Model seed is: 323, total, training informative and uninf samples: 5120 ,             2056, 2040


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6502
200,0.5535
300,0.5107
400,0.4463
500,0.4437
600,0.3455
700,0.335
800,0.2631
900,0.2312
1000,0.1953


for ratio of 207, model of ALBERT-L, seed of 323 performance is:
 0.49212924606462305  and  {'test_loss': 1.100644826889038, 'test_accuracy': 0.771484375, 'test_f1': 0.7736943907156673, 'test_precision': 0.7547169811320755, 'test_recall': 0.7936507936507936, 'test_AUC': 0.8431394993894994, 'test_runtime': 1.8694, 'test_samples_per_second': 547.764, 'test_steps_per_second': 8.559}
207 ALBERT-L 200 8e-06 1600
Model seed is: 200, total, training informative and uninf samples: 5120 ,             2038, 2058


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.673
200,0.5634
300,0.5365
400,0.4734
500,0.4329
600,0.363
700,0.3514
800,0.2987
900,0.233
1000,0.2534


for ratio of 207, model of ALBERT-L, seed of 200 performance is:
 0.5302402651201326  and  {'test_loss': 0.9892237782478333, 'test_accuracy': 0.7724609375, 'test_f1': 0.7828518173345759, 'test_precision': 0.7622504537205081, 'test_recall': 0.8045977011494253, 'test_AUC': 0.840530597914854, 'test_runtime': 1.8889, 'test_samples_per_second': 542.115, 'test_steps_per_second': 8.471}
207 ALBERT-L 999 8e-06 1600
Model seed is: 999, total, training informative and uninf samples: 5120 ,             2034, 2062


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6324
200,0.5603
300,0.4903
400,0.4115
500,0.4252
600,0.2875
700,0.2904
800,0.2625
900,0.1641
1000,0.1716


for ratio of 207, model of ALBERT-L, seed of 999 performance is:
 0.4863297431648716  and  {'test_loss': 1.064729928970337, 'test_accuracy': 0.796875, 'test_f1': 0.8026565464895635, 'test_precision': 0.8011363636363636, 'test_recall': 0.8041825095057035, 'test_AUC': 0.858903293783499, 'test_runtime': 1.8716, 'test_samples_per_second': 547.116, 'test_steps_per_second': 8.549}


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/480 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

207 DISRoBERTa-B 9741 2e-05 1600
Model seed is: 9741, total, training informative and uninf samples: 5120 ,             2078, 2018


model.safetensors:   0%|          | 0.00/331M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.649
200,0.5494
300,0.4836
400,0.4269
500,0.3927
600,0.3082
700,0.2891
800,0.2518
900,0.2039
1000,0.1445


for ratio of 207, model of DISRoBERTa-B, seed of 9741 performance is:
 0.4780447390223695  and  {'test_loss': 1.0284149646759033, 'test_accuracy': 0.8037109375, 'test_f1': 0.7942681678607986, 'test_precision': 0.7838383838383839, 'test_recall': 0.8049792531120332, 'test_AUC': 0.8753081410482155, 'test_runtime': 0.3245, 'test_samples_per_second': 3155.605, 'test_steps_per_second': 49.306}
207 DISRoBERTa-B 1694 2e-05 1600
Model seed is: 1694, total, training informative and uninf samples: 5120 ,             2031, 2065


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6367
200,0.5776
300,0.4951
400,0.4301
500,0.4131
600,0.3334
700,0.3155
800,0.2541
900,0.2329
1000,0.2273


for ratio of 207, model of DISRoBERTa-B, seed of 1694 performance is:
 0.5136702568351285  and  {'test_loss': 0.9318922758102417, 'test_accuracy': 0.8056640625, 'test_f1': 0.8131455399061033, 'test_precision': 0.8078358208955224, 'test_recall': 0.8185255198487713, 'test_AUC': 0.8834660403658513, 'test_runtime': 0.3259, 'test_samples_per_second': 3142.074, 'test_steps_per_second': 49.095}
207 DISRoBERTa-B 6932 2e-05 1600
Model seed is: 6932, total, training informative and uninf samples: 5120 ,             2048, 2048


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6438
200,0.5596
300,0.4887
400,0.4266
500,0.3797
600,0.2847
700,0.3183
800,0.2451
900,0.1987
1000,0.1896


for ratio of 207, model of DISRoBERTa-B, seed of 6932 performance is:
 0.5219552609776305  and  {'test_loss': 0.9534397125244141, 'test_accuracy': 0.806640625, 'test_f1': 0.8125, 'test_precision': 0.7886029411764706, 'test_recall': 0.837890625, 'test_AUC': 0.8797950744628906, 'test_runtime': 0.32, 'test_samples_per_second': 3199.824, 'test_steps_per_second': 49.997}
207 DISRoBERTa-B 94 2e-05 1600
Model seed is: 94, total, training informative and uninf samples: 5120 ,             2059, 2037


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6356
200,0.5511
300,0.4665
400,0.4261
500,0.3917
600,0.2956
700,0.2985
800,0.246
900,0.1833
1000,0.1874


for ratio of 207, model of DISRoBERTa-B, seed of 94 performance is:
 0.4896437448218724  and  {'test_loss': 1.0448064804077148, 'test_accuracy': 0.796875, 'test_f1': 0.7976653696498054, 'test_precision': 0.777988614800759, 'test_recall': 0.8183632734530938, 'test_AUC': 0.8691374421329425, 'test_runtime': 0.3298, 'test_samples_per_second': 3104.841, 'test_steps_per_second': 48.513}
207 DISRoBERTa-B 791 2e-05 1600
Model seed is: 791, total, training informative and uninf samples: 5120 ,             2024, 2072


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6513
200,0.5634
300,0.4843
400,0.4373
500,0.4204
600,0.3128
700,0.2834
800,0.2664
900,0.1954
1000,0.2308


for ratio of 207, model of DISRoBERTa-B, seed of 791 performance is:
 0.5434962717481359  and  {'test_loss': 0.9395159482955933, 'test_accuracy': 0.8017578125, 'test_f1': 0.8176100628930818, 'test_precision': 0.7885615251299827, 'test_recall': 0.8488805970149254, 'test_AUC': 0.8797444641546367, 'test_runtime': 0.3288, 'test_samples_per_second': 3114.012, 'test_steps_per_second': 48.656}
207 DISRoBERTa-B 5 2e-05 1600
Model seed is: 5, total, training informative and uninf samples: 5120 ,             2073, 2023


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.633
200,0.5716
300,0.4872
400,0.4413
500,0.402
600,0.3488
700,0.3129
800,0.2812
900,0.2088
1000,0.2137


for ratio of 207, model of DISRoBERTa-B, seed of 5 performance is:
 0.531897265948633  and  {'test_loss': 0.8124768733978271, 'test_accuracy': 0.8232421875, 'test_f1': 0.8213228035538006, 'test_precision': 0.7908745247148289, 'test_recall': 0.8542094455852156, 'test_AUC': 0.8973305954825461, 'test_runtime': 0.3253, 'test_samples_per_second': 3148.129, 'test_steps_per_second': 49.19}
207 DISRoBERTa-B 1759 2e-05 1600
Model seed is: 1759, total, training informative and uninf samples: 5120 ,             2061, 2035


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6268
200,0.5566
300,0.5041
400,0.3971
500,0.4042
600,0.3054
700,0.3054
800,0.2587
900,0.1902
1000,0.1873


for ratio of 207, model of DISRoBERTa-B, seed of 1759 performance is:
 0.4772162386081193  and  {'test_loss': 1.0567948818206787, 'test_accuracy': 0.7861328125, 'test_f1': 0.7842364532019704, 'test_precision': 0.7713178294573644, 'test_recall': 0.7975951903807615, 'test_AUC': 0.860986735375513, 'test_runtime': 0.3204, 'test_samples_per_second': 3195.974, 'test_steps_per_second': 49.937}
207 DISRoBERTa-B 323 2e-05 1600
Model seed is: 323, total, training informative and uninf samples: 5120 ,             2056, 2040


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.65
200,0.5554
300,0.4821
400,0.4317
500,0.4326
600,0.3314
700,0.2872
800,0.2378
900,0.2178
1000,0.2096


for ratio of 207, model of DISRoBERTa-B, seed of 323 performance is:
 0.5294117647058824  and  {'test_loss': 1.0114357471466064, 'test_accuracy': 0.78515625, 'test_f1': 0.7908745247148289, 'test_precision': 0.7591240875912408, 'test_recall': 0.8253968253968254, 'test_AUC': 0.8720428876678878, 'test_runtime': 0.3263, 'test_samples_per_second': 3138.015, 'test_steps_per_second': 49.031}
207 DISRoBERTa-B 200 2e-05 1600
Model seed is: 200, total, training informative and uninf samples: 5120 ,             2038, 2058


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.636
200,0.5434
300,0.4787
400,0.4223
500,0.4101
600,0.2988
700,0.3025
800,0.2467
900,0.2153
1000,0.176


for ratio of 207, model of DISRoBERTa-B, seed of 200 performance is:
 0.5086992543496272  and  {'test_loss': 0.8518528938293457, 'test_accuracy': 0.8310546875, 'test_f1': 0.8387698042870456, 'test_precision': 0.8166969147005445, 'test_recall': 0.8620689655172413, 'test_AUC': 0.8863091694524584, 'test_runtime': 0.3225, 'test_samples_per_second': 3175.222, 'test_steps_per_second': 49.613}
207 DISRoBERTa-B 999 2e-05 1600
Model seed is: 999, total, training informative and uninf samples: 5120 ,             2034, 2062


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6204
200,0.5651
300,0.4813
400,0.4113
500,0.4035
600,0.289
700,0.2741
800,0.2577
900,0.1582
1000,0.189


for ratio of 207, model of DISRoBERTa-B, seed of 999 performance is:
 0.52029826014913  and  {'test_loss': 1.0639636516571045, 'test_accuracy': 0.79296875, 'test_f1': 0.8051470588235294, 'test_precision': 0.7793594306049823, 'test_recall': 0.8326996197718631, 'test_AUC': 0.8694740940950113, 'test_runtime': 0.3213, 'test_samples_per_second': 3186.769, 'test_steps_per_second': 49.793}


config.json:   0%|          | 0.00/760 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/798k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.38M [00:00<?, ?B/s]

207 XLNet-B 9741 2e-05 1600
Model seed is: 9741, total, training informative and uninf samples: 5120 ,             2078, 2018


pytorch_model.bin:   0%|          | 0.00/467M [00:00<?, ?B/s]

Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6986
200,0.6182
300,0.5352
400,0.5048
500,0.464
600,0.3722
700,0.3566
800,0.3015
900,0.2508
1000,0.2168


for ratio of 207, model of XLNet-B, seed of 9741 performance is:
 0.5227837613918807  and  {'test_loss': 1.1681146621704102, 'test_accuracy': 0.775390625, 'test_f1': 0.7758284600389863, 'test_precision': 0.7316176470588235, 'test_recall': 0.8257261410788381, 'test_AUC': 0.8580675537045829, 'test_runtime': 0.7759, 'test_samples_per_second': 1319.819, 'test_steps_per_second': 20.622}
207 XLNet-B 1694 2e-05 1600
Model seed is: 1694, total, training informative and uninf samples: 5120 ,             2031, 2065


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6574
200,0.5921
300,0.5205
400,0.4764
500,0.4847
600,0.3645
700,0.3415
800,0.2627
900,0.2342
1000,0.2199


for ratio of 207, model of XLNet-B, seed of 1694 performance is:
 0.5443247721623861  and  {'test_loss': 1.0950231552124023, 'test_accuracy': 0.80078125, 'test_f1': 0.8145454545454545, 'test_precision': 0.7845884413309983, 'test_recall': 0.8468809073724007, 'test_AUC': 0.8815756811976095, 'test_runtime': 0.7778, 'test_samples_per_second': 1316.601, 'test_steps_per_second': 20.572}
207 XLNet-B 6932 2e-05 1600
Model seed is: 6932, total, training informative and uninf samples: 5120 ,             2048, 2048


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6432
200,0.5926
300,0.527
400,0.4667
500,0.4165
600,0.3299
700,0.3241
800,0.2841
900,0.1908
1000,0.1894


for ratio of 207, model of XLNet-B, seed of 6932 performance is:
 0.584092792046396  and  {'test_loss': 1.2388333082199097, 'test_accuracy': 0.791015625, 'test_f1': 0.8, 'test_precision': 0.7670250896057348, 'test_recall': 0.8359375, 'test_AUC': 0.8537750244140625, 'test_runtime': 0.7689, 'test_samples_per_second': 1331.736, 'test_steps_per_second': 20.808}
207 XLNet-B 94 2e-05 1600
Model seed is: 94, total, training informative and uninf samples: 5120 ,             2059, 2037


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6525
200,0.5715
300,0.4773
400,0.4609
500,0.4153
600,0.3251
700,0.3119
800,0.2692
900,0.173
1000,0.1771


for ratio of 207, model of XLNet-B, seed of 94 performance is:
 0.6056338028169014  and  {'test_loss': 1.2810126543045044, 'test_accuracy': 0.771484375, 'test_f1': 0.7891891891891892, 'test_precision': 0.7192118226600985, 'test_recall': 0.874251497005988, 'test_AUC': 0.8576804326337765, 'test_runtime': 0.7721, 'test_samples_per_second': 1326.328, 'test_steps_per_second': 20.724}
207 XLNet-B 791 2e-05 1600
Model seed is: 791, total, training informative and uninf samples: 5120 ,             2024, 2072


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6675
200,0.5945
300,0.4939
400,0.4691
500,0.4717
600,0.3366
700,0.3156
800,0.2808
900,0.2141
1000,0.2108


for ratio of 207, model of XLNet-B, seed of 791 performance is:
 0.5352112676056338  and  {'test_loss': 1.1375391483306885, 'test_accuracy': 0.7841796875, 'test_f1': 0.799637352674524, 'test_precision': 0.7777777777777778, 'test_recall': 0.8227611940298507, 'test_AUC': 0.864176810619036, 'test_runtime': 0.7753, 'test_samples_per_second': 1320.705, 'test_steps_per_second': 20.636}
207 XLNet-B 5 2e-05 1600
Model seed is: 5, total, training informative and uninf samples: 5120 ,             2073, 2023


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6367
200,0.5708
300,0.5287
400,0.4735
500,0.4454
600,0.3522
700,0.3148
800,0.291
900,0.1959
1000,0.1813


for ratio of 207, model of XLNet-B, seed of 5 performance is:
 0.5816072908036454  and  {'test_loss': 1.0971007347106934, 'test_accuracy': 0.8017578125, 'test_f1': 0.8023369036027264, 'test_precision': 0.762962962962963, 'test_recall': 0.8459958932238193, 'test_AUC': 0.8924093469308158, 'test_runtime': 0.7719, 'test_samples_per_second': 1326.635, 'test_steps_per_second': 20.729}
207 XLNet-B 1759 2e-05 1600
Model seed is: 1759, total, training informative and uninf samples: 5120 ,             2061, 2035


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.648
200,0.596
300,0.5535
400,0.4553
500,0.4828
600,0.3503
700,0.3475
800,0.3008
900,0.2053
1000,0.21


for ratio of 207, model of XLNet-B, seed of 1759 performance is:
 0.5542667771333886  and  {'test_loss': 1.1393604278564453, 'test_accuracy': 0.779296875, 'test_f1': 0.7855787476280834, 'test_precision': 0.745945945945946, 'test_recall': 0.8296593186372746, 'test_AUC': 0.8515354518560931, 'test_runtime': 0.7677, 'test_samples_per_second': 1333.83, 'test_steps_per_second': 20.841}
207 XLNet-B 323 2e-05 1600
Model seed is: 323, total, training informative and uninf samples: 5120 ,             2056, 2040


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6647
200,0.5877
300,0.5339
400,0.4682
500,0.4784
600,0.3728
700,0.3345
800,0.2861
900,0.2421
1000,0.2183


for ratio of 207, model of XLNet-B, seed of 323 performance is:
 0.5169842584921293  and  {'test_loss': 1.1050910949707031, 'test_accuracy': 0.7978515625, 'test_f1': 0.8019138755980861, 'test_precision': 0.7744916820702403, 'test_recall': 0.8313492063492064, 'test_AUC': 0.873469932844933, 'test_runtime': 0.7807, 'test_samples_per_second': 1311.716, 'test_steps_per_second': 20.496}
207 XLNet-B 200 2e-05 1600
Model seed is: 200, total, training informative and uninf samples: 5120 ,             2038, 2058


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6614
200,0.5655
300,0.5028
400,0.4483
500,0.4497
600,0.3205
700,0.3013
800,0.2705
900,0.1997
1000,0.1695


for ratio of 207, model of XLNet-B, seed of 200 performance is:
 0.551781275890638  and  {'test_loss': 1.3150649070739746, 'test_accuracy': 0.7705078125, 'test_f1': 0.7842056932966024, 'test_precision': 0.7530864197530864, 'test_recall': 0.8180076628352491, 'test_AUC': 0.8495176382592236, 'test_runtime': 0.7726, 'test_samples_per_second': 1325.348, 'test_steps_per_second': 20.709}
207 XLNet-B 999 2e-05 1600
Model seed is: 999, total, training informative and uninf samples: 5120 ,             2034, 2062


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6426
200,0.58
300,0.499
400,0.4464
500,0.459
600,0.3248
700,0.3037
800,0.2813
900,0.1878
1000,0.2051


for ratio of 207, model of XLNet-B, seed of 999 performance is:
 0.52029826014913  and  {'test_loss': 1.2050237655639648, 'test_accuracy': 0.7783203125, 'test_f1': 0.7896200185356812, 'test_precision': 0.7703435804701627, 'test_recall': 0.8098859315589354, 'test_AUC': 0.8498327912410096, 'test_runtime': 0.7821, 'test_samples_per_second': 1309.345, 'test_steps_per_second': 20.459}


config.json:   0%|          | 0.00/761 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/798k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.38M [00:00<?, ?B/s]

207 XLNet-L 9741 1e-05 1200
Model seed is: 9741, total, training informative and uninf samples: 5120 ,             2078, 2018


pytorch_model.bin:   0%|          | 0.00/1.44G [00:00<?, ?B/s]

Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.7124
200,0.7077
300,0.7046
400,0.7054
500,0.7092
600,0.7083
700,0.704
800,0.6954
900,0.6617
1000,0.6186


for ratio of 207, model of XLNet-L, seed of 9741 performance is:
 0.5617232808616405  and  {'test_loss': 0.5572043061256409, 'test_accuracy': 0.7265625, 'test_f1': 0.717741935483871, 'test_precision': 0.6980392156862745, 'test_recall': 0.7385892116182573, 'test_AUC': 0.7894918160799865, 'test_runtime': 2.1403, 'test_samples_per_second': 478.442, 'test_steps_per_second': 7.476}
207 XLNet-L 1694 1e-05 1200
Model seed is: 1694, total, training informative and uninf samples: 5120 ,             2031, 2065


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6954
200,0.6168
300,0.5514
400,0.4844
500,0.4231
600,0.3564
700,0.2858
800,0.2324
900,0.2013
1000,0.1619


for ratio of 207, model of XLNet-L, seed of 1694 performance is:
 0.5443247721623861  and  {'test_loss': 0.695160984992981, 'test_accuracy': 0.791015625, 'test_f1': 0.7973484848484849, 'test_precision': 0.7988614800759013, 'test_recall': 0.7958412098298677, 'test_AUC': 0.8744610566916806, 'test_runtime': 2.1459, 'test_samples_per_second': 477.192, 'test_steps_per_second': 7.456}
207 XLNet-L 6932 1e-05 1200
Model seed is: 6932, total, training informative and uninf samples: 5120 ,             2048, 2048


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6926
200,0.6289
300,0.5485
400,0.509
500,0.427
600,0.3778
700,0.3097
800,0.261
900,0.2191
1000,0.1706


for ratio of 207, model of XLNet-L, seed of 6932 performance is:
 0.5393537696768849  and  {'test_loss': 0.8134058713912964, 'test_accuracy': 0.767578125, 'test_f1': 0.7652859960552267, 'test_precision': 0.7729083665338645, 'test_recall': 0.7578125, 'test_AUC': 0.8393783569335938, 'test_runtime': 2.1483, 'test_samples_per_second': 476.66, 'test_steps_per_second': 7.448}
207 XLNet-L 94 1e-05 1200
Model seed is: 94, total, training informative and uninf samples: 5120 ,             2059, 2037


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6944
200,0.6388
300,0.5783
400,0.5379
500,0.4615
600,0.4178
700,0.3583
800,0.3212
900,0.2769
1000,0.2195


for ratio of 207, model of XLNet-L, seed of 94 performance is:
 0.5526097763048882  and  {'test_loss': 0.6132825016975403, 'test_accuracy': 0.779296875, 'test_f1': 0.7801556420233464, 'test_precision': 0.7609108159392789, 'test_recall': 0.8003992015968064, 'test_AUC': 0.8652255718009488, 'test_runtime': 2.1434, 'test_samples_per_second': 477.754, 'test_steps_per_second': 7.465}
207 XLNet-L 791 1e-05 1200
Model seed is: 791, total, training informative and uninf samples: 5120 ,             2024, 2072


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6956
200,0.6357
300,0.5228
400,0.4834
500,0.4144
600,0.3505
700,0.2998
800,0.2351
900,0.1959
1000,0.1621


for ratio of 207, model of XLNet-L, seed of 791 performance is:
 0.5791217895608948  and  {'test_loss': 0.8719863891601562, 'test_accuracy': 0.7763671875, 'test_f1': 0.7889400921658987, 'test_precision': 0.7795992714025501, 'test_recall': 0.7985074626865671, 'test_AUC': 0.8421404758991927, 'test_runtime': 2.1419, 'test_samples_per_second': 478.088, 'test_steps_per_second': 7.47}
207 XLNet-L 5 1e-05 1200
Model seed is: 5, total, training informative and uninf samples: 5120 ,             2073, 2023


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6789
200,0.632
300,0.581
400,0.5272
500,0.4735
600,0.4085
700,0.3658
800,0.32
900,0.2866
1000,0.2308


for ratio of 207, model of XLNet-L, seed of 5 performance is:
 0.5592377796188898  and  {'test_loss': 0.5538701415061951, 'test_accuracy': 0.796875, 'test_f1': 0.7915831663326653, 'test_precision': 0.7729941291585127, 'test_recall': 0.811088295687885, 'test_AUC': 0.8793816128082472, 'test_runtime': 2.1524, 'test_samples_per_second': 475.741, 'test_steps_per_second': 7.433}
207 XLNet-L 1759 1e-05 1200
Model seed is: 1759, total, training informative and uninf samples: 5120 ,             2061, 2035


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.7095
200,0.6362
300,0.5719
400,0.5145
500,0.464
600,0.4049
700,0.334
800,0.2821
900,0.2426
1000,0.1838


for ratio of 207, model of XLNet-L, seed of 1759 performance is:
 0.5865782932891467  and  {'test_loss': 0.7058906555175781, 'test_accuracy': 0.77734375, 'test_f1': 0.7769080234833661, 'test_precision': 0.7590822179732314, 'test_recall': 0.7955911823647295, 'test_AUC': 0.8562801794064319, 'test_runtime': 2.1319, 'test_samples_per_second': 480.321, 'test_steps_per_second': 7.505}
207 XLNet-L 323 1e-05 1200
Model seed is: 323, total, training informative and uninf samples: 5120 ,             2056, 2040


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6847
200,0.595
300,0.5196
400,0.4416
500,0.3965
600,0.3304
700,0.2773
800,0.2264
900,0.1744
1000,0.1373


for ratio of 207, model of XLNet-L, seed of 323 performance is:
 0.5534382767191384  and  {'test_loss': 0.7674712538719177, 'test_accuracy': 0.7880859375, 'test_f1': 0.7919463087248323, 'test_precision': 0.7662337662337663, 'test_recall': 0.8194444444444444, 'test_AUC': 0.8742483211233212, 'test_runtime': 2.1366, 'test_samples_per_second': 479.267, 'test_steps_per_second': 7.489}
207 XLNet-L 200 1e-05 1200
Model seed is: 200, total, training informative and uninf samples: 5120 ,             2038, 2058


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6804
200,0.578
300,0.5183
400,0.453
500,0.3938
600,0.3393
700,0.2755
800,0.2137
900,0.1801
1000,0.1358


for ratio of 207, model of XLNet-L, seed of 200 performance is:
 0.5824357912178956  and  {'test_loss': 0.8403900861740112, 'test_accuracy': 0.7861328125, 'test_f1': 0.8007279344858963, 'test_precision': 0.7625649913344887, 'test_recall': 0.842911877394636, 'test_AUC': 0.8534406435560441, 'test_runtime': 2.1368, 'test_samples_per_second': 479.214, 'test_steps_per_second': 7.488}
207 XLNet-L 999 1e-05 1200
Model seed is: 999, total, training informative and uninf samples: 5120 ,             2034, 2062


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6951
200,0.5976
300,0.5347
400,0.4982
500,0.4019
600,0.3469
700,0.2939
800,0.2523
900,0.2041
1000,0.1553


for ratio of 207, model of XLNet-L, seed of 999 performance is:
 0.5335542667771334  and  {'test_loss': 0.6747133731842041, 'test_accuracy': 0.787109375, 'test_f1': 0.7981481481481479, 'test_precision': 0.7779783393501805, 'test_recall': 0.8193916349809885, 'test_AUC': 0.8691076091437995, 'test_runtime': 2.1408, 'test_samples_per_second': 478.33, 'test_steps_per_second': 7.474}


tokenizer_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/474 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

207 DEBERT-B 9741 3e-05 1600
Model seed is: 9741, total, training informative and uninf samples: 5120 ,             2078, 2018


pytorch_model.bin:   0%|          | 0.00/559M [00:00<?, ?B/s]

Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6991
200,0.6714
300,0.5811
400,0.5527
500,0.496
600,0.454
700,0.4023
800,0.3359
900,0.2961
1000,0.2474


for ratio of 207, model of DEBERT-B, seed of 9741 performance is:
 0.5186412593206297  and  {'test_loss': 0.9303174018859863, 'test_accuracy': 0.78515625, 'test_f1': 0.7855750487329435, 'test_precision': 0.7408088235294118, 'test_recall': 0.8360995850622407, 'test_AUC': 0.8632925540873666, 'test_runtime': 0.7007, 'test_samples_per_second': 1461.355, 'test_steps_per_second': 22.834}
207 DEBERT-B 1694 3e-05 1600
Model seed is: 1694, total, training informative and uninf samples: 5120 ,             2031, 2065


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6658
200,0.5618
300,0.4441
400,0.3778
500,0.3703
600,0.2117
700,0.1964
800,0.153
900,0.0909
1000,0.1018


for ratio of 207, model of DEBERT-B, seed of 1694 performance is:
 0.4979287489643745  and  {'test_loss': 1.2073957920074463, 'test_accuracy': 0.8125, 'test_f1': 0.8188679245283019, 'test_precision': 0.8173258003766478, 'test_recall': 0.8204158790170132, 'test_AUC': 0.8841076168108305, 'test_runtime': 0.691, 'test_samples_per_second': 1481.943, 'test_steps_per_second': 23.155}
207 DEBERT-B 6932 3e-05 1600
Model seed is: 6932, total, training informative and uninf samples: 5120 ,             2048, 2048


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6768
200,0.623
300,0.5228
400,0.4381
500,0.3835
600,0.2427
700,0.2489
800,0.2031
900,0.1429
1000,0.1167


for ratio of 207, model of DEBERT-B, seed of 6932 performance is:
 0.551781275890638  and  {'test_loss': 1.1942880153656006, 'test_accuracy': 0.8037109375, 'test_f1': 0.8119738072965389, 'test_precision': 0.7791741472172352, 'test_recall': 0.84765625, 'test_AUC': 0.8708610534667969, 'test_runtime': 0.6907, 'test_samples_per_second': 1482.57, 'test_steps_per_second': 23.165}
207 DEBERT-B 94 3e-05 1600
Model seed is: 94, total, training informative and uninf samples: 5120 ,             2059, 2037


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6288
200,0.5254
300,0.4506
400,0.3476
500,0.3626
600,0.2408
700,0.2045
800,0.1557
900,0.0952
1000,0.1224


for ratio of 207, model of DEBERT-B, seed of 94 performance is:
 0.56006628003314  and  {'test_loss': 1.2132288217544556, 'test_accuracy': 0.802734375, 'test_f1': 0.8119180633147113, 'test_precision': 0.7609075043630017, 'test_recall': 0.8702594810379242, 'test_AUC': 0.8750224216958054, 'test_runtime': 0.6906, 'test_samples_per_second': 1482.751, 'test_steps_per_second': 23.168}
207 DEBERT-B 791 3e-05 1600
Model seed is: 791, total, training informative and uninf samples: 5120 ,             2024, 2072


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6456
200,0.5629
300,0.4606
400,0.3917
500,0.3806
600,0.2351
700,0.2131
800,0.1678
900,0.1206
1000,0.1412


for ratio of 207, model of DEBERT-B, seed of 791 performance is:
 0.5592377796188898  and  {'test_loss': 1.2362534999847412, 'test_accuracy': 0.794921875, 'test_f1': 0.8114901256732496, 'test_precision': 0.7820069204152249, 'test_recall': 0.8432835820895522, 'test_AUC': 0.8716242047956937, 'test_runtime': 0.689, 'test_samples_per_second': 1486.18, 'test_steps_per_second': 23.222}
207 DEBERT-B 5 3e-05 1600
Model seed is: 5, total, training informative and uninf samples: 5120 ,             2073, 2023


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6356
200,0.5472
300,0.4544
400,0.3592
500,0.3502
600,0.24
700,0.2046
800,0.1601
900,0.0873
1000,0.1158


for ratio of 207, model of DEBERT-B, seed of 5 performance is:
 0.5352112676056338  and  {'test_loss': 1.1044714450836182, 'test_accuracy': 0.8330078125, 'test_f1': 0.8311944718657452, 'test_precision': 0.8003802281368821, 'test_recall': 0.864476386036961, 'test_AUC': 0.9036704790091734, 'test_runtime': 0.6853, 'test_samples_per_second': 1494.294, 'test_steps_per_second': 23.348}
207 DEBERT-B 1759 3e-05 1600
Model seed is: 1759, total, training informative and uninf samples: 5120 ,             2061, 2035


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6846
200,0.5992
300,0.514
400,0.3941
500,0.4019
600,0.2276
700,0.2421
800,0.1922
900,0.0972
1000,0.1202


for ratio of 207, model of DEBERT-B, seed of 1759 performance is:
 0.5791217895608948  and  {'test_loss': 1.4429396390914917, 'test_accuracy': 0.7880859375, 'test_f1': 0.7962441314553991, 'test_precision': 0.7491166077738516, 'test_recall': 0.8496993987975952, 'test_AUC': 0.8535585456627541, 'test_runtime': 0.6945, 'test_samples_per_second': 1474.421, 'test_steps_per_second': 23.038}
207 DEBERT-B 323 3e-05 1600
Model seed is: 323, total, training informative and uninf samples: 5120 ,             2056, 2040


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6565
200,0.5659
300,0.478
400,0.4189
500,0.4094
600,0.2712
700,0.2297
800,0.1886
900,0.1459
1000,0.1224


for ratio of 207, model of DEBERT-B, seed of 323 performance is:
 0.5070422535211268  and  {'test_loss': 1.067767858505249, 'test_accuracy': 0.8095703125, 'test_f1': 0.8123195380173243, 'test_precision': 0.788785046728972, 'test_recall': 0.8373015873015873, 'test_AUC': 0.8825091575091575, 'test_runtime': 0.6821, 'test_samples_per_second': 1501.352, 'test_steps_per_second': 23.459}
207 DEBERT-B 200 3e-05 1600
Model seed is: 200, total, training informative and uninf samples: 5120 ,             2038, 2058


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6877
200,0.5955
300,0.5021
400,0.4321
500,0.4116
600,0.2595
700,0.2759
800,0.193
900,0.1582
1000,0.1335


for ratio of 207, model of DEBERT-B, seed of 200 performance is:
 0.47970173985086995  and  {'test_loss': 1.1028001308441162, 'test_accuracy': 0.8203125, 'test_f1': 0.8260869565217391, 'test_precision': 0.8152985074626866, 'test_recall': 0.8371647509578544, 'test_AUC': 0.8840805360931752, 'test_runtime': 0.6926, 'test_samples_per_second': 1478.573, 'test_steps_per_second': 23.103}
207 DEBERT-B 999 3e-05 1600
Model seed is: 999, total, training informative and uninf samples: 5120 ,             2034, 2062


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.618
200,0.5431
300,0.4663
400,0.3456
500,0.353
600,0.1971
700,0.1535
800,0.1381
900,0.0824
1000,0.0863


for ratio of 207, model of DEBERT-B, seed of 999 performance is:
 0.4995857497928749  and  {'test_loss': 1.2467365264892578, 'test_accuracy': 0.80859375, 'test_f1': 0.8191881918819187, 'test_precision': 0.7956989247311828, 'test_recall': 0.844106463878327, 'test_AUC': 0.8799532731687205, 'test_runtime': 0.6869, 'test_samples_per_second': 1490.748, 'test_steps_per_second': 23.293}


tokenizer_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/475 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

207 DEBERT-L 9741 1e-05 1600
Model seed is: 9741, total, training informative and uninf samples: 5120 ,             2078, 2018


pytorch_model.bin:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-large and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.692
200,0.5993
300,0.49
400,0.423
500,0.3661
600,0.2568
700,0.2115
800,0.1837
900,0.1261
1000,0.1298


for ratio of 207, model of DEBERT-L, seed of 9741 performance is:
 0.5376967688483845  and  {'test_loss': 1.1115179061889648, 'test_accuracy': 0.8232421875, 'test_f1': 0.8223748773307163, 'test_precision': 0.7802607076350093, 'test_recall': 0.8692946058091287, 'test_AUC': 0.8798173355177534, 'test_runtime': 1.9213, 'test_samples_per_second': 532.969, 'test_steps_per_second': 8.328}
207 DEBERT-L 1694 1e-05 1600
Model seed is: 1694, total, training informative and uninf samples: 5120 ,             2031, 2065


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-large and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6162
200,0.547
300,0.426
400,0.3696
500,0.3277
600,0.2049
700,0.1885
800,0.1606
900,0.1042
1000,0.131


for ratio of 207, model of DEBERT-L, seed of 1694 performance is:
 0.5700082850041425  and  {'test_loss': 1.2514581680297852, 'test_accuracy': 0.8134765625, 'test_f1': 0.8239631336405531, 'test_precision': 0.8039568345323741, 'test_recall': 0.8449905482041588, 'test_AUC': 0.8784900040098527, 'test_runtime': 1.9137, 'test_samples_per_second': 535.1, 'test_steps_per_second': 8.361}
207 DEBERT-L 6932 1e-05 1600
Model seed is: 6932, total, training informative and uninf samples: 5120 ,             2048, 2048


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-large and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6505
200,0.5442
300,0.4346
400,0.3718
500,0.3353
600,0.2461
700,0.2055
800,0.1758
900,0.1253
1000,0.1036


for ratio of 207, model of DEBERT-L, seed of 6932 performance is:
 0.5534382767191384  and  {'test_loss': 1.2448549270629883, 'test_accuracy': 0.8125, 'test_f1': 0.8188679245283018, 'test_precision': 0.791970802919708, 'test_recall': 0.84765625, 'test_AUC': 0.868499755859375, 'test_runtime': 1.918, 'test_samples_per_second': 533.894, 'test_steps_per_second': 8.342}
207 DEBERT-L 94 1e-05 1600
Model seed is: 94, total, training informative and uninf samples: 5120 ,             2059, 2037


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-large and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6845
200,0.6193
300,0.5014
400,0.473
500,0.4662
600,0.3468
700,0.3132
800,0.3009
900,0.2
1000,0.228


for ratio of 207, model of DEBERT-L, seed of 94 performance is:
 0.5211267605633803  and  {'test_loss': 1.1733696460723877, 'test_accuracy': 0.8125, 'test_f1': 0.817490494296578, 'test_precision': 0.7803992740471869, 'test_recall': 0.8582834331337326, 'test_AUC': 0.8772893982589315, 'test_runtime': 1.9188, 'test_samples_per_second': 533.66, 'test_steps_per_second': 8.338}
207 DEBERT-L 791 1e-05 1600
Model seed is: 791, total, training informative and uninf samples: 5120 ,             2024, 2072


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-large and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6748
200,0.5808
300,0.5034
400,0.4483
500,0.396
600,0.2671
700,0.2466
800,0.2117
900,0.1424
1000,0.1617


for ratio of 207, model of DEBERT-L, seed of 791 performance is:
 0.5509527754763878  and  {'test_loss': 1.1217517852783203, 'test_accuracy': 0.81640625, 'test_f1': 0.8306306306306306, 'test_precision': 0.8031358885017421, 'test_recall': 0.8600746268656716, 'test_AUC': 0.8890957609493517, 'test_runtime': 1.912, 'test_samples_per_second': 535.566, 'test_steps_per_second': 8.368}
207 DEBERT-L 5 1e-05 1600
Model seed is: 5, total, training informative and uninf samples: 5120 ,             2073, 2023


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-large and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6239
200,0.5392
300,0.4606
400,0.3538
500,0.3292
600,0.2224
700,0.1822
800,0.1644
900,0.0864
1000,0.0803


for ratio of 207, model of DEBERT-L, seed of 5 performance is:
 0.579950289975145  and  {'test_loss': 1.1966619491577148, 'test_accuracy': 0.826171875, 'test_f1': 0.8261718749999999, 'test_precision': 0.7877094972067039, 'test_recall': 0.8685831622176592, 'test_AUC': 0.8956098792057174, 'test_runtime': 1.9295, 'test_samples_per_second': 530.717, 'test_steps_per_second': 8.292}
207 DEBERT-L 1759 1e-05 1600
Model seed is: 1759, total, training informative and uninf samples: 5120 ,             2061, 2035


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-large and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6627
200,0.6059
300,0.5552
400,0.4516
500,0.4462
600,0.35
700,0.3622
800,0.2902
900,0.2026
1000,0.2128


for ratio of 207, model of DEBERT-L, seed of 1759 performance is:
 0.5567522783761392  and  {'test_loss': 1.1152708530426025, 'test_accuracy': 0.818359375, 'test_f1': 0.8187134502923976, 'test_precision': 0.7969639468690702, 'test_recall': 0.8416833667334669, 'test_AUC': 0.887867162897223, 'test_runtime': 1.9351, 'test_samples_per_second': 529.182, 'test_steps_per_second': 8.268}
207 DEBERT-L 323 1e-05 1600
Model seed is: 323, total, training informative and uninf samples: 5120 ,             2056, 2040


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-large and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6603
200,0.5665
300,0.4987
400,0.4247
500,0.4098
600,0.2973
700,0.2897
800,0.1968
900,0.181
1000,0.1549


for ratio of 207, model of DEBERT-L, seed of 323 performance is:
 0.5509527754763878  and  {'test_loss': 1.2038954496383667, 'test_accuracy': 0.7998046875, 'test_f1': 0.8096564531104922, 'test_precision': 0.7609075043630017, 'test_recall': 0.8650793650793651, 'test_AUC': 0.8505837912087912, 'test_runtime': 1.9251, 'test_samples_per_second': 531.92, 'test_steps_per_second': 8.311}
207 DEBERT-L 200 1e-05 1600
Model seed is: 200, total, training informative and uninf samples: 5120 ,             2038, 2058


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-large and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6516
200,0.5346
300,0.4639
400,0.3733
500,0.3755
600,0.2538
700,0.2191
800,0.1774
900,0.1213
1000,0.1352


for ratio of 207, model of DEBERT-L, seed of 200 performance is:
 0.56006628003314  and  {'test_loss': 1.199034333229065, 'test_accuracy': 0.8125, 'test_f1': 0.8248175182481751, 'test_precision': 0.7874564459930313, 'test_recall': 0.8659003831417624, 'test_AUC': 0.8850956327944927, 'test_runtime': 1.928, 'test_samples_per_second': 531.114, 'test_steps_per_second': 8.299}
207 DEBERT-L 999 1e-05 1600
Model seed is: 999, total, training informative and uninf samples: 5120 ,             2034, 2062


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-large and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.5964
200,0.5324
300,0.4413
400,0.3384
500,0.3359
600,0.1881
700,0.1658
800,0.1571
900,0.091
1000,0.0888


for ratio of 207, model of DEBERT-L, seed of 999 performance is:
 0.5410107705053853  and  {'test_loss': 1.2461509704589844, 'test_accuracy': 0.826171875, 'test_f1': 0.8360957642725599, 'test_precision': 0.8107142857142857, 'test_recall': 0.8631178707224335, 'test_AUC': 0.8935704796371798, 'test_runtime': 1.9229, 'test_samples_per_second': 532.522, 'test_steps_per_second': 8.321}


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/615 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.10M [00:00<?, ?B/s]

207 XLM-B 9741 2e-05 1600
Model seed is: 9741, total, training informative and uninf samples: 5120 ,             2078, 2018


model.safetensors:   0%|          | 0.00/1.12G [00:00<?, ?B/s]

Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6658
200,0.607
300,0.5287
400,0.4854
500,0.4439
600,0.3679
700,0.3254
800,0.3121
900,0.2422
1000,0.2213


for ratio of 207, model of XLM-B, seed of 9741 performance is:
 0.46727423363711684  and  {'test_loss': 0.9919942021369934, 'test_accuracy': 0.80078125, 'test_f1': 0.7939393939393938, 'test_precision': 0.7736220472440944, 'test_recall': 0.8153526970954357, 'test_AUC': 0.8712008696850455, 'test_runtime': 0.577, 'test_samples_per_second': 1774.589, 'test_steps_per_second': 27.728}
207 XLM-B 1694 2e-05 1600
Model seed is: 1694, total, training informative and uninf samples: 5120 ,             2031, 2065


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6904
200,0.6681
300,0.6553
400,0.5904
500,0.5665
600,0.4834
700,0.5263
800,0.4249
900,0.3923
1000,0.3715


for ratio of 207, model of XLM-B, seed of 1694 performance is:
 0.47141673570836784  and  {'test_loss': 0.6056860685348511, 'test_accuracy': 0.8115234375, 'test_f1': 0.8194574368568756, 'test_precision': 0.8111111111111111, 'test_recall': 0.8279773156899811, 'test_AUC': 0.8911821427889481, 'test_runtime': 0.5778, 'test_samples_per_second': 1772.265, 'test_steps_per_second': 27.692}
207 XLM-B 6932 2e-05 1600
Model seed is: 6932, total, training informative and uninf samples: 5120 ,             2048, 2048


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6782
200,0.6199
300,0.5517
400,0.5101
500,0.4656
600,0.3831
700,0.3767
800,0.3346
900,0.2708
1000,0.2517


for ratio of 207, model of XLM-B, seed of 6932 performance is:
 0.5443247721623861  and  {'test_loss': 0.8268373608589172, 'test_accuracy': 0.8056640625, 'test_f1': 0.8159111933395005, 'test_precision': 0.7750439367311072, 'test_recall': 0.861328125, 'test_AUC': 0.8683242797851562, 'test_runtime': 0.5688, 'test_samples_per_second': 1800.403, 'test_steps_per_second': 28.131}
207 XLM-B 94 2e-05 1600
Model seed is: 94, total, training informative and uninf samples: 5120 ,             2059, 2037


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.677
200,0.6034
300,0.5328
400,0.5031
500,0.4884
600,0.3902
700,0.3795
800,0.3271
900,0.252
1000,0.2515


for ratio of 207, model of XLM-B, seed of 94 performance is:
 0.4623032311516156  and  {'test_loss': 0.9548035860061646, 'test_accuracy': 0.7880859375, 'test_f1': 0.7907425265188042, 'test_precision': 0.7649253731343284, 'test_recall': 0.8183632734530938, 'test_AUC': 0.8736141483762876, 'test_runtime': 0.5713, 'test_samples_per_second': 1792.421, 'test_steps_per_second': 28.007}
207 XLM-B 791 2e-05 1600
Model seed is: 791, total, training informative and uninf samples: 5120 ,             2024, 2072


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6669
200,0.603
300,0.5573
400,0.5078
500,0.4841
600,0.3934
700,0.3616
800,0.3268
900,0.282
1000,0.2764


for ratio of 207, model of XLM-B, seed of 791 performance is:
 0.5434962717481359  and  {'test_loss': 0.829099714756012, 'test_accuracy': 0.80078125, 'test_f1': 0.8222996515679443, 'test_precision': 0.7712418300653595, 'test_recall': 0.8805970149253731, 'test_AUC': 0.8715248042574015, 'test_runtime': 0.574, 'test_samples_per_second': 1783.995, 'test_steps_per_second': 27.875}
207 XLM-B 5 2e-05 1600
Model seed is: 5, total, training informative and uninf samples: 5120 ,             2073, 2023


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6889
200,0.6203
300,0.5484
400,0.4844
500,0.4788
600,0.3995
700,0.352
800,0.3286
900,0.2386
1000,0.2479


for ratio of 207, model of XLM-B, seed of 5 performance is:
 0.5236122618061309  and  {'test_loss': 0.9557459950447083, 'test_accuracy': 0.8046875, 'test_f1': 0.8019801980198018, 'test_precision': 0.7743785850860421, 'test_recall': 0.8316221765913757, 'test_AUC': 0.8867998118683538, 'test_runtime': 0.5689, 'test_samples_per_second': 1799.86, 'test_steps_per_second': 28.123}
207 XLM-B 1759 2e-05 1600
Model seed is: 1759, total, training informative and uninf samples: 5120 ,             2061, 2035


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6621
200,0.6119
300,0.5887
400,0.4766
500,0.5008
600,0.4202
700,0.4046
800,0.3454
900,0.271
1000,0.2735


for ratio of 207, model of XLM-B, seed of 1759 performance is:
 0.5128417564208783  and  {'test_loss': 0.8999230861663818, 'test_accuracy': 0.78515625, 'test_f1': 0.7888675623800384, 'test_precision': 0.7569060773480663, 'test_recall': 0.8236472945891784, 'test_AUC': 0.8575894646435729, 'test_runtime': 0.5761, 'test_samples_per_second': 1777.399, 'test_steps_per_second': 27.772}
207 XLM-B 323 2e-05 1600
Model seed is: 323, total, training informative and uninf samples: 5120 ,             2056, 2040


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.685
200,0.6208
300,0.592
400,0.5324
500,0.5007
600,0.4319
700,0.3993
800,0.3407
900,0.3282
1000,0.2809


for ratio of 207, model of XLM-B, seed of 323 performance is:
 0.5169842584921293  and  {'test_loss': 0.8202871680259705, 'test_accuracy': 0.7890625, 'test_f1': 0.793103448275862, 'test_precision': 0.7666666666666667, 'test_recall': 0.8214285714285714, 'test_AUC': 0.8670634920634921, 'test_runtime': 0.5694, 'test_samples_per_second': 1798.332, 'test_steps_per_second': 28.099}
207 XLM-B 200 2e-05 1600
Model seed is: 200, total, training informative and uninf samples: 5120 ,             2038, 2058


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6933
200,0.601
300,0.5287
400,0.4936
500,0.4576
600,0.3682
700,0.3572
800,0.2866
900,0.2509
1000,0.247


for ratio of 207, model of XLM-B, seed of 200 performance is:
 0.5161557580778791  and  {'test_loss': 0.9007734656333923, 'test_accuracy': 0.8056640625, 'test_f1': 0.8192552225249772, 'test_precision': 0.7789291882556131, 'test_recall': 0.8639846743295019, 'test_AUC': 0.877558730594862, 'test_runtime': 0.5711, 'test_samples_per_second': 1793.186, 'test_steps_per_second': 28.019}
207 XLM-B 999 2e-05 1600
Model seed is: 999, total, training informative and uninf samples: 5120 ,             2034, 2062


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.651
200,0.6059
300,0.5363
400,0.4931
500,0.4831
600,0.3659
700,0.3754
800,0.332
900,0.2682
1000,0.2386


for ratio of 207, model of XLM-B, seed of 999 performance is:
 0.5459817729908865  and  {'test_loss': 0.8829811811447144, 'test_accuracy': 0.8203125, 'test_f1': 0.8336347197106692, 'test_precision': 0.7948275862068965, 'test_recall': 0.876425855513308, 'test_AUC': 0.8780979431032114, 'test_runtime': 0.5752, 'test_samples_per_second': 1780.135, 'test_steps_per_second': 27.815}


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/616 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.10M [00:00<?, ?B/s]

207 XLM-L 9741 8e-06 1600
Model seed is: 9741, total, training informative and uninf samples: 5120 ,             2078, 2018


model.safetensors:   0%|          | 0.00/2.24G [00:00<?, ?B/s]

Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-large and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6886
200,0.608
300,0.5244
400,0.486
500,0.4396
600,0.391
700,0.3589
800,0.338
900,0.2938
1000,0.2609


for ratio of 207, model of XLM-L, seed of 9741 performance is:
 0.5484672742336372  and  {'test_loss': 0.7718395590782166, 'test_accuracy': 0.8046875, 'test_f1': 0.8073217726396916, 'test_precision': 0.7535971223021583, 'test_recall': 0.8692946058091287, 'test_AUC': 0.8838825006507327, 'test_runtime': 1.6769, 'test_samples_per_second': 610.646, 'test_steps_per_second': 9.541}
207 XLM-L 1694 8e-06 1600
Model seed is: 1694, total, training informative and uninf samples: 5120 ,             2031, 2065


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-large and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6984
200,0.6747
300,0.6653
400,0.5966
500,0.5969
600,0.5047
700,0.5171
800,0.4651
900,0.389
1000,0.4005


for ratio of 207, model of XLM-L, seed of 1694 performance is:
 0.4780447390223695  and  {'test_loss': 0.553219199180603, 'test_accuracy': 0.7998046875, 'test_f1': 0.8030739673390971, 'test_precision': 0.81640625, 'test_recall': 0.7901701323251418, 'test_AUC': 0.8812396173454776, 'test_runtime': 1.6817, 'test_samples_per_second': 608.906, 'test_steps_per_second': 9.514}
207 XLM-L 6932 8e-06 1600
Model seed is: 6932, total, training informative and uninf samples: 5120 ,             2048, 2048


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-large and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6927
200,0.6133
300,0.57
400,0.4961
500,0.4651
600,0.3787
700,0.3807
800,0.3549
900,0.2752
1000,0.2735


for ratio of 207, model of XLM-L, seed of 6932 performance is:
 0.5965202982601492  and  {'test_loss': 0.8510459065437317, 'test_accuracy': 0.796875, 'test_f1': 0.8070500927643784, 'test_precision': 0.7685512367491166, 'test_recall': 0.849609375, 'test_AUC': 0.8701171875, 'test_runtime': 1.6827, 'test_samples_per_second': 608.558, 'test_steps_per_second': 9.509}
207 XLM-L 94 8e-06 1600
Model seed is: 94, total, training informative and uninf samples: 5120 ,             2059, 2037


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-large and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.7007
200,0.6215
300,0.5571
400,0.4879
500,0.4802
600,0.3925
700,0.3851
800,0.3486
900,0.2772
1000,0.2693


for ratio of 207, model of XLM-L, seed of 94 performance is:
 0.5691797845898923  and  {'test_loss': 0.763963520526886, 'test_accuracy': 0.7880859375, 'test_f1': 0.7973856209150326, 'test_precision': 0.7491228070175439, 'test_recall': 0.8522954091816367, 'test_AUC': 0.8777817214519336, 'test_runtime': 1.682, 'test_samples_per_second': 608.804, 'test_steps_per_second': 9.513}
207 XLM-L 791 8e-06 1600
Model seed is: 791, total, training informative and uninf samples: 5120 ,             2024, 2072


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-large and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.7033
200,0.5937
300,0.5393
400,0.5085
500,0.4775
600,0.4045
700,0.3738
800,0.3666
900,0.3151
1000,0.3303


for ratio of 207, model of XLM-L, seed of 791 performance is:
 0.5675227837613919  and  {'test_loss': 0.7508405447006226, 'test_accuracy': 0.7890625, 'test_f1': 0.8081705150976909, 'test_precision': 0.7711864406779662, 'test_recall': 0.8488805970149254, 'test_AUC': 0.866577715928554, 'test_runtime': 1.6799, 'test_samples_per_second': 609.554, 'test_steps_per_second': 9.524}
207 XLM-L 5 8e-06 1600
Model seed is: 5, total, training informative and uninf samples: 5120 ,             2073, 2023


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-large and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6875
200,0.6086
300,0.5532
400,0.5055
500,0.4731
600,0.408
700,0.4127
800,0.3656
900,0.3033
1000,0.2897


for ratio of 207, model of XLM-L, seed of 5 performance is:
 0.6031483015741508  and  {'test_loss': 0.7853913307189941, 'test_accuracy': 0.8173828125, 'test_f1': 0.8161258603736479, 'test_precision': 0.7830188679245284, 'test_recall': 0.8521560574948666, 'test_AUC': 0.8901571205151442, 'test_runtime': 1.6775, 'test_samples_per_second': 610.439, 'test_steps_per_second': 9.538}
207 XLM-L 1759 8e-06 1600
Model seed is: 1759, total, training informative and uninf samples: 5120 ,             2061, 2035


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-large and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.7273
200,0.666
300,0.6166
400,0.5693
500,0.5609
600,0.5106
700,0.5115
800,0.4523
900,0.4306
1000,0.4466


for ratio of 207, model of XLM-L, seed of 1759 performance is:
 0.603976801988401  and  {'test_loss': 0.5305101871490479, 'test_accuracy': 0.78125, 'test_f1': 0.7850287907869482, 'test_precision': 0.7532228360957642, 'test_recall': 0.8196392785571143, 'test_AUC': 0.8607844259948468, 'test_runtime': 1.6805, 'test_samples_per_second': 609.325, 'test_steps_per_second': 9.521}
207 XLM-L 323 8e-06 1600
Model seed is: 323, total, training informative and uninf samples: 5120 ,             2056, 2040


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-large and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.684
200,0.5955
300,0.5424
400,0.4829
500,0.482
600,0.4045
700,0.362
800,0.3286
900,0.2813
1000,0.2646


for ratio of 207, model of XLM-L, seed of 323 performance is:
 0.5492957746478874  and  {'test_loss': 0.8165757656097412, 'test_accuracy': 0.79296875, 'test_f1': 0.7992424242424242, 'test_precision': 0.7644927536231884, 'test_recall': 0.8373015873015873, 'test_AUC': 0.8704326923076922, 'test_runtime': 1.6754, 'test_samples_per_second': 611.189, 'test_steps_per_second': 9.55}
207 XLM-L 200 8e-06 1600
Model seed is: 200, total, training informative and uninf samples: 5120 ,             2038, 2058


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-large and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.7053
200,0.6375
300,0.5736
400,0.5244
500,0.4737
600,0.4129
700,0.4118
800,0.3687
900,0.3199
1000,0.2837


for ratio of 207, model of XLM-L, seed of 200 performance is:
 0.5625517812758907  and  {'test_loss': 0.6584899425506592, 'test_accuracy': 0.802734375, 'test_f1': 0.8150183150183151, 'test_precision': 0.7807017543859649, 'test_recall': 0.8524904214559387, 'test_AUC': 0.8751660026560424, 'test_runtime': 1.6794, 'test_samples_per_second': 609.759, 'test_steps_per_second': 9.527}
207 XLM-L 999 8e-06 1600
Model seed is: 999, total, training informative and uninf samples: 5120 ,             2034, 2062


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-large and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6847
200,0.6331
300,0.5397
400,0.5062
500,0.4933
600,0.399
700,0.396
800,0.3671
900,0.3003
1000,0.2812


for ratio of 207, model of XLM-L, seed of 999 performance is:
 0.5774647887323944  and  {'test_loss': 0.6523309946060181, 'test_accuracy': 0.818359375, 'test_f1': 0.8315217391304348, 'test_precision': 0.7941176470588235, 'test_recall': 0.8726235741444867, 'test_AUC': 0.8897262052010322, 'test_runtime': 1.6827, 'test_samples_per_second': 608.531, 'test_steps_per_second': 9.508}
number of training samples:  (4096, 13)
205 ALBERT-L 9741 8e-06 1500
Model seed is: 9741, total, training informative and uninf samples: 4760 ,             1915, 1893


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6064
200,0.5538
300,0.4537
400,0.4188
500,0.3822
600,0.2754
700,0.3013
800,0.19
900,0.1832
1000,0.1531


for ratio of 205, model of ALBERT-L, seed of 9741 performance is:
 0.4822592324402607  and  {'test_loss': 0.9347986578941345, 'test_accuracy': 0.8046218487394958, 'test_f1': 0.8078512396694215, 'test_precision': 0.7773359840954275, 'test_recall': 0.8408602150537634, 'test_AUC': 0.8766333267095008, 'test_runtime': 1.7636, 'test_samples_per_second': 539.82, 'test_steps_per_second': 8.506}
205 ALBERT-L 1694 8e-06 1500
Model seed is: 1694, total, training informative and uninf samples: 4760 ,             1916, 1892


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6313
200,0.5719
300,0.4761
400,0.4251
500,0.3979
600,0.304
700,0.2865
800,0.211
900,0.1821
1000,0.1451


for ratio of 205, model of ALBERT-L, seed of 1694 performance is:
 0.4670528602461984  and  {'test_loss': 1.0879698991775513, 'test_accuracy': 0.7888655462184874, 'test_f1': 0.7873015873015874, 'test_precision': 0.7733887733887734, 'test_recall': 0.8017241379310345, 'test_AUC': 0.8558507631430187, 'test_runtime': 1.7603, 'test_samples_per_second': 540.828, 'test_steps_per_second': 8.521}
205 ALBERT-L 6932 8e-06 1500
Model seed is: 6932, total, training informative and uninf samples: 4760 ,             1906, 1902


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.622
200,0.5516
300,0.4773
400,0.4235
500,0.3781
600,0.3015
700,0.284
800,0.1726
900,0.2146
1000,0.1731


for ratio of 205, model of ALBERT-L, seed of 6932 performance is:
 0.49746560463432293  and  {'test_loss': 0.8631330728530884, 'test_accuracy': 0.8035714285714286, 'test_f1': 0.8078108941418294, 'test_precision': 0.7875751503006012, 'test_recall': 0.8291139240506329, 'test_AUC': 0.8709637554508058, 'test_runtime': 1.773, 'test_samples_per_second': 536.94, 'test_steps_per_second': 8.46}
205 ALBERT-L 94 8e-06 1500
Model seed is: 94, total, training informative and uninf samples: 4760 ,             1917, 1891


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6977
200,0.6692
300,0.5669
400,0.5329
500,0.4769
600,0.3734
700,0.4094
800,0.2936
900,0.2725
1000,0.2181


for ratio of 205, model of ALBERT-L, seed of 94 performance is:
 0.48153511947863864  and  {'test_loss': 0.9581461548805237, 'test_accuracy': 0.7846638655462185, 'test_f1': 0.7866805411030178, 'test_precision': 0.7590361445783133, 'test_recall': 0.816414686825054, 'test_AUC': 0.8448943716404528, 'test_runtime': 1.7642, 'test_samples_per_second': 539.628, 'test_steps_per_second': 8.503}
205 ALBERT-L 791 8e-06 1500
Model seed is: 791, total, training informative and uninf samples: 4760 ,             1880, 1928


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6352
200,0.5427
300,0.4771
400,0.4189
500,0.4062
600,0.3103
700,0.3145
800,0.2426
900,0.2136
1000,0.1717


for ratio of 205, model of ALBERT-L, seed of 791 performance is:
 0.5148443157132513  and  {'test_loss': 0.9302808046340942, 'test_accuracy': 0.7941176470588235, 'test_f1': 0.805940594059406, 'test_precision': 0.7980392156862746, 'test_recall': 0.814, 'test_AUC': 0.8601238938053097, 'test_runtime': 1.7615, 'test_samples_per_second': 540.458, 'test_steps_per_second': 8.516}
205 ALBERT-L 5 8e-06 1500
Model seed is: 5, total, training informative and uninf samples: 4760 ,             1923, 1885


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6309
200,0.5292
300,0.478
400,0.4057
500,0.3838
600,0.2711
700,0.2999
800,0.2137
900,0.2157
1000,0.1651


for ratio of 205, model of ALBERT-L, seed of 5 performance is:
 0.4945691527878349  and  {'test_loss': 1.1264034509658813, 'test_accuracy': 0.7605042016806722, 'test_f1': 0.7594936708860759, 'test_precision': 0.7331975560081466, 'test_recall': 0.787746170678337, 'test_AUC': 0.8284684923634595, 'test_runtime': 1.7642, 'test_samples_per_second': 539.636, 'test_steps_per_second': 8.503}
205 ALBERT-L 1759 8e-06 1500
Model seed is: 1759, total, training informative and uninf samples: 4760 ,             1905, 1903


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6819
200,0.5985
300,0.4795
400,0.4596
500,0.3894
600,0.342
700,0.2982
800,0.2406
900,0.2438
1000,0.1938


for ratio of 205, model of ALBERT-L, seed of 1759 performance is:
 0.4503982621288921  and  {'test_loss': 0.9722914695739746, 'test_accuracy': 0.7972689075630253, 'test_f1': 0.7987486965589156, 'test_precision': 0.7913223140495868, 'test_recall': 0.8063157894736842, 'test_AUC': 0.8635639412997904, 'test_runtime': 1.7671, 'test_samples_per_second': 538.722, 'test_steps_per_second': 8.488}
205 ALBERT-L 323 8e-06 1500
Model seed is: 323, total, training informative and uninf samples: 4760 ,             1921, 1887


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6351
200,0.5417
300,0.4699
400,0.4324
500,0.3902
600,0.2861
700,0.2851
800,0.2058
900,0.1885
1000,0.1144


for ratio of 205, model of ALBERT-L, seed of 323 performance is:
 0.4525706010137581  and  {'test_loss': 0.9106193780899048, 'test_accuracy': 0.8161764705882353, 'test_f1': 0.8112189859762675, 'test_precision': 0.8034188034188035, 'test_recall': 0.8191721132897604, 'test_AUC': 0.8777437501933385, 'test_runtime': 1.7751, 'test_samples_per_second': 536.297, 'test_steps_per_second': 8.45}
205 ALBERT-L 200 8e-06 1500
Model seed is: 200, total, training informative and uninf samples: 4760 ,             1895, 1913


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6528
200,0.5403
300,0.4898
400,0.408
500,0.3764
600,0.3
700,0.2849
800,0.2363
900,0.1793
1000,0.1497


for ratio of 205, model of ALBERT-L, seed of 200 performance is:
 0.4728457639391745  and  {'test_loss': 1.041413426399231, 'test_accuracy': 0.805672268907563, 'test_f1': 0.8066875653082549, 'test_precision': 0.8177966101694916, 'test_recall': 0.7958762886597938, 'test_AUC': 0.8655776065696814, 'test_runtime': 1.7631, 'test_samples_per_second': 539.949, 'test_steps_per_second': 8.508}
205 ALBERT-L 999 8e-06 1500
Model seed is: 999, total, training informative and uninf samples: 4760 ,             1915, 1893


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6317
200,0.5284
300,0.4811
400,0.4227
500,0.3758
600,0.2922
700,0.297
800,0.2025
900,0.2184
1000,0.1821


for ratio of 205, model of ALBERT-L, seed of 999 performance is:
 0.4728457639391745  and  {'test_loss': 1.0570058822631836, 'test_accuracy': 0.7993697478991597, 'test_f1': 0.7952840300107181, 'test_precision': 0.7927350427350427, 'test_recall': 0.7978494623655914, 'test_AUC': 0.8551544456956128, 'test_runtime': 1.7687, 'test_samples_per_second': 538.252, 'test_steps_per_second': 8.481}
205 DISRoBERTa-B 9741 2e-05 1500
Model seed is: 9741, total, training informative and uninf samples: 4760 ,             1915, 1893


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6236
200,0.5419
300,0.427
400,0.401
500,0.3532
600,0.2697
700,0.275
800,0.1814
900,0.1798
1000,0.1306


for ratio of 205, model of DISRoBERTa-B, seed of 9741 performance is:
 0.46415640839971034  and  {'test_loss': 0.9797323346138, 'test_accuracy': 0.8077731092436975, 'test_f1': 0.811534500514933, 'test_precision': 0.7786561264822134, 'test_recall': 0.8473118279569892, 'test_AUC': 0.8794727429290589, 'test_runtime': 0.3101, 'test_samples_per_second': 3069.538, 'test_steps_per_second': 48.365}
205 DISRoBERTa-B 1694 2e-05 1500
Model seed is: 1694, total, training informative and uninf samples: 4760 ,             1916, 1892


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6328
200,0.5404
300,0.441
400,0.3708
500,0.3592
600,0.2745
700,0.262
800,0.1976
900,0.1778
1000,0.1107


for ratio of 205, model of DISRoBERTa-B, seed of 1694 performance is:
 0.45836350470673426  and  {'test_loss': 1.0742998123168945, 'test_accuracy': 0.8035714285714286, 'test_f1': 0.8045977011494253, 'test_precision': 0.7809330628803245, 'test_recall': 0.8297413793103449, 'test_AUC': 0.8676688807235725, 'test_runtime': 0.2973, 'test_samples_per_second': 3201.864, 'test_steps_per_second': 50.45}
205 DISRoBERTa-B 6932 2e-05 1500
Model seed is: 6932, total, training informative and uninf samples: 4760 ,             1906, 1902


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6167
200,0.5455
300,0.4643
400,0.402
500,0.349
600,0.2778
700,0.2799
800,0.1863
900,0.1894
1000,0.1734


for ratio of 205, model of DISRoBERTa-B, seed of 6932 performance is:
 0.4779145546705286  and  {'test_loss': 0.8540316224098206, 'test_accuracy': 0.8214285714285714, 'test_f1': 0.8240165631469979, 'test_precision': 0.8089430894308943, 'test_recall': 0.8396624472573839, 'test_AUC': 0.8954195575799303, 'test_runtime': 0.3021, 'test_samples_per_second': 3150.964, 'test_steps_per_second': 49.648}
205 DISRoBERTa-B 94 2e-05 1500
Model seed is: 94, total, training informative and uninf samples: 4760 ,             1917, 1891


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6156
200,0.5259
300,0.437
400,0.3943
500,0.3756
600,0.2638
700,0.2761
800,0.2032
900,0.2008
1000,0.1561


for ratio of 205, model of DISRoBERTa-B, seed of 94 performance is:
 0.4532947139753802  and  {'test_loss': 0.982396125793457, 'test_accuracy': 0.8077731092436975, 'test_f1': 0.8123076923076923, 'test_precision': 0.7734375, 'test_recall': 0.8552915766738661, 'test_AUC': 0.87825906442822, 'test_runtime': 0.3002, 'test_samples_per_second': 3170.816, 'test_steps_per_second': 49.96}
205 DISRoBERTa-B 791 2e-05 1500
Model seed is: 791, total, training informative and uninf samples: 4760 ,             1880, 1928


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.652
200,0.5422
300,0.4415
400,0.3871
500,0.3614
600,0.2545
700,0.2718
800,0.2065
900,0.1817
1000,0.1474


for ratio of 205, model of DISRoBERTa-B, seed of 791 performance is:
 0.4728457639391745  and  {'test_loss': 1.1183961629867554, 'test_accuracy': 0.7930672268907563, 'test_f1': 0.8039800995024876, 'test_precision': 0.8, 'test_recall': 0.808, 'test_AUC': 0.8799911504424779, 'test_runtime': 0.3005, 'test_samples_per_second': 3167.996, 'test_steps_per_second': 49.916}
205 DISRoBERTa-B 5 2e-05 1500
Model seed is: 5, total, training informative and uninf samples: 4760 ,             1923, 1885


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6123
200,0.5264
300,0.4575
400,0.4207
500,0.3736
600,0.2833
700,0.312
800,0.2131
900,0.2424
1000,0.1768


for ratio of 205, model of DISRoBERTa-B, seed of 5 performance is:
 0.44315713251267197  and  {'test_loss': 0.9765864610671997, 'test_accuracy': 0.782563025210084, 'test_f1': 0.7742639040348965, 'test_precision': 0.7717391304347826, 'test_recall': 0.7768052516411379, 'test_AUC': 0.8777490440510134, 'test_runtime': 0.3061, 'test_samples_per_second': 3110.505, 'test_steps_per_second': 49.01}
205 DISRoBERTa-B 1759 2e-05 1500
Model seed is: 1759, total, training informative and uninf samples: 4760 ,             1905, 1903


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6198
200,0.5437
300,0.4301
400,0.4005
500,0.3664
600,0.2756
700,0.2879
800,0.2005
900,0.2126
1000,0.1385


for ratio of 205, model of DISRoBERTa-B, seed of 1759 performance is:
 0.47356987690079655  and  {'test_loss': 1.0238789319992065, 'test_accuracy': 0.7962184873949579, 'test_f1': 0.8008213552361396, 'test_precision': 0.781563126252505, 'test_recall': 0.8210526315789474, 'test_AUC': 0.87958071278826, 'test_runtime': 0.3031, 'test_samples_per_second': 3140.691, 'test_steps_per_second': 49.486}
205 DISRoBERTa-B 323 2e-05 1500
Model seed is: 323, total, training informative and uninf samples: 4760 ,             1921, 1887


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6278
200,0.5178
300,0.4574
400,0.412
500,0.3661
600,0.2831
700,0.2957
800,0.2025
900,0.2053
1000,0.1447


for ratio of 205, model of DISRoBERTa-B, seed of 323 performance is:
 0.47936278059377263  and  {'test_loss': 0.8263891339302063, 'test_accuracy': 0.8151260504201681, 'test_f1': 0.8131634819532909, 'test_precision': 0.7929606625258799, 'test_recall': 0.8344226579520697, 'test_AUC': 0.8969185149831851, 'test_runtime': 0.305, 'test_samples_per_second': 3121.787, 'test_steps_per_second': 49.188}
205 DISRoBERTa-B 200 2e-05 1500
Model seed is: 200, total, training informative and uninf samples: 4760 ,             1895, 1913


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6263
200,0.5221
300,0.4646
400,0.3845
500,0.3538
600,0.2663
700,0.2643
800,0.2035
900,0.1622
1000,0.1581


for ratio of 205, model of DISRoBERTa-B, seed of 200 performance is:
 0.46270818247646633  and  {'test_loss': 0.8349369764328003, 'test_accuracy': 0.8308823529411765, 'test_f1': 0.8335056876938988, 'test_precision': 0.8360995850622407, 'test_recall': 0.8309278350515464, 'test_AUC': 0.8947349831122101, 'test_runtime': 0.2997, 'test_samples_per_second': 3176.838, 'test_steps_per_second': 50.055}
205 DISRoBERTa-B 999 2e-05 1500
Model seed is: 999, total, training informative and uninf samples: 4760 ,             1915, 1893


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6433
200,0.5301
300,0.4552
400,0.4013
500,0.3671
600,0.2861
700,0.3132
800,0.2156
900,0.2155
1000,0.1753


for ratio of 205, model of DISRoBERTa-B, seed of 999 performance is:
 0.47719044170890657  and  {'test_loss': 0.8787192702293396, 'test_accuracy': 0.8067226890756303, 'test_f1': 0.8148893360160965, 'test_precision': 0.7655954631379962, 'test_recall': 0.8709677419354839, 'test_AUC': 0.8894129076416948, 'test_runtime': 0.3005, 'test_samples_per_second': 3167.705, 'test_steps_per_second': 49.911}
205 XLNet-B 9741 2e-05 1500
Model seed is: 9741, total, training informative and uninf samples: 4760 ,             1915, 1893


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6562
200,0.5764
300,0.4716
400,0.4477
500,0.3913
600,0.2994
700,0.2896
800,0.1971
900,0.1664
1000,0.159


for ratio of 205, model of XLNet-B, seed of 9741 performance is:
 0.5307748008689356  and  {'test_loss': 1.2782541513442993, 'test_accuracy': 0.7815126050420168, 'test_f1': 0.7924151696606786, 'test_precision': 0.7392923649906891, 'test_recall': 0.853763440860215, 'test_AUC': 0.8670817601731027, 'test_runtime': 0.7168, 'test_samples_per_second': 1328.172, 'test_steps_per_second': 20.927}
205 XLNet-B 1694 2e-05 1500
Model seed is: 1694, total, training informative and uninf samples: 4760 ,             1916, 1892


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6584
200,0.5966
300,0.4809
400,0.4408
500,0.3972
600,0.2895
700,0.2901
800,0.1817
900,0.1848
1000,0.1265


for ratio of 205, model of XLNet-B, seed of 1694 performance is:
 0.5155684286748733  and  {'test_loss': 1.3404302597045898, 'test_accuracy': 0.7941176470588235, 'test_f1': 0.8032128514056225, 'test_precision': 0.7518796992481203, 'test_recall': 0.8620689655172413, 'test_AUC': 0.8649484171848503, 'test_runtime': 0.7199, 'test_samples_per_second': 1322.368, 'test_steps_per_second': 20.836}
205 XLNet-B 6932 2e-05 1500
Model seed is: 6932, total, training informative and uninf samples: 4760 ,             1906, 1902


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6368
200,0.5855
300,0.492
400,0.4469
500,0.3931
600,0.2878
700,0.3001
800,0.1855
900,0.1989
1000,0.1698


for ratio of 205, model of XLNet-B, seed of 6932 performance is:
 0.5423606082548877  and  {'test_loss': 0.9318297505378723, 'test_accuracy': 0.832983193277311, 'test_f1': 0.8421052631578948, 'test_precision': 0.7954971857410882, 'test_recall': 0.8945147679324894, 'test_AUC': 0.898186889818689, 'test_runtime': 0.7226, 'test_samples_per_second': 1317.486, 'test_steps_per_second': 20.759}
205 XLNet-B 94 2e-05 1500
Model seed is: 94, total, training informative and uninf samples: 4760 ,             1917, 1891


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.617
200,0.5513
300,0.475
400,0.4352
500,0.4218
600,0.3005
700,0.3257
800,0.2263
900,0.2091
1000,0.1542


for ratio of 205, model of XLNet-B, seed of 94 performance is:
 0.502534395365677  and  {'test_loss': 1.0951285362243652, 'test_accuracy': 0.7899159663865546, 'test_f1': 0.7991967871485943, 'test_precision': 0.7467166979362101, 'test_recall': 0.8596112311015118, 'test_AUC': 0.8718458351552735, 'test_runtime': 0.7274, 'test_samples_per_second': 1308.793, 'test_steps_per_second': 20.622}
205 XLNet-B 791 2e-05 1500
Model seed is: 791, total, training informative and uninf samples: 4760 ,             1880, 1928


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6613
200,0.5747
300,0.4791
400,0.4526
500,0.3996
600,0.3009
700,0.3237
800,0.2164
900,0.2014
1000,0.1756


for ratio of 205, model of XLNet-B, seed of 791 performance is:
 0.5474293989862419  and  {'test_loss': 1.1444127559661865, 'test_accuracy': 0.7972689075630253, 'test_f1': 0.8131655372700871, 'test_precision': 0.7879924953095685, 'test_recall': 0.84, 'test_AUC': 0.8729070796460177, 'test_runtime': 0.7235, 'test_samples_per_second': 1315.775, 'test_steps_per_second': 20.732}
205 XLNet-B 5 2e-05 1500
Model seed is: 5, total, training informative and uninf samples: 4760 ,             1923, 1885


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6474
200,0.5617
300,0.513
400,0.4417
500,0.4084
600,0.2776
700,0.3168
800,0.1883
900,0.2246
1000,0.1798


for ratio of 205, model of XLNet-B, seed of 5 performance is:
 0.5184648805213613  and  {'test_loss': 1.1812115907669067, 'test_accuracy': 0.7804621848739496, 'test_f1': 0.7811518324607328, 'test_precision': 0.748995983935743, 'test_recall': 0.8161925601750547, 'test_AUC': 0.8662113476117852, 'test_runtime': 0.7211, 'test_samples_per_second': 1320.28, 'test_steps_per_second': 20.803}
205 XLNet-B 1759 2e-05 1500
Model seed is: 1759, total, training informative and uninf samples: 4760 ,             1905, 1903


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6676
200,0.586
300,0.4814
400,0.4715
500,0.4208
600,0.349
700,0.3357
800,0.2576
900,0.2379
1000,0.1769


for ratio of 205, model of XLNet-B, seed of 1759 performance is:
 0.5112237509051412  and  {'test_loss': 1.124600887298584, 'test_accuracy': 0.7888655462184874, 'test_f1': 0.803134182174339, 'test_precision': 0.7509157509157509, 'test_recall': 0.8631578947368421, 'test_AUC': 0.868816065320534, 'test_runtime': 0.7263, 'test_samples_per_second': 1310.726, 'test_steps_per_second': 20.652}
205 XLNet-B 323 2e-05 1500
Model seed is: 323, total, training informative and uninf samples: 4760 ,             1921, 1887


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6353
200,0.5503
300,0.4801
400,0.4192
500,0.3988
600,0.3136
700,0.2969
800,0.1743
900,0.2205
1000,0.1202


for ratio of 205, model of XLNet-B, seed of 323 performance is:
 0.49022447501810285  and  {'test_loss': 1.1619492769241333, 'test_accuracy': 0.805672268907563, 'test_f1': 0.8042328042328043, 'test_precision': 0.7818930041152263, 'test_recall': 0.8278867102396514, 'test_AUC': 0.8781458943730748, 'test_runtime': 0.7175, 'test_samples_per_second': 1326.867, 'test_steps_per_second': 20.907}
205 XLNet-B 200 2e-05 1500
Model seed is: 200, total, training informative and uninf samples: 4760 ,             1895, 1913


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6526
200,0.5524
300,0.5102
400,0.447
500,0.386
600,0.291
700,0.304
800,0.2281
900,0.1945
1000,0.1614


for ratio of 205, model of XLNet-B, seed of 200 performance is:
 0.5047067342505431  and  {'test_loss': 1.1270943880081177, 'test_accuracy': 0.7930672268907563, 'test_f1': 0.800808897876643, 'test_precision': 0.7857142857142857, 'test_recall': 0.8164948453608247, 'test_AUC': 0.8751451466919801, 'test_runtime': 0.7368, 'test_samples_per_second': 1292.152, 'test_steps_per_second': 20.36}
205 XLNet-B 999 2e-05 1500
Model seed is: 999, total, training informative and uninf samples: 4760 ,             1915, 1893


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.673
200,0.5818
300,0.545
400,0.4887
500,0.4503
600,0.3713
700,0.3904
800,0.2594
900,0.2465
1000,0.2189


for ratio of 205, model of XLNet-B, seed of 999 performance is:
 0.5047067342505431  and  {'test_loss': 0.8413652777671814, 'test_accuracy': 0.7993697478991597, 'test_f1': 0.8099502487562188, 'test_precision': 0.7537037037037037, 'test_recall': 0.875268817204301, 'test_AUC': 0.886458678324612, 'test_runtime': 0.7236, 'test_samples_per_second': 1315.601, 'test_steps_per_second': 20.729}
205 XLNet-L 9741 1e-05 1125
Model seed is: 9741, total, training informative and uninf samples: 4760 ,             1915, 1893


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6958
200,0.6342
300,0.5266
400,0.4706
500,0.3959
600,0.3277
700,0.2804
800,0.218
900,0.1854
1000,0.1462


for ratio of 205, model of XLNet-L, seed of 9741 performance is:
 0.5068790731354091  and  {'test_loss': 0.6583940386772156, 'test_accuracy': 0.8014705882352942, 'test_f1': 0.8088978766430739, 'test_precision': 0.7633587786259542, 'test_recall': 0.8602150537634409, 'test_AUC': 0.8798701728820297, 'test_runtime': 1.9949, 'test_samples_per_second': 477.219, 'test_steps_per_second': 7.519}
205 XLNet-L 1694 1e-05 1125
Model seed is: 1694, total, training informative and uninf samples: 4760 ,             1916, 1892


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6825
200,0.6278
300,0.5686
400,0.5106
500,0.4659
600,0.4207
700,0.3707
800,0.3329
900,0.2887
1000,0.2363


for ratio of 205, model of XLNet-L, seed of 1694 performance is:
 0.5865314989138306  and  {'test_loss': 0.6446636915206909, 'test_accuracy': 0.7752100840336135, 'test_f1': 0.7876984126984128, 'test_precision': 0.7297794117647058, 'test_recall': 0.8556034482758621, 'test_AUC': 0.8577851187111362, 'test_runtime': 2.005, 'test_samples_per_second': 474.816, 'test_steps_per_second': 7.481}
205 XLNet-L 6932 1e-05 1125
Model seed is: 6932, total, training informative and uninf samples: 4760 ,             1906, 1902


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.7163
200,0.6419
300,0.5344
400,0.465
500,0.3911
600,0.3289
700,0.2569
800,0.213
900,0.159
1000,0.1422


for ratio of 205, model of XLNet-L, seed of 6932 performance is:
 0.5199131064446053  and  {'test_loss': 0.7232224345207214, 'test_accuracy': 0.8014705882352942, 'test_f1': 0.8025078369905956, 'test_precision': 0.7950310559006211, 'test_recall': 0.810126582278481, 'test_AUC': 0.8827966385961196, 'test_runtime': 2.0044, 'test_samples_per_second': 474.945, 'test_steps_per_second': 7.483}
205 XLNet-L 94 1e-05 1125
Model seed is: 94, total, training informative and uninf samples: 4760 ,             1917, 1891


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.7024
200,0.6865
300,0.613
400,0.5473
500,0.5046
600,0.443
700,0.3717
800,0.3259
900,0.29
1000,0.2381


for ratio of 205, model of XLNet-L, seed of 94 performance is:
 0.5336712527154236  and  {'test_loss': 0.6485026478767395, 'test_accuracy': 0.7605042016806722, 'test_f1': 0.7663934426229507, 'test_precision': 0.7290448343079922, 'test_recall': 0.8077753779697624, 'test_AUC': 0.8469967801348898, 'test_runtime': 2.0146, 'test_samples_per_second': 472.546, 'test_steps_per_second': 7.446}
205 XLNet-L 791 1e-05 1125
Model seed is: 791, total, training informative and uninf samples: 4760 ,             1880, 1928


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.7065
200,0.63
300,0.5461
400,0.5217
500,0.4629
600,0.4109
700,0.3531
800,0.2977
900,0.237
1000,0.1985


for ratio of 205, model of XLNet-L, seed of 791 performance is:
 0.5047067342505431  and  {'test_loss': 0.6644926071166992, 'test_accuracy': 0.7710084033613446, 'test_f1': 0.782, 'test_precision': 0.782, 'test_recall': 0.782, 'test_AUC': 0.8470973451327434, 'test_runtime': 2.0079, 'test_samples_per_second': 474.128, 'test_steps_per_second': 7.47}
205 XLNet-L 5 1e-05 1125
Model seed is: 5, total, training informative and uninf samples: 4760 ,             1923, 1885


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6617
200,0.5689
300,0.4821
400,0.4321
500,0.352
600,0.2648
700,0.2179
800,0.154
900,0.115
1000,0.1053


for ratio of 205, model of XLNet-L, seed of 5 performance is:
 0.5532223026792179  and  {'test_loss': 0.9387831687927246, 'test_accuracy': 0.7846638655462185, 'test_f1': 0.7798066595059076, 'test_precision': 0.7658227848101266, 'test_recall': 0.7943107221006565, 'test_AUC': 0.8564551422319474, 'test_runtime': 1.9975, 'test_samples_per_second': 476.592, 'test_steps_per_second': 7.509}
205 XLNet-L 1759 1e-05 1125
Model seed is: 1759, total, training informative and uninf samples: 4760 ,             1905, 1903


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6851
200,0.5918
300,0.502
400,0.4386
500,0.3516
600,0.2968
700,0.2369
800,0.1681
900,0.146
1000,0.1144


for ratio of 205, model of XLNet-L, seed of 1759 performance is:
 0.5662563359884142  and  {'test_loss': 0.9021901488304138, 'test_accuracy': 0.7815126050420168, 'test_f1': 0.7960784313725491, 'test_precision': 0.744954128440367, 'test_recall': 0.8547368421052631, 'test_AUC': 0.8473264923314576, 'test_runtime': 1.998, 'test_samples_per_second': 476.465, 'test_steps_per_second': 7.507}
205 XLNet-L 323 1e-05 1125
Model seed is: 323, total, training informative and uninf samples: 4760 ,             1921, 1887


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.7004
200,0.6252
300,0.5544
400,0.489
500,0.4082
600,0.3726
700,0.2782
800,0.2289
900,0.1912
1000,0.1363


for ratio of 205, model of XLNet-L, seed of 323 performance is:
 0.5177407675597393  and  {'test_loss': 0.6749997138977051, 'test_accuracy': 0.8109243697478992, 'test_f1': 0.8097251585623679, 'test_precision': 0.7864476386036962, 'test_recall': 0.8344226579520697, 'test_AUC': 0.8865732454802971, 'test_runtime': 2.0064, 'test_samples_per_second': 474.493, 'test_steps_per_second': 7.476}
205 XLNet-L 200 1e-05 1125
Model seed is: 200, total, training informative and uninf samples: 4760 ,             1895, 1913


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.677
200,0.6044
300,0.5258
400,0.4756
500,0.4351
600,0.3839
700,0.3287
800,0.2923
900,0.2406
1000,0.2328


for ratio of 205, model of XLNet-L, seed of 200 performance is:
 0.49022447501810285  and  {'test_loss': 0.5796353220939636, 'test_accuracy': 0.7867647058823529, 'test_f1': 0.7943262411347517, 'test_precision': 0.7808764940239044, 'test_recall': 0.8082474226804124, 'test_AUC': 0.8698646769244354, 'test_runtime': 2.0023, 'test_samples_per_second': 475.462, 'test_steps_per_second': 7.492}
205 XLNet-L 999 1e-05 1125
Model seed is: 999, total, training informative and uninf samples: 4760 ,             1915, 1893


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6875
200,0.5974
300,0.5126
400,0.4489
500,0.3764
600,0.2963
700,0.2488
800,0.1736
900,0.1655
1000,0.1127


for ratio of 205, model of XLNet-L, seed of 999 performance is:
 0.5206372194062274  and  {'test_loss': 0.7851001620292664, 'test_accuracy': 0.7962184873949579, 'test_f1': 0.8067729083665339, 'test_precision': 0.75139146567718, 'test_recall': 0.8709677419354839, 'test_AUC': 0.8632664326245831, 'test_runtime': 2.0056, 'test_samples_per_second': 474.674, 'test_steps_per_second': 7.479}
205 DEBERT-B 9741 3e-05 1500
Model seed is: 9741, total, training informative and uninf samples: 4760 ,             1915, 1893


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.7023
200,0.6478
300,0.4972
400,0.4677
500,0.3946
600,0.2537
700,0.275
800,0.168
900,0.1348
1000,0.1084


for ratio of 205, model of DEBERT-B, seed of 9741 performance is:
 0.4554670528602462  and  {'test_loss': 1.2128578424453735, 'test_accuracy': 0.8077731092436975, 'test_f1': 0.8130745658835548, 'test_precision': 0.77431906614786, 'test_recall': 0.8559139784946237, 'test_AUC': 0.8926541697025899, 'test_runtime': 0.6389, 'test_samples_per_second': 1490.129, 'test_steps_per_second': 23.479}
205 DEBERT-B 1694 3e-05 1500
Model seed is: 1694, total, training informative and uninf samples: 4760 ,             1916, 1892


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6914
200,0.5895
300,0.4644
400,0.4059
500,0.3481
600,0.2271
700,0.2515
800,0.1199
900,0.1177
1000,0.0947


for ratio of 205, model of DEBERT-B, seed of 1694 performance is:
 0.5054308472121651  and  {'test_loss': 1.1499333381652832, 'test_accuracy': 0.8004201680672269, 'test_f1': 0.8057259713701432, 'test_precision': 0.7665369649805448, 'test_recall': 0.8491379310344828, 'test_AUC': 0.8782460076314301, 'test_runtime': 0.6392, 'test_samples_per_second': 1489.375, 'test_steps_per_second': 23.467}
205 DEBERT-B 6932 3e-05 1500
Model seed is: 6932, total, training informative and uninf samples: 4760 ,             1906, 1902


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6002
200,0.5225
300,0.4029
400,0.3272
500,0.2592
600,0.1552
700,0.1474
800,0.0643
900,0.0736
1000,0.0699


for ratio of 205, model of DEBERT-B, seed of 6932 performance is:
 0.4786386676321506  and  {'test_loss': 1.1515048742294312, 'test_accuracy': 0.8266806722689075, 'test_f1': 0.8345035105315949, 'test_precision': 0.7954110898661568, 'test_recall': 0.8776371308016878, 'test_AUC': 0.9077732464735271, 'test_runtime': 0.652, 'test_samples_per_second': 1460.176, 'test_steps_per_second': 23.007}
205 DEBERT-B 94 3e-05 1500
Model seed is: 94, total, training informative and uninf samples: 4760 ,             1917, 1891


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.626
200,0.5247
300,0.4123
400,0.3335
500,0.3081
600,0.1832
700,0.186
800,0.1074
900,0.1116
1000,0.0931


for ratio of 205, model of DEBERT-B, seed of 94 performance is:
 0.47429398986241855  and  {'test_loss': 1.1753308773040771, 'test_accuracy': 0.8161764705882353, 'test_f1': 0.822695035460993, 'test_precision': 0.7748091603053435, 'test_recall': 0.8768898488120951, 'test_AUC': 0.8820001148374387, 'test_runtime': 0.6512, 'test_samples_per_second': 1461.913, 'test_steps_per_second': 23.034}
205 DEBERT-B 791 3e-05 1500
Model seed is: 791, total, training informative and uninf samples: 4760 ,             1880, 1928


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6921
200,0.6191
300,0.5039
400,0.4643
500,0.4347
600,0.2782
700,0.3294
800,0.1944
900,0.2094
1000,0.1507


for ratio of 205, model of DEBERT-B, seed of 791 performance is:
 0.4887762490948588  and  {'test_loss': 1.0770024061203003, 'test_accuracy': 0.7962184873949579, 'test_f1': 0.8083003952569169, 'test_precision': 0.798828125, 'test_recall': 0.818, 'test_AUC': 0.8745353982300885, 'test_runtime': 0.6534, 'test_samples_per_second': 1456.896, 'test_steps_per_second': 22.955}
205 DEBERT-B 5 3e-05 1500
Model seed is: 5, total, training informative and uninf samples: 4760 ,             1923, 1885


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6514
200,0.5361
300,0.4445
400,0.3827
500,0.3225
600,0.2178
700,0.2255
800,0.1475
900,0.1179
1000,0.1123


for ratio of 205, model of DEBERT-B, seed of 5 performance is:
 0.47719044170890657  and  {'test_loss': 1.1581052541732788, 'test_accuracy': 0.8235294117647058, 'test_f1': 0.824634655532359, 'test_precision': 0.7884231536926147, 'test_recall': 0.8643326039387309, 'test_AUC': 0.8981234666136197, 'test_runtime': 0.6326, 'test_samples_per_second': 1504.996, 'test_steps_per_second': 23.713}
205 DEBERT-B 1759 3e-05 1500
Model seed is: 1759, total, training informative and uninf samples: 4760 ,             1905, 1903


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6387
200,0.5433
300,0.3889
400,0.3483
500,0.3143
600,0.1838
700,0.2172
800,0.1244
900,0.1054
1000,0.0824


for ratio of 205, model of DEBERT-B, seed of 1759 performance is:
 0.4540188269370022  and  {'test_loss': 1.3101398944854736, 'test_accuracy': 0.8035714285714286, 'test_f1': 0.816125860373648, 'test_precision': 0.7656826568265682, 'test_recall': 0.8736842105263158, 'test_AUC': 0.88219353414984, 'test_runtime': 0.6463, 'test_samples_per_second': 1473.054, 'test_steps_per_second': 23.21}
205 DEBERT-B 323 3e-05 1500
Model seed is: 323, total, training informative and uninf samples: 4760 ,             1921, 1887


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6326
200,0.5013
300,0.4041
400,0.351
500,0.2913
600,0.202
700,0.1807
800,0.0962
900,0.1028
1000,0.058


for ratio of 205, model of DEBERT-B, seed of 323 performance is:
 0.502534395365677  and  {'test_loss': 1.1719540357589722, 'test_accuracy': 0.8224789915966386, 'test_f1': 0.8280773143438452, 'test_precision': 0.7767175572519084, 'test_recall': 0.8867102396514162, 'test_AUC': 0.8951862015935516, 'test_runtime': 0.6469, 'test_samples_per_second': 1471.613, 'test_steps_per_second': 23.187}
205 DEBERT-B 200 3e-05 1500
Model seed is: 200, total, training informative and uninf samples: 4760 ,             1895, 1913


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6849
200,0.5792
300,0.4927
400,0.3888
500,0.3535
600,0.2255
700,0.2183
800,0.1434
900,0.1144
1000,0.0832


for ratio of 205, model of DEBERT-B, seed of 200 performance is:
 0.4547429398986242  and  {'test_loss': 1.106855869293213, 'test_accuracy': 0.8161764705882353, 'test_f1': 0.8197734294541709, 'test_precision': 0.8189300411522634, 'test_recall': 0.8206185567010309, 'test_AUC': 0.8988851851034239, 'test_runtime': 0.6358, 'test_samples_per_second': 1497.339, 'test_steps_per_second': 23.593}
205 DEBERT-B 999 3e-05 1500
Model seed is: 999, total, training informative and uninf samples: 4760 ,             1915, 1893


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6423
200,0.5123
300,0.4302
400,0.3536
500,0.3313
600,0.1977
700,0.222
800,0.1294
900,0.1026
1000,0.0776


for ratio of 205, model of DEBERT-B, seed of 999 performance is:
 0.4721216509775525  and  {'test_loss': 1.226496696472168, 'test_accuracy': 0.8035714285714286, 'test_f1': 0.8093781855249745, 'test_precision': 0.7693798449612403, 'test_recall': 0.853763440860215, 'test_AUC': 0.8817822525446558, 'test_runtime': 0.6442, 'test_samples_per_second': 1477.726, 'test_steps_per_second': 23.283}
205 DEBERT-L 9741 1e-05 1500
Model seed is: 9741, total, training informative and uninf samples: 4760 ,             1915, 1893


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-large and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6302
200,0.5069
300,0.3704
400,0.3406
500,0.2791
600,0.1695
700,0.2157
800,0.0956
900,0.0782
1000,0.0719


for ratio of 205, model of DEBERT-L, seed of 9741 performance is:
 0.49384503982621286  and  {'test_loss': 1.3307894468307495, 'test_accuracy': 0.8140756302521008, 'test_f1': 0.8203045685279188, 'test_precision': 0.7769230769230769, 'test_recall': 0.8688172043010752, 'test_AUC': 0.8837649864211433, 'test_runtime': 1.7993, 'test_samples_per_second': 529.082, 'test_steps_per_second': 8.336}
205 DEBERT-L 1694 1e-05 1500
Model seed is: 1694, total, training informative and uninf samples: 4760 ,             1916, 1892


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-large and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.675
200,0.5978
300,0.4748
400,0.3882
500,0.3398
600,0.2416
700,0.2211
800,0.1542
900,0.1359
1000,0.0903


for ratio of 205, model of DEBERT-L, seed of 1694 performance is:
 0.48587979724837077  and  {'test_loss': 1.129522681236267, 'test_accuracy': 0.8266806722689075, 'test_f1': 0.8286604361370716, 'test_precision': 0.7995991983967936, 'test_recall': 0.8599137931034483, 'test_AUC': 0.8733350409836067, 'test_runtime': 1.7891, 'test_samples_per_second': 532.119, 'test_steps_per_second': 8.384}
205 DEBERT-L 6932 1e-05 1500
Model seed is: 6932, total, training informative and uninf samples: 4760 ,             1906, 1902


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-large and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6082
200,0.5369
300,0.4128
400,0.3547
500,0.3111
600,0.1997
700,0.1932
800,0.0922
900,0.1099
1000,0.0792


for ratio of 205, model of DEBERT-L, seed of 6932 performance is:
 0.5416364952932657  and  {'test_loss': 1.025769829750061, 'test_accuracy': 0.8476890756302521, 'test_f1': 0.8554336989032902, 'test_precision': 0.8109640831758034, 'test_recall': 0.9050632911392406, 'test_AUC': 0.9145922708896068, 'test_runtime': 1.797, 'test_samples_per_second': 529.772, 'test_steps_per_second': 8.347}
205 DEBERT-L 94 1e-05 1500
Model seed is: 94, total, training informative and uninf samples: 4760 ,             1917, 1891


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-large and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6872
200,0.6071
300,0.4598
400,0.4296
500,0.3719
600,0.2383
700,0.2872
800,0.1876
900,0.1775
1000,0.1083


for ratio of 205, model of DEBERT-L, seed of 94 performance is:
 0.4837074583635047  and  {'test_loss': 1.2619410753250122, 'test_accuracy': 0.8140756302521008, 'test_f1': 0.8180883864337103, 'test_precision': 0.7803921568627451, 'test_recall': 0.8596112311015118, 'test_AUC': 0.8855689090885, 'test_runtime': 1.7895, 'test_samples_per_second': 532.004, 'test_steps_per_second': 8.382}
205 DEBERT-L 791 1e-05 1500
Model seed is: 791, total, training informative and uninf samples: 4760 ,             1880, 1928


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-large and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6623
200,0.5128
300,0.3924
400,0.3316
500,0.2989
600,0.2039
700,0.2206
800,0.1194
900,0.1354
1000,0.1069


for ratio of 205, model of DEBERT-L, seed of 791 performance is:
 0.5257060101375814  and  {'test_loss': 1.1427109241485596, 'test_accuracy': 0.8308823529411765, 'test_f1': 0.8447444551591129, 'test_precision': 0.8156424581005587, 'test_recall': 0.876, 'test_AUC': 0.8733141592920355, 'test_runtime': 1.7862, 'test_samples_per_second': 532.986, 'test_steps_per_second': 8.398}
205 DEBERT-L 5 1e-05 1500
Model seed is: 5, total, training informative and uninf samples: 4760 ,             1923, 1885


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-large and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6551
200,0.5192
300,0.436
400,0.3704
500,0.2929
600,0.1962
700,0.2167
800,0.1175
900,0.1221
1000,0.081


for ratio of 205, model of DEBERT-L, seed of 5 performance is:
 0.5068790731354091  and  {'test_loss': 1.123939871788025, 'test_accuracy': 0.832983193277311, 'test_f1': 0.8338557993730408, 'test_precision': 0.798, 'test_recall': 0.8730853391684902, 'test_AUC': 0.8987865526158743, 'test_runtime': 1.7996, 'test_samples_per_second': 529.015, 'test_steps_per_second': 8.335}
205 DEBERT-L 1759 1e-05 1500
Model seed is: 1759, total, training informative and uninf samples: 4760 ,             1905, 1903


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-large and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6693
200,0.5543
300,0.4192
400,0.3812
500,0.3366
600,0.2422
700,0.2276
800,0.1394
900,0.1342
1000,0.1343


for ratio of 205, model of DEBERT-L, seed of 1759 performance is:
 0.5097755249818972  and  {'test_loss': 1.2653828859329224, 'test_accuracy': 0.819327731092437, 'test_f1': 0.8276553106212424, 'test_precision': 0.7896749521988528, 'test_recall': 0.8694736842105263, 'test_AUC': 0.8834867041818383, 'test_runtime': 1.7998, 'test_samples_per_second': 528.957, 'test_steps_per_second': 8.334}
205 DEBERT-L 323 1e-05 1500
Model seed is: 323, total, training informative and uninf samples: 4760 ,             1921, 1887


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-large and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6549
200,0.5003
300,0.3813
400,0.3446
500,0.2829
600,0.1839
700,0.1975
800,0.1115
900,0.097
1000,0.043


for ratio of 205, model of DEBERT-L, seed of 323 performance is:
 0.46415640839971034  and  {'test_loss': 0.9854920506477356, 'test_accuracy': 0.8476890756302521, 'test_f1': 0.84688489968321, 'test_precision': 0.8217213114754098, 'test_recall': 0.8736383442265795, 'test_AUC': 0.9116122446273981, 'test_runtime': 1.8027, 'test_samples_per_second': 528.092, 'test_steps_per_second': 8.321}
205 DEBERT-L 200 1e-05 1500
Model seed is: 200, total, training informative and uninf samples: 4760 ,             1895, 1913


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-large and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6647
200,0.5518
300,0.4662
400,0.3752
500,0.3143
600,0.234
700,0.2467
800,0.1704
900,0.1413
1000,0.1112


for ratio of 205, model of DEBERT-L, seed of 200 performance is:
 0.5293265749456916  and  {'test_loss': 0.9972242116928101, 'test_accuracy': 0.832983193277311, 'test_f1': 0.8405215646940823, 'test_precision': 0.818359375, 'test_recall': 0.8639175257731959, 'test_AUC': 0.8999757169032429, 'test_runtime': 1.7865, 'test_samples_per_second': 532.888, 'test_steps_per_second': 8.396}
205 DEBERT-L 999 1e-05 1500
Model seed is: 999, total, training informative and uninf samples: 4760 ,             1915, 1893


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-large and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6707
200,0.5948
300,0.5141
400,0.4464
500,0.3864
600,0.272
700,0.2775
800,0.2071
900,0.1722
1000,0.1538


for ratio of 205, model of DEBERT-L, seed of 999 performance is:
 0.4844315713251267  and  {'test_loss': 1.0204252004623413, 'test_accuracy': 0.8182773109243697, 'test_f1': 0.8236493374108053, 'test_precision': 0.7829457364341085, 'test_recall': 0.8688172043010752, 'test_AUC': 0.888202954229317, 'test_runtime': 1.7984, 'test_samples_per_second': 529.363, 'test_steps_per_second': 8.341}
205 XLM-B 9741 2e-05 1500
Model seed is: 9741, total, training informative and uninf samples: 4760 ,             1915, 1893


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6674
200,0.6018
300,0.5101
400,0.4829
500,0.4333
600,0.3399
700,0.3488
800,0.2683
900,0.2518
1000,0.2197


for ratio of 205, model of XLM-B, seed of 9741 performance is:
 0.4692251991310644  and  {'test_loss': 0.8607906103134155, 'test_accuracy': 0.8130252100840336, 'test_f1': 0.819838056680162, 'test_precision': 0.7743785850860421, 'test_recall': 0.8709677419354839, 'test_AUC': 0.8756088406085096, 'test_runtime': 0.5426, 'test_samples_per_second': 1754.583, 'test_steps_per_second': 27.646}
205 XLM-B 1694 2e-05 1500
Model seed is: 1694, total, training informative and uninf samples: 4760 ,             1916, 1892


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6666
200,0.6112
300,0.5652
400,0.5033
500,0.4791
600,0.4111
700,0.4036
800,0.3132
900,0.3061
1000,0.2365


for ratio of 205, model of XLM-B, seed of 1694 performance is:
 0.49167270094134685  and  {'test_loss': 0.7507879137992859, 'test_accuracy': 0.8130252100840336, 'test_f1': 0.8219999999999998, 'test_precision': 0.7667910447761194, 'test_recall': 0.8857758620689655, 'test_AUC': 0.8800787874505369, 'test_runtime': 0.5341, 'test_samples_per_second': 1782.477, 'test_steps_per_second': 28.085}
205 XLM-B 6932 2e-05 1500
Model seed is: 6932, total, training informative and uninf samples: 4760 ,             1906, 1902


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6622
200,0.6407
300,0.5598
400,0.5162
500,0.4561
600,0.3636
700,0.3861
800,0.2673
900,0.2599
1000,0.2402


for ratio of 205, model of XLM-B, seed of 6932 performance is:
 0.4706734250543085  and  {'test_loss': 0.8707269430160522, 'test_accuracy': 0.8025210084033614, 'test_f1': 0.8053830227743272, 'test_precision': 0.790650406504065, 'test_recall': 0.820675105485232, 'test_AUC': 0.8841207210070089, 'test_runtime': 0.5313, 'test_samples_per_second': 1791.752, 'test_steps_per_second': 28.231}
205 XLM-B 94 2e-05 1500
Model seed is: 94, total, training informative and uninf samples: 4760 ,             1917, 1891


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6713
200,0.6056
300,0.5319
400,0.4817
500,0.4388
600,0.354
700,0.356
800,0.2497
900,0.2578
1000,0.1835


for ratio of 205, model of XLM-B, seed of 94 performance is:
 0.49167270094134685  and  {'test_loss': 0.9263323545455933, 'test_accuracy': 0.8067226890756303, 'test_f1': 0.8188976377952756, 'test_precision': 0.7522603978300181, 'test_recall': 0.8984881209503239, 'test_AUC': 0.884535372139554, 'test_runtime': 0.5418, 'test_samples_per_second': 1757.174, 'test_steps_per_second': 27.687}
205 XLM-B 791 2e-05 1500
Model seed is: 791, total, training informative and uninf samples: 4760 ,             1880, 1928


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.661
200,0.5727
300,0.4843
400,0.4389
500,0.3892
600,0.3231
700,0.3336
800,0.2404
900,0.1944
1000,0.2314


for ratio of 205, model of XLM-B, seed of 791 performance is:
 0.48081100651701664  and  {'test_loss': 0.9531551599502563, 'test_accuracy': 0.8067226890756303, 'test_f1': 0.8196078431372548, 'test_precision': 0.8038461538461539, 'test_recall': 0.836, 'test_AUC': 0.8824292035398229, 'test_runtime': 0.5375, 'test_samples_per_second': 1771.148, 'test_steps_per_second': 27.907}
205 XLM-B 5 2e-05 1500
Model seed is: 5, total, training informative and uninf samples: 4760 ,             1923, 1885


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6531
200,0.5622
300,0.5242
400,0.4649
500,0.3941
600,0.3203
700,0.3392
800,0.2376
900,0.2711
1000,0.2002


for ratio of 205, model of XLM-B, seed of 5 performance is:
 0.46488052136133234  and  {'test_loss': 1.0169942378997803, 'test_accuracy': 0.792016806722689, 'test_f1': 0.7924528301886793, 'test_precision': 0.7605633802816901, 'test_recall': 0.8271334792122538, 'test_AUC': 0.8758039917777335, 'test_runtime': 0.5375, 'test_samples_per_second': 1771.281, 'test_steps_per_second': 27.909}
205 XLM-B 1759 2e-05 1500
Model seed is: 1759, total, training informative and uninf samples: 4760 ,             1905, 1903


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6697
200,0.6187
300,0.5117
400,0.496
500,0.4433
600,0.3696
700,0.3478
800,0.2564
900,0.257
1000,0.2196


for ratio of 205, model of XLM-B, seed of 1759 performance is:
 0.5097755249818972  and  {'test_loss': 1.030152678489685, 'test_accuracy': 0.7867647058823529, 'test_f1': 0.8007850834151129, 'test_precision': 0.75, 'test_recall': 0.8589473684210527, 'test_AUC': 0.8644025157232704, 'test_runtime': 0.5362, 'test_samples_per_second': 1775.356, 'test_steps_per_second': 27.973}
205 XLM-B 323 2e-05 1500
Model seed is: 323, total, training informative and uninf samples: 4760 ,             1921, 1887


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6649
200,0.6159
300,0.5187
400,0.4903
500,0.4456
600,0.3387
700,0.3526
800,0.2689
900,0.2565
1000,0.218


for ratio of 205, model of XLM-B, seed of 323 performance is:
 0.5126719768283853  and  {'test_loss': 0.9005616903305054, 'test_accuracy': 0.8119747899159664, 'test_f1': 0.814122533748702, 'test_precision': 0.7777777777777778, 'test_recall': 0.8540305010893247, 'test_AUC': 0.8916331914780787, 'test_runtime': 0.5391, 'test_samples_per_second': 1765.747, 'test_steps_per_second': 27.822}
205 XLM-B 200 2e-05 1500
Model seed is: 200, total, training informative and uninf samples: 4760 ,             1895, 1913


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6851
200,0.6102
300,0.5488
400,0.4712
500,0.4661
600,0.3834
700,0.397
800,0.3338
900,0.3056
1000,0.2572


for ratio of 205, model of XLM-B, seed of 200 performance is:
 0.503982621288921  and  {'test_loss': 0.7358498573303223, 'test_accuracy': 0.8151260504201681, 'test_f1': 0.828125, 'test_precision': 0.7866419294990723, 'test_recall': 0.8742268041237113, 'test_AUC': 0.8901344400538643, 'test_runtime': 0.5295, 'test_samples_per_second': 1797.979, 'test_steps_per_second': 28.33}
205 XLM-B 999 2e-05 1500
Model seed is: 999, total, training informative and uninf samples: 4760 ,             1915, 1893


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6672
200,0.5805
300,0.5269
400,0.4539
500,0.4337
600,0.3436
700,0.3469
800,0.251
900,0.2657
1000,0.2176


for ratio of 205, model of XLM-B, seed of 999 performance is:
 0.5271542360608255  and  {'test_loss': 0.9083548188209534, 'test_accuracy': 0.8046218487394958, 'test_f1': 0.8154761904761904, 'test_precision': 0.7569060773480663, 'test_recall': 0.8838709677419355, 'test_AUC': 0.8699829988297896, 'test_runtime': 0.5428, 'test_samples_per_second': 1753.913, 'test_steps_per_second': 27.635}
205 XLM-L 9741 8e-06 1500
Model seed is: 9741, total, training informative and uninf samples: 4760 ,             1915, 1893


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-large and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6976
200,0.5857
300,0.5111
400,0.48
500,0.43
600,0.352
700,0.3638
800,0.2902
900,0.2684
1000,0.2286


for ratio of 205, model of XLM-L, seed of 9741 performance is:
 0.498913830557567  and  {'test_loss': 0.7976934313774109, 'test_accuracy': 0.8067226890756303, 'test_f1': 0.8130081300813009, 'test_precision': 0.7707129094412332, 'test_recall': 0.8602150537634409, 'test_AUC': 0.8875626504161975, 'test_runtime': 1.5844, 'test_samples_per_second': 600.851, 'test_steps_per_second': 9.467}
205 XLM-L 1694 8e-06 1500
Model seed is: 1694, total, training informative and uninf samples: 4760 ,             1916, 1892


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-large and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6834
200,0.6032
300,0.5267
400,0.4648
500,0.4183
600,0.3681
700,0.3593
800,0.2957
900,0.2934
1000,0.2375


for ratio of 205, model of XLM-L, seed of 1694 performance is:
 0.49239681390296886  and  {'test_loss': 0.7284196615219116, 'test_accuracy': 0.8140756302521008, 'test_f1': 0.8221105527638192, 'test_precision': 0.7702448210922788, 'test_recall': 0.8814655172413793, 'test_AUC': 0.8873392453363483, 'test_runtime': 1.5633, 'test_samples_per_second': 608.961, 'test_steps_per_second': 9.595}
205 XLM-L 6932 8e-06 1500
Model seed is: 6932, total, training informative and uninf samples: 4760 ,             1906, 1902


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-large and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6881
200,0.6116
300,0.523
400,0.4854
500,0.4185
600,0.3309
700,0.3525
800,0.2513
900,0.2679
1000,0.2454


for ratio of 205, model of XLM-L, seed of 6932 performance is:
 0.5314989138305576  and  {'test_loss': 0.664928674697876, 'test_accuracy': 0.8287815126050421, 'test_f1': 0.8368368368368369, 'test_precision': 0.7961904761904762, 'test_recall': 0.8818565400843882, 'test_AUC': 0.903973129954275, 'test_runtime': 1.5656, 'test_samples_per_second': 608.061, 'test_steps_per_second': 9.581}
205 XLM-L 94 8e-06 1500
Model seed is: 94, total, training informative and uninf samples: 4760 ,             1917, 1891


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-large and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6884
200,0.5817
300,0.5024
400,0.4553
500,0.4283
600,0.348
700,0.3475
800,0.2763
900,0.2614
1000,0.2166


for ratio of 205, model of XLM-L, seed of 94 performance is:
 0.4685010861694424  and  {'test_loss': 0.7512494325637817, 'test_accuracy': 0.8067226890756303, 'test_f1': 0.8095238095238095, 'test_precision': 0.7773359840954275, 'test_recall': 0.8444924406047516, 'test_AUC': 0.8829408984704537, 'test_runtime': 1.5603, 'test_samples_per_second': 610.133, 'test_steps_per_second': 9.613}
205 XLM-L 791 8e-06 1500
Model seed is: 791, total, training informative and uninf samples: 4760 ,             1880, 1928


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-large and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.7106
200,0.5977
300,0.5043
400,0.4577
500,0.419
600,0.3506
700,0.3603
800,0.2766
900,0.2751
1000,0.24


for ratio of 205, model of XLM-L, seed of 791 performance is:
 0.5257060101375814  and  {'test_loss': 0.8801576495170593, 'test_accuracy': 0.7962184873949579, 'test_f1': 0.814176245210728, 'test_precision': 0.78125, 'test_recall': 0.85, 'test_AUC': 0.8766061946902655, 'test_runtime': 1.5718, 'test_samples_per_second': 605.677, 'test_steps_per_second': 9.543}
205 XLM-L 5 8e-06 1500
Model seed is: 5, total, training informative and uninf samples: 4760 ,             1923, 1885


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-large and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.7253
200,0.7078
300,0.7055
400,0.6965
500,0.7017
600,0.6976
700,0.6936
800,0.6872
900,0.6953
1000,0.6911


for ratio of 205, model of XLM-L, seed of 5 performance is:
 0.44677769732078204  and  {'test_loss': 0.6229378581047058, 'test_accuracy': 0.6932773109243697, 'test_f1': 0.6604651162790698, 'test_precision': 0.7047146401985112, 'test_recall': 0.6214442013129103, 'test_AUC': 0.7114094998121258, 'test_runtime': 1.5754, 'test_samples_per_second': 604.296, 'test_steps_per_second': 9.521}
205 XLM-L 1759 8e-06 1500
Model seed is: 1759, total, training informative and uninf samples: 4760 ,             1905, 1903


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-large and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.7164
200,0.6518
300,0.5563
400,0.5362
500,0.4681
600,0.4084
700,0.3719
800,0.3078
900,0.2867
1000,0.2952


for ratio of 205, model of XLM-L, seed of 1759 performance is:
 0.49746560463432293  and  {'test_loss': 0.6352068185806274, 'test_accuracy': 0.8224789915966386, 'test_f1': 0.8318407960199007, 'test_precision': 0.7886792452830189, 'test_recall': 0.88, 'test_AUC': 0.8903409467063885, 'test_runtime': 1.566, 'test_samples_per_second': 607.912, 'test_steps_per_second': 9.578}
205 XLM-L 323 8e-06 1500
Model seed is: 323, total, training informative and uninf samples: 4760 ,             1921, 1887


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-large and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6826
200,0.5866
300,0.518
400,0.4927
500,0.4303
600,0.3723
700,0.3476
800,0.317
900,0.2769
1000,0.2149


for ratio of 205, model of XLM-L, seed of 323 performance is:
 0.503258508327299  and  {'test_loss': 0.6837037801742554, 'test_accuracy': 0.8077731092436975, 'test_f1': 0.8103626943005181, 'test_precision': 0.7727272727272727, 'test_recall': 0.8518518518518519, 'test_AUC': 0.8977669950107607, 'test_runtime': 1.5707, 'test_samples_per_second': 606.105, 'test_steps_per_second': 9.55}
205 XLM-L 200 8e-06 1500
Model seed is: 200, total, training informative and uninf samples: 4760 ,             1895, 1913


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-large and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6875
200,0.6117
300,0.5688
400,0.4719
500,0.4366
600,0.3797
700,0.3843
800,0.3226
900,0.2927
1000,0.2749


for ratio of 205, model of XLM-L, seed of 200 performance is:
 0.5141202027516293  and  {'test_loss': 0.7071895003318787, 'test_accuracy': 0.8256302521008403, 'test_f1': 0.8295687885010268, 'test_precision': 0.8261758691206544, 'test_recall': 0.8329896907216495, 'test_AUC': 0.8849555177818496, 'test_runtime': 1.5648, 'test_samples_per_second': 608.393, 'test_steps_per_second': 9.586}
205 XLM-L 999 8e-06 1500
Model seed is: 999, total, training informative and uninf samples: 4760 ,             1915, 1893


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-large and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6742
200,0.6176
300,0.5694
400,0.5331
500,0.497
600,0.4439
700,0.4434
800,0.3599
900,0.3375
1000,0.3365


for ratio of 205, model of XLM-L, seed of 999 performance is:
 0.5459811730629979  and  {'test_loss': 0.5538247227668762, 'test_accuracy': 0.8235294117647058, 'test_f1': 0.8330019880715706, 'test_precision': 0.7744916820702403, 'test_recall': 0.9010752688172043, 'test_AUC': 0.8930648473206598, 'test_runtime': 1.5701, 'test_samples_per_second': 606.337, 'test_steps_per_second': 9.554}
number of training samples:  (3808, 13)
195 ALBERT-L 9741 8e-06 1500
Model seed is: 9741, total, training informative and uninf samples: 4508 ,             1809, 1797


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6222
200,0.538
300,0.4284
400,0.3924
500,0.3351
600,0.2558
700,0.2342
800,0.1444
900,0.1384
1000,0.0906


for ratio of 195, model of ALBERT-L, seed of 9741 performance is:
 0.5080906148867314  and  {'test_loss': 1.1033337116241455, 'test_accuracy': 0.7904656319290465, 'test_f1': 0.7925356750823271, 'test_precision': 0.7746781115879828, 'test_recall': 0.8112359550561797, 'test_AUC': 0.8700218818380745, 'test_runtime': 1.6762, 'test_samples_per_second': 538.133, 'test_steps_per_second': 8.949}
195 ALBERT-L 1694 8e-06 1500
Model seed is: 1694, total, training informative and uninf samples: 4508 ,             1810, 1796


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6364
200,0.5696
300,0.4871
400,0.4309
500,0.3639
600,0.3166
700,0.2892
800,0.1884
900,0.2298
1000,0.1181


for ratio of 195, model of ALBERT-L, seed of 1694 performance is:
 0.44919093851132685  and  {'test_loss': 1.037533164024353, 'test_accuracy': 0.8104212860310421, 'test_f1': 0.8041237113402062, 'test_precision': 0.8181818181818182, 'test_recall': 0.7905405405405406, 'test_AUC': 0.8641911562217239, 'test_runtime': 1.6844, 'test_samples_per_second': 535.505, 'test_steps_per_second': 8.905}
195 ALBERT-L 6932 8e-06 1500
Model seed is: 6932, total, training informative and uninf samples: 4508 ,             1792, 1814


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6078
200,0.5151
300,0.421
400,0.4045
500,0.3415
600,0.2349
700,0.2448
800,0.1444
900,0.1777
1000,0.0876


for ratio of 195, model of ALBERT-L, seed of 6932 performance is:
 0.543042071197411  and  {'test_loss': 1.1511292457580566, 'test_accuracy': 0.7727272727272727, 'test_f1': 0.7884416924664603, 'test_precision': 0.7534516765285996, 'test_recall': 0.8268398268398268, 'test_AUC': 0.8383116883116883, 'test_runtime': 1.6803, 'test_samples_per_second': 536.816, 'test_steps_per_second': 8.927}
195 ALBERT-L 94 8e-06 1500
Model seed is: 94, total, training informative and uninf samples: 4508 ,             1789, 1817


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6527
200,0.5686
300,0.4812
400,0.436
500,0.3501
600,0.3255
700,0.2976
800,0.2467
900,0.2221
1000,0.1343


for ratio of 195, model of ALBERT-L, seed of 94 performance is:
 0.4349514563106796  and  {'test_loss': 0.8889155387878418, 'test_accuracy': 0.811529933481153, 'test_f1': 0.8195329087048832, 'test_precision': 0.8092243186582809, 'test_recall': 0.8301075268817204, 'test_AUC': 0.8790088826554463, 'test_runtime': 1.6744, 'test_samples_per_second': 538.696, 'test_steps_per_second': 8.958}
195 ALBERT-L 791 8e-06 1500
Model seed is: 791, total, training informative and uninf samples: 4508 ,             1822, 1784


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6377
200,0.5875
300,0.494
400,0.4324
500,0.3592
600,0.3259
700,0.2957
800,0.1645
900,0.2351
1000,0.1493


for ratio of 195, model of ALBERT-L, seed of 791 performance is:
 0.49902912621359224  and  {'test_loss': 1.1560420989990234, 'test_accuracy': 0.7815964523281597, 'test_f1': 0.7784026996625422, 'test_precision': 0.7571115973741794, 'test_recall': 0.8009259259259259, 'test_AUC': 0.8374802994483844, 'test_runtime': 1.6716, 'test_samples_per_second': 539.588, 'test_steps_per_second': 8.973}
195 ALBERT-L 5 8e-06 1500
Model seed is: 5, total, training informative and uninf samples: 4508 ,             1789, 1817


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6445
200,0.5453
300,0.4364
400,0.4342
500,0.3619
600,0.2965
700,0.3136
800,0.2085
900,0.1943
1000,0.1252


for ratio of 195, model of ALBERT-L, seed of 5 performance is:
 0.44789644012944985  and  {'test_loss': 0.9611594676971436, 'test_accuracy': 0.8126385809312638, 'test_f1': 0.8148959474260679, 'test_precision': 0.8303571428571429, 'test_recall': 0.8, 'test_AUC': 0.8683300115646762, 'test_runtime': 1.673, 'test_samples_per_second': 539.136, 'test_steps_per_second': 8.966}
195 ALBERT-L 1759 8e-06 1500
Model seed is: 1759, total, training informative and uninf samples: 4508 ,             1769, 1837


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6991
200,0.666
300,0.6847
400,0.6656
500,0.6947
600,0.5808
700,0.5236
800,0.4583
900,0.4551
1000,0.3287


for ratio of 195, model of ALBERT-L, seed of 1759 performance is:
 0.45501618122977344  and  {'test_loss': 0.5578673481941223, 'test_accuracy': 0.79490022172949, 'test_f1': 0.8029818956336527, 'test_precision': 0.8303964757709251, 'test_recall': 0.777319587628866, 'test_AUC': 0.8645207545303963, 'test_runtime': 1.6841, 'test_samples_per_second': 535.588, 'test_steps_per_second': 8.907}
195 ALBERT-L 323 8e-06 1500
Model seed is: 323, total, training informative and uninf samples: 4508 ,             1816, 1790


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6491
200,0.5577
300,0.491
400,0.4297
500,0.3927
600,0.3081
700,0.2852
800,0.1993
900,0.2079
1000,0.1519


for ratio of 195, model of ALBERT-L, seed of 323 performance is:
 0.4634304207119741  and  {'test_loss': 0.9768140912055969, 'test_accuracy': 0.8082039911308204, 'test_f1': 0.8092613009922821, 'test_precision': 0.7825159914712153, 'test_recall': 0.8378995433789954, 'test_AUC': 0.8735484569359157, 'test_runtime': 1.6868, 'test_samples_per_second': 534.747, 'test_steps_per_second': 8.893}
195 ALBERT-L 200 8e-06 1500
Model seed is: 200, total, training informative and uninf samples: 4508 ,             1813, 1793


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6472
200,0.5445
300,0.4491
400,0.4154
500,0.322
600,0.2941
700,0.2564
800,0.2082
900,0.132
1000,0.1175


for ratio of 195, model of ALBERT-L, seed of 200 performance is:
 0.44983818770226536  and  {'test_loss': 1.219646692276001, 'test_accuracy': 0.7915742793791575, 'test_f1': 0.7858769931662871, 'test_precision': 0.7894736842105263, 'test_recall': 0.782312925170068, 'test_AUC': 0.8475265738978165, 'test_runtime': 1.6828, 'test_samples_per_second': 536.024, 'test_steps_per_second': 8.914}
195 ALBERT-L 999 8e-06 1500
Model seed is: 999, total, training informative and uninf samples: 4508 ,             1815, 1791


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.636
200,0.5424
300,0.4588
400,0.4327
500,0.3707
600,0.2843
700,0.317
800,0.1927
900,0.209
1000,0.1226


for ratio of 195, model of ALBERT-L, seed of 999 performance is:
 0.458252427184466  and  {'test_loss': 0.9477020502090454, 'test_accuracy': 0.8159645232815964, 'test_f1': 0.8147321428571429, 'test_precision': 0.7986870897155361, 'test_recall': 0.8314350797266514, 'test_AUC': 0.8791087145830158, 'test_runtime': 1.6891, 'test_samples_per_second': 534.0, 'test_steps_per_second': 8.88}
195 DISRoBERTa-B 9741 2e-05 1500
Model seed is: 9741, total, training informative and uninf samples: 4508 ,             1809, 1797


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.639
200,0.541
300,0.4572
400,0.4027
500,0.3386
600,0.2859
700,0.2624
800,0.1978
900,0.1634
1000,0.1256


for ratio of 195, model of DISRoBERTa-B, seed of 9741 performance is:
 0.44660194174757284  and  {'test_loss': 1.0284759998321533, 'test_accuracy': 0.8126385809312638, 'test_f1': 0.8140814081408142, 'test_precision': 0.7974137931034483, 'test_recall': 0.8314606741573034, 'test_AUC': 0.8957637744941362, 'test_runtime': 0.304, 'test_samples_per_second': 2966.901, 'test_steps_per_second': 49.339}
195 DISRoBERTa-B 1694 2e-05 1500
Model seed is: 1694, total, training informative and uninf samples: 4508 ,             1810, 1796


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6141
200,0.5578
300,0.4517
400,0.4182
500,0.3439
600,0.2867
700,0.2835
800,0.1866
900,0.2188
1000,0.136


for ratio of 195, model of DISRoBERTa-B, seed of 1694 performance is:
 0.4880258899676375  and  {'test_loss': 0.9170410633087158, 'test_accuracy': 0.8148558758314856, 'test_f1': 0.8150609080841639, 'test_precision': 0.8017429193899782, 'test_recall': 0.8288288288288288, 'test_AUC': 0.8859711239623904, 'test_runtime': 0.2915, 'test_samples_per_second': 3094.488, 'test_steps_per_second': 51.46}
195 DISRoBERTa-B 6932 2e-05 1500
Model seed is: 6932, total, training informative and uninf samples: 4508 ,             1792, 1814


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.614
200,0.5274
300,0.4158
400,0.3827
500,0.3532
600,0.2916
700,0.2591
800,0.1692
900,0.2074
1000,0.1344


for ratio of 195, model of DISRoBERTa-B, seed of 6932 performance is:
 0.5035598705501618  and  {'test_loss': 1.113025188446045, 'test_accuracy': 0.7882483370288248, 'test_f1': 0.8, 'test_precision': 0.7748478701825557, 'test_recall': 0.8268398268398268, 'test_AUC': 0.864939984258166, 'test_runtime': 0.298, 'test_samples_per_second': 3026.748, 'test_steps_per_second': 50.334}
195 DISRoBERTa-B 94 2e-05 1500
Model seed is: 94, total, training informative and uninf samples: 4508 ,             1789, 1817


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.631
200,0.5441
300,0.4563
400,0.4092
500,0.3388
600,0.3185
700,0.2519
800,0.1999
900,0.2072
1000,0.1397


for ratio of 195, model of DISRoBERTa-B, seed of 94 performance is:
 0.4666666666666667  and  {'test_loss': 0.8496114611625671, 'test_accuracy': 0.8226164079822617, 'test_f1': 0.8333333333333333, 'test_precision': 0.8080808080808081, 'test_recall': 0.8602150537634409, 'test_AUC': 0.8924632760020668, 'test_runtime': 0.2981, 'test_samples_per_second': 3025.908, 'test_steps_per_second': 50.32}
195 DISRoBERTa-B 791 2e-05 1500
Model seed is: 791, total, training informative and uninf samples: 4508 ,             1822, 1784


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6188
200,0.5483
300,0.4367
400,0.3907
500,0.2928
600,0.2918
700,0.2847
800,0.1895
900,0.1921
1000,0.143


for ratio of 195, model of DISRoBERTa-B, seed of 791 performance is:
 0.47766990291262135  and  {'test_loss': 0.9917618036270142, 'test_accuracy': 0.8070953436807096, 'test_f1': 0.8044943820224719, 'test_precision': 0.7816593886462883, 'test_recall': 0.8287037037037037, 'test_AUC': 0.8848552009456264, 'test_runtime': 0.2935, 'test_samples_per_second': 3073.319, 'test_steps_per_second': 51.108}
195 DISRoBERTa-B 5 2e-05 1500
Model seed is: 5, total, training informative and uninf samples: 4508 ,             1789, 1817


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6276
200,0.5295
300,0.4217
400,0.4066
500,0.3217
600,0.2693
700,0.2609
800,0.1952
900,0.1906
1000,0.1368


for ratio of 195, model of DISRoBERTa-B, seed of 5 performance is:
 0.4666666666666667  and  {'test_loss': 1.059033989906311, 'test_accuracy': 0.8137472283813747, 'test_f1': 0.8260869565217391, 'test_precision': 0.7964071856287425, 'test_recall': 0.8580645161290322, 'test_AUC': 0.8697866686351222, 'test_runtime': 0.2965, 'test_samples_per_second': 3041.863, 'test_steps_per_second': 50.585}
195 DISRoBERTa-B 1759 2e-05 1500
Model seed is: 1759, total, training informative and uninf samples: 4508 ,             1769, 1837


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6311
200,0.5205
300,0.4669
400,0.4102
500,0.3154
600,0.294
700,0.2591
800,0.1817
900,0.2136
1000,0.0978


for ratio of 195, model of DISRoBERTa-B, seed of 1759 performance is:
 0.4330097087378641  and  {'test_loss': 0.9675182700157166, 'test_accuracy': 0.8137472283813747, 'test_f1': 0.8212765957446808, 'test_precision': 0.8483516483516483, 'test_recall': 0.7958762886597938, 'test_AUC': 0.8918193280427205, 'test_runtime': 0.2998, 'test_samples_per_second': 3008.448, 'test_steps_per_second': 50.03}
195 DISRoBERTa-B 323 2e-05 1500
Model seed is: 323, total, training informative and uninf samples: 4508 ,             1816, 1790


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6186
200,0.531
300,0.4379
400,0.4014
500,0.3685
600,0.2942
700,0.2638
800,0.2008
900,0.1952
1000,0.1338


for ratio of 195, model of DISRoBERTa-B, seed of 323 performance is:
 0.4796116504854369  and  {'test_loss': 0.9010224938392639, 'test_accuracy': 0.8203991130820399, 'test_f1': 0.8261802575107297, 'test_precision': 0.7793522267206477, 'test_recall': 0.8789954337899544, 'test_AUC': 0.906333648244371, 'test_runtime': 0.2979, 'test_samples_per_second': 3027.937, 'test_steps_per_second': 50.354}
195 DISRoBERTa-B 200 2e-05 1500
Model seed is: 200, total, training informative and uninf samples: 4508 ,             1813, 1793


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6109
200,0.5235
300,0.4195
400,0.3999
500,0.3269
600,0.2831
700,0.2518
800,0.2037
900,0.1896
1000,0.1172


for ratio of 195, model of DISRoBERTa-B, seed of 200 performance is:
 0.4854368932038835  and  {'test_loss': 0.9777347445487976, 'test_accuracy': 0.79490022172949, 'test_f1': 0.7978142076502733, 'test_precision': 0.770042194092827, 'test_recall': 0.8276643990929705, 'test_AUC': 0.8807482501315784, 'test_runtime': 0.2947, 'test_samples_per_second': 3060.795, 'test_steps_per_second': 50.9}
195 DISRoBERTa-B 999 2e-05 1500
Model seed is: 999, total, training informative and uninf samples: 4508 ,             1815, 1791


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6296
200,0.5259
300,0.4275
400,0.4044
500,0.3385
600,0.2529
700,0.2793
800,0.1743
900,0.18
1000,0.1138


for ratio of 195, model of DISRoBERTa-B, seed of 999 performance is:
 0.4627831715210356  and  {'test_loss': 0.9481362104415894, 'test_accuracy': 0.8170731707317073, 'test_f1': 0.8200654307524538, 'test_precision': 0.7866108786610879, 'test_recall': 0.856492027334852, 'test_AUC': 0.900972660228184, 'test_runtime': 0.2946, 'test_samples_per_second': 3062.237, 'test_steps_per_second': 50.924}
195 XLNet-B 9741 2e-05 1500
Model seed is: 9741, total, training informative and uninf samples: 4508 ,             1809, 1797


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6479
200,0.5565
300,0.4862
400,0.4461
500,0.3792
600,0.294
700,0.2662
800,0.1952
900,0.1845
1000,0.1127


for ratio of 195, model of XLNet-B, seed of 9741 performance is:
 0.5423948220064725  and  {'test_loss': 1.1859581470489502, 'test_accuracy': 0.8159645232815964, 'test_f1': 0.8222698072805139, 'test_precision': 0.7852760736196319, 'test_recall': 0.8629213483146068, 'test_AUC': 0.884813020922971, 'test_runtime': 0.6981, 'test_samples_per_second': 1292.063, 'test_steps_per_second': 21.487}
195 XLNet-B 1694 2e-05 1500
Model seed is: 1694, total, training informative and uninf samples: 4508 ,             1810, 1796


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6269
200,0.5785
300,0.4752
400,0.4291
500,0.3595
600,0.296
700,0.2539
800,0.1813
900,0.1772
1000,0.1081


for ratio of 195, model of XLNet-B, seed of 1694 performance is:
 0.5501618122977346  and  {'test_loss': 1.3135398626327515, 'test_accuracy': 0.7838137472283814, 'test_f1': 0.790547798066595, 'test_precision': 0.75564681724846, 'test_recall': 0.8288288288288288, 'test_AUC': 0.8689759628624257, 'test_runtime': 0.7105, 'test_samples_per_second': 1269.565, 'test_steps_per_second': 21.113}
195 XLNet-B 6932 2e-05 1500
Model seed is: 6932, total, training informative and uninf samples: 4508 ,             1792, 1814


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6364
200,0.5586
300,0.4651
400,0.46
500,0.3846
600,0.2706
700,0.2718
800,0.166
900,0.2044
1000,0.1334


for ratio of 195, model of XLNet-B, seed of 6932 performance is:
 0.5223300970873787  and  {'test_loss': 1.2650525569915771, 'test_accuracy': 0.79490022172949, 'test_f1': 0.8054679284963197, 'test_precision': 0.7832310838445807, 'test_recall': 0.829004329004329, 'test_AUC': 0.8713252656434474, 'test_runtime': 0.711, 'test_samples_per_second': 1268.65, 'test_steps_per_second': 21.097}
195 XLNet-B 94 2e-05 1500
Model seed is: 94, total, training informative and uninf samples: 4508 ,             1789, 1817


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6161
200,0.5527
300,0.4682
400,0.4461
500,0.3314
600,0.3033
700,0.2426
800,0.202
900,0.1882
1000,0.1038


for ratio of 195, model of XLNet-B, seed of 94 performance is:
 0.5035598705501618  and  {'test_loss': 1.0838929414749146, 'test_accuracy': 0.8037694013303769, 'test_f1': 0.818833162743091, 'test_precision': 0.78125, 'test_recall': 0.8602150537634409, 'test_AUC': 0.8872468689254693, 'test_runtime': 0.7031, 'test_samples_per_second': 1282.876, 'test_steps_per_second': 21.334}
195 XLNet-B 791 2e-05 1500
Model seed is: 791, total, training informative and uninf samples: 4508 ,             1822, 1784


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6293
200,0.5838
300,0.4815
400,0.4286
500,0.3656
600,0.3018
700,0.2841
800,0.1802
900,0.2108
1000,0.1246


for ratio of 195, model of XLNet-B, seed of 791 performance is:
 0.47508090614886733  and  {'test_loss': 1.2133146524429321, 'test_accuracy': 0.8048780487804879, 'test_f1': 0.8044444444444444, 'test_precision': 0.7735042735042735, 'test_recall': 0.8379629629629629, 'test_AUC': 0.8724143026004728, 'test_runtime': 0.7199, 'test_samples_per_second': 1253.027, 'test_steps_per_second': 20.837}
195 XLNet-B 5 2e-05 1500
Model seed is: 5, total, training informative and uninf samples: 4508 ,             1789, 1817


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6529
200,0.563
300,0.4508
400,0.4451
500,0.3714
600,0.3118
700,0.2774
800,0.2151
900,0.1923
1000,0.1264


for ratio of 195, model of XLNet-B, seed of 5 performance is:
 0.5158576051779935  and  {'test_loss': 1.0320466756820679, 'test_accuracy': 0.8226164079822617, 'test_f1': 0.8370672097759675, 'test_precision': 0.7949709864603481, 'test_recall': 0.8838709677419355, 'test_AUC': 0.8907261140227849, 'test_runtime': 0.7084, 'test_samples_per_second': 1273.251, 'test_steps_per_second': 21.174}
195 XLNet-B 1759 2e-05 1500
Model seed is: 1759, total, training informative and uninf samples: 4508 ,             1769, 1837


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6414
200,0.582
300,0.4826
400,0.4469
500,0.3418
600,0.3094
700,0.2427
800,0.207
900,0.1859
1000,0.0931


for ratio of 195, model of XLNet-B, seed of 1759 performance is:
 0.516504854368932  and  {'test_loss': 1.2644715309143066, 'test_accuracy': 0.8093126385809313, 'test_f1': 0.8297029702970297, 'test_precision': 0.7980952380952381, 'test_recall': 0.8639175257731959, 'test_AUC': 0.8772330589136939, 'test_runtime': 0.7006, 'test_samples_per_second': 1287.552, 'test_steps_per_second': 21.412}
195 XLNet-B 323 2e-05 1500
Model seed is: 323, total, training informative and uninf samples: 4508 ,             1816, 1790


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6492
200,0.574
300,0.5081
400,0.4385
500,0.412
600,0.3387
700,0.2592
800,0.2062
900,0.2308
1000,0.1208


for ratio of 195, model of XLNet-B, seed of 323 performance is:
 0.5320388349514563  and  {'test_loss': 1.078986644744873, 'test_accuracy': 0.8170731707317073, 'test_f1': 0.8250265111346766, 'test_precision': 0.7702970297029703, 'test_recall': 0.8881278538812786, 'test_AUC': 0.894047197291765, 'test_runtime': 0.6998, 'test_samples_per_second': 1288.996, 'test_steps_per_second': 21.436}
195 XLNet-B 200 2e-05 1500
Model seed is: 200, total, training informative and uninf samples: 4508 ,             1813, 1793


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6408
200,0.5366
300,0.4609
400,0.4646
500,0.3573
600,0.3164
700,0.263
800,0.2233
900,0.184
1000,0.1117


for ratio of 195, model of XLNet-B, seed of 200 performance is:
 0.5022653721682848  and  {'test_loss': 1.3557497262954712, 'test_accuracy': 0.7749445676274944, 'test_f1': 0.7761852260198457, 'test_precision': 0.7553648068669528, 'test_recall': 0.7981859410430839, 'test_AUC': 0.8573002592215484, 'test_runtime': 0.708, 'test_samples_per_second': 1274.072, 'test_steps_per_second': 21.187}
195 XLNet-B 999 2e-05 1500
Model seed is: 999, total, training informative and uninf samples: 4508 ,             1815, 1791


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.651
200,0.5481
300,0.4651
400,0.4308
500,0.3772
600,0.272
700,0.2758
800,0.1755
900,0.1821
1000,0.1158


for ratio of 195, model of XLNet-B, seed of 999 performance is:
 0.5359223300970873  and  {'test_loss': 1.1543192863464355, 'test_accuracy': 0.8015521064301552, 'test_f1': 0.8097768331562167, 'test_precision': 0.7589641434262948, 'test_recall': 0.8678815489749431, 'test_AUC': 0.884835454621489, 'test_runtime': 0.7072, 'test_samples_per_second': 1275.403, 'test_steps_per_second': 21.21}
195 XLNet-L 9741 1e-05 1125
Model seed is: 9741, total, training informative and uninf samples: 4508 ,             1809, 1797


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.675
200,0.5912
300,0.5126
400,0.4554
500,0.3814
600,0.3115
700,0.2632
800,0.1957
900,0.1335
1000,0.1079


for ratio of 195, model of XLNet-L, seed of 9741 performance is:
 0.512621359223301  and  {'test_loss': 0.9334791302680969, 'test_accuracy': 0.770509977827051, 'test_f1': 0.7786096256684492, 'test_precision': 0.7428571428571429, 'test_recall': 0.8179775280898877, 'test_AUC': 0.8458092592137292, 'test_runtime': 1.9139, 'test_samples_per_second': 471.279, 'test_steps_per_second': 7.837}
195 XLNet-L 1694 1e-05 1125
Model seed is: 1694, total, training informative and uninf samples: 4508 ,             1810, 1796


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.696
200,0.6319
300,0.5539
400,0.4833
500,0.4196
600,0.3566
700,0.2992
800,0.2552
900,0.1985
1000,0.1549


for ratio of 195, model of XLNet-L, seed of 1694 performance is:
 0.49902912621359224  and  {'test_loss': 0.6797966957092285, 'test_accuracy': 0.7971175166297118, 'test_f1': 0.7927519818799548, 'test_precision': 0.7972665148063781, 'test_recall': 0.7882882882882883, 'test_AUC': 0.8700824186632047, 'test_runtime': 1.9347, 'test_samples_per_second': 466.215, 'test_steps_per_second': 7.753}
195 XLNet-L 6932 1e-05 1125
Model seed is: 6932, total, training informative and uninf samples: 4508 ,             1792, 1814


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.7098
200,0.6412
300,0.5697
400,0.4672
500,0.4645
600,0.344
700,0.3143
800,0.2463
900,0.2038
1000,0.1725


for ratio of 195, model of XLNet-L, seed of 6932 performance is:
 0.427831715210356  and  {'test_loss': 0.816497802734375, 'test_accuracy': 0.7738359201773836, 'test_f1': 0.7660550458715596, 'test_precision': 0.8146341463414634, 'test_recall': 0.7229437229437229, 'test_AUC': 0.8493309720582448, 'test_runtime': 1.9114, 'test_samples_per_second': 471.895, 'test_steps_per_second': 7.847}
195 XLNet-L 94 1e-05 1125
Model seed is: 94, total, training informative and uninf samples: 4508 ,             1789, 1817


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.694
200,0.6235
300,0.542
400,0.475
500,0.3844
600,0.338
700,0.2632
800,0.1999
900,0.1677
1000,0.135


for ratio of 195, model of XLNet-L, seed of 94 performance is:
 0.5119741100323625  and  {'test_loss': 0.6817896962165833, 'test_accuracy': 0.8070953436807096, 'test_f1': 0.8217213114754098, 'test_precision': 0.7847358121330724, 'test_recall': 0.8623655913978494, 'test_AUC': 0.8848601166309884, 'test_runtime': 1.9188, 'test_samples_per_second': 470.076, 'test_steps_per_second': 7.817}
195 XLNet-L 791 1e-05 1125
Model seed is: 791, total, training informative and uninf samples: 4508 ,             1822, 1784


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.7026
200,0.6229
300,0.5185
400,0.4665
500,0.3865
600,0.3241
700,0.279
800,0.2202
900,0.1811
1000,0.1422


for ratio of 195, model of XLNet-L, seed of 791 performance is:
 0.5255663430420712  and  {'test_loss': 0.6807739734649658, 'test_accuracy': 0.7937915742793792, 'test_f1': 0.7960526315789473, 'test_precision': 0.75625, 'test_recall': 0.8402777777777778, 'test_AUC': 0.8679570527974784, 'test_runtime': 1.9213, 'test_samples_per_second': 469.466, 'test_steps_per_second': 7.807}
195 XLNet-L 5 1e-05 1125
Model seed is: 5, total, training informative and uninf samples: 4508 ,             1789, 1817


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6705
200,0.5557
300,0.4651
400,0.4211
500,0.3278
600,0.2745
700,0.1861
800,0.163
900,0.1087
1000,0.0894


for ratio of 195, model of XLNet-L, seed of 5 performance is:
 0.5074433656957928  and  {'test_loss': 0.9754342436790466, 'test_accuracy': 0.7849223946784922, 'test_f1': 0.7909482758620691, 'test_precision': 0.7926565874730022, 'test_recall': 0.789247311827957, 'test_AUC': 0.8518983292733938, 'test_runtime': 1.9299, 'test_samples_per_second': 467.375, 'test_steps_per_second': 7.772}
195 XLNet-L 1759 1e-05 1125
Model seed is: 1759, total, training informative and uninf samples: 4508 ,             1769, 1837


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.7059
200,0.6154
300,0.5161
400,0.4364
500,0.3664
600,0.3042
700,0.2331
800,0.1933
900,0.145
1000,0.1143


for ratio of 195, model of XLNet-L, seed of 1759 performance is:
 0.47249190938511326  and  {'test_loss': 0.7432101964950562, 'test_accuracy': 0.7893569844789357, 'test_f1': 0.8057259713701431, 'test_precision': 0.7991886409736308, 'test_recall': 0.8123711340206186, 'test_AUC': 0.8644960320403471, 'test_runtime': 1.9263, 'test_samples_per_second': 468.254, 'test_steps_per_second': 7.787}
195 XLNet-L 323 1e-05 1125
Model seed is: 323, total, training informative and uninf samples: 4508 ,             1816, 1790


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.661
200,0.5572
300,0.4991
400,0.3908
500,0.3221
600,0.2683
700,0.1731
800,0.1433
900,0.1137
1000,0.0895


for ratio of 195, model of XLNet-L, seed of 323 performance is:
 0.5527508090614887  and  {'test_loss': 0.9268752932548523, 'test_accuracy': 0.79490022172949, 'test_f1': 0.8042328042328043, 'test_precision': 0.7495069033530573, 'test_recall': 0.867579908675799, 'test_AUC': 0.8760382223271925, 'test_runtime': 1.9165, 'test_samples_per_second': 470.643, 'test_steps_per_second': 7.827}
195 XLNet-L 200 1e-05 1125
Model seed is: 200, total, training informative and uninf samples: 4508 ,             1813, 1793


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.697
200,0.6483
300,0.5817
400,0.5224
500,0.4579
600,0.4174
700,0.3748
800,0.3269
900,0.277
1000,0.2505


for ratio of 195, model of XLNet-L, seed of 200 performance is:
 0.5255663430420712  and  {'test_loss': 0.6271263360977173, 'test_accuracy': 0.7694013303769401, 'test_f1': 0.7748917748917749, 'test_precision': 0.7412008281573499, 'test_recall': 0.8117913832199547, 'test_AUC': 0.8457951510322133, 'test_runtime': 1.9271, 'test_samples_per_second': 468.056, 'test_steps_per_second': 7.784}
195 XLNet-L 999 1e-05 1125
Model seed is: 999, total, training informative and uninf samples: 4508 ,             1815, 1791


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.7022
200,0.6239
300,0.5553
400,0.5022
500,0.4474
600,0.3838
700,0.3691
800,0.3444
900,0.2812
1000,0.2447


for ratio of 195, model of XLNet-L, seed of 999 performance is:
 0.5411003236245955  and  {'test_loss': 0.5284166932106018, 'test_accuracy': 0.8104212860310421, 'test_f1': 0.8171122994652406, 'test_precision': 0.7701612903225806, 'test_recall': 0.8701594533029613, 'test_AUC': 0.8738887221596303, 'test_runtime': 1.9176, 'test_samples_per_second': 470.374, 'test_steps_per_second': 7.822}
195 DEBERT-B 9741 3e-05 1500
Model seed is: 9741, total, training informative and uninf samples: 4508 ,             1809, 1797


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6968
200,0.6386
300,0.4762
400,0.4287
500,0.3388
600,0.2815
700,0.2503
800,0.1573
900,0.1794
1000,0.0928


for ratio of 195, model of DEBERT-B, seed of 9741 performance is:
 0.4627831715210356  and  {'test_loss': 1.0500608682632446, 'test_accuracy': 0.8203991130820399, 'test_f1': 0.8239130434782608, 'test_precision': 0.7978947368421052, 'test_recall': 0.851685393258427, 'test_AUC': 0.9009957465640598, 'test_runtime': 0.6345, 'test_samples_per_second': 1421.538, 'test_steps_per_second': 23.64}
195 DEBERT-B 1694 3e-05 1500
Model seed is: 1694, total, training informative and uninf samples: 4508 ,             1810, 1796


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6768
200,0.5564
300,0.4193
400,0.39
500,0.2775
600,0.1888
700,0.1754
800,0.0846
900,0.1244
1000,0.0302


for ratio of 195, model of DEBERT-B, seed of 1694 performance is:
 0.4601941747572815  and  {'test_loss': 1.1356453895568848, 'test_accuracy': 0.8325942350332595, 'test_f1': 0.8324084350721421, 'test_precision': 0.8205689277899344, 'test_recall': 0.8445945945945946, 'test_AUC': 0.8949407923207049, 'test_runtime': 0.6361, 'test_samples_per_second': 1418.037, 'test_steps_per_second': 23.582}
195 DEBERT-B 6932 3e-05 1500
Model seed is: 6932, total, training informative and uninf samples: 4508 ,             1792, 1814


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6531
200,0.5284
300,0.3661
400,0.3805
500,0.2796
600,0.1771
700,0.1824
800,0.093
900,0.1205
1000,0.0302


for ratio of 195, model of DEBERT-B, seed of 6932 performance is:
 0.4854368932038835  and  {'test_loss': 1.2840454578399658, 'test_accuracy': 0.8082039911308204, 'test_f1': 0.8177028451001054, 'test_precision': 0.7967145790554415, 'test_recall': 0.8398268398268398, 'test_AUC': 0.878168044077135, 'test_runtime': 0.6506, 'test_samples_per_second': 1386.426, 'test_steps_per_second': 23.056}
195 DEBERT-B 94 3e-05 1500
Model seed is: 94, total, training informative and uninf samples: 4508 ,             1789, 1817


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6381
200,0.5168
300,0.3904
400,0.351
500,0.2784
600,0.2284
700,0.1833
800,0.1116
900,0.1078
1000,0.0417


for ratio of 195, model of DEBERT-B, seed of 94 performance is:
 0.44660194174757284  and  {'test_loss': 1.0673693418502808, 'test_accuracy': 0.835920177383592, 'test_f1': 0.8455114822546973, 'test_precision': 0.821501014198783, 'test_recall': 0.8709677419354839, 'test_AUC': 0.8932457370635565, 'test_runtime': 0.6309, 'test_samples_per_second': 1429.733, 'test_steps_per_second': 23.776}
195 DEBERT-B 791 3e-05 1500
Model seed is: 791, total, training informative and uninf samples: 4508 ,             1822, 1784


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.5941
200,0.5207
300,0.3922
400,0.3246
500,0.2426
600,0.1938
700,0.1963
800,0.0841
900,0.1113
1000,0.0435


for ratio of 195, model of DEBERT-B, seed of 791 performance is:
 0.5016181229773463  and  {'test_loss': 1.254754900932312, 'test_accuracy': 0.8059866962305987, 'test_f1': 0.8070562293274532, 'test_precision': 0.7705263157894737, 'test_recall': 0.8472222222222222, 'test_AUC': 0.869739952718676, 'test_runtime': 0.6366, 'test_samples_per_second': 1417.013, 'test_steps_per_second': 23.565}
195 DEBERT-B 5 3e-05 1500
Model seed is: 5, total, training informative and uninf samples: 4508 ,             1789, 1817


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6681
200,0.6
300,0.4395
400,0.4088
500,0.3067
600,0.2267
700,0.2003
800,0.1294
900,0.1284
1000,0.0621


for ratio of 195, model of DEBERT-B, seed of 5 performance is:
 0.46601941747572817  and  {'test_loss': 1.111245036125183, 'test_accuracy': 0.8248337028824834, 'test_f1': 0.8329809725158563, 'test_precision': 0.8191268191268192, 'test_recall': 0.8473118279569892, 'test_AUC': 0.8965035309170544, 'test_runtime': 0.6299, 'test_samples_per_second': 1431.938, 'test_steps_per_second': 23.813}
195 DEBERT-B 1759 3e-05 1500
Model seed is: 1759, total, training informative and uninf samples: 4508 ,             1769, 1837


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6687
200,0.5479
300,0.4363
400,0.3779
500,0.2443
600,0.196
700,0.1884
800,0.1272
900,0.112
1000,0.0461


for ratio of 195, model of DEBERT-B, seed of 1759 performance is:
 0.5093851132686085  and  {'test_loss': 1.2456084489822388, 'test_accuracy': 0.8192904656319291, 'test_f1': 0.8400392541707556, 'test_precision': 0.8014981273408239, 'test_recall': 0.8824742268041237, 'test_AUC': 0.889460802492027, 'test_runtime': 0.6257, 'test_samples_per_second': 1441.624, 'test_steps_per_second': 23.974}
195 DEBERT-B 323 3e-05 1500
Model seed is: 323, total, training informative and uninf samples: 4508 ,             1816, 1790


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6473
200,0.5154
300,0.3902
400,0.3638
500,0.2993
600,0.2021
700,0.143
800,0.1038
900,0.1237
1000,0.0614


for ratio of 195, model of DEBERT-B, seed of 323 performance is:
 0.458252427184466  and  {'test_loss': 1.0602679252624512, 'test_accuracy': 0.8292682926829268, 'test_f1': 0.8315098468271336, 'test_precision': 0.7983193277310925, 'test_recall': 0.867579908675799, 'test_AUC': 0.9127401196661943, 'test_runtime': 0.633, 'test_samples_per_second': 1425.013, 'test_steps_per_second': 23.698}
195 DEBERT-B 200 3e-05 1500
Model seed is: 200, total, training informative and uninf samples: 4508 ,             1813, 1793


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6303
200,0.5383
300,0.3898
400,0.3625
500,0.2827
600,0.2159
700,0.1577
800,0.1061
900,0.0772
1000,0.0613


for ratio of 195, model of DEBERT-B, seed of 200 performance is:
 0.458252427184466  and  {'test_loss': 1.0931473970413208, 'test_accuracy': 0.811529933481153, 'test_f1': 0.8076923076923078, 'test_precision': 0.8058690744920993, 'test_recall': 0.8095238095238095, 'test_AUC': 0.8932322024977742, 'test_runtime': 0.6397, 'test_samples_per_second': 1409.968, 'test_steps_per_second': 23.447}
195 DEBERT-B 999 3e-05 1500
Model seed is: 999, total, training informative and uninf samples: 4508 ,             1815, 1791


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6873
200,0.6024
300,0.4497
400,0.4259
500,0.3401
600,0.2354
700,0.2335
800,0.1512
900,0.1608
1000,0.0847


for ratio of 195, model of DEBERT-B, seed of 999 performance is:
 0.4919093851132686  and  {'test_loss': 1.0960094928741455, 'test_accuracy': 0.8270509977827051, 'test_f1': 0.832618025751073, 'test_precision': 0.7870182555780934, 'test_recall': 0.8838268792710706, 'test_AUC': 0.901258013254156, 'test_runtime': 0.6327, 'test_samples_per_second': 1425.717, 'test_steps_per_second': 23.709}
195 DEBERT-L 9741 1e-05 1500
Model seed is: 9741, total, training informative and uninf samples: 4508 ,             1809, 1797


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-large and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.637
200,0.5078
300,0.3642
400,0.3246
500,0.2731
600,0.1867
700,0.1727
800,0.0876
900,0.0944
1000,0.0651


for ratio of 195, model of DEBERT-L, seed of 9741 performance is:
 0.46601941747572817  and  {'test_loss': 1.1999164819717407, 'test_accuracy': 0.8292682926829268, 'test_f1': 0.8326086956521739, 'test_precision': 0.8063157894736842, 'test_recall': 0.8606741573033708, 'test_AUC': 0.8931232021242592, 'test_runtime': 1.7342, 'test_samples_per_second': 520.125, 'test_steps_per_second': 8.65}
195 DEBERT-L 1694 1e-05 1500
Model seed is: 1694, total, training informative and uninf samples: 4508 ,             1810, 1796


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-large and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6902
200,0.597
300,0.4581
400,0.4284
500,0.3523
600,0.2799
700,0.2349
800,0.1655
900,0.1801
1000,0.0962


for ratio of 195, model of DEBERT-L, seed of 1694 performance is:
 0.4912621359223301  and  {'test_loss': 1.071107268333435, 'test_accuracy': 0.8292682926829268, 'test_f1': 0.8315098468271335, 'test_precision': 0.8085106382978723, 'test_recall': 0.8558558558558559, 'test_AUC': 0.8692906880679806, 'test_runtime': 1.7293, 'test_samples_per_second': 521.599, 'test_steps_per_second': 8.674}
195 DEBERT-L 6932 1e-05 1500
Model seed is: 6932, total, training informative and uninf samples: 4508 ,             1792, 1814


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-large and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6248
200,0.5475
300,0.4283
400,0.4028
500,0.3346
600,0.2383
700,0.2256
800,0.1454
900,0.196
1000,0.0902


for ratio of 195, model of DEBERT-L, seed of 6932 performance is:
 0.5022653721682848  and  {'test_loss': 1.0487315654754639, 'test_accuracy': 0.8248337028824834, 'test_f1': 0.8319148936170214, 'test_precision': 0.8179916317991632, 'test_recall': 0.8463203463203464, 'test_AUC': 0.8874606454151909, 'test_runtime': 1.7273, 'test_samples_per_second': 522.216, 'test_steps_per_second': 8.684}
195 DEBERT-L 94 1e-05 1500
Model seed is: 94, total, training informative and uninf samples: 4508 ,             1789, 1817


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-large and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.662
200,0.5383
300,0.4105
400,0.3661
500,0.2901
600,0.2264
700,0.1727
800,0.1261
900,0.1263
1000,0.0697


for ratio of 195, model of DEBERT-L, seed of 94 performance is:
 0.47313915857605177  and  {'test_loss': 1.119978666305542, 'test_accuracy': 0.844789356984479, 'test_f1': 0.8535564853556485, 'test_precision': 0.8309572301425662, 'test_recall': 0.8774193548387097, 'test_AUC': 0.9061489628700081, 'test_runtime': 1.7331, 'test_samples_per_second': 520.461, 'test_steps_per_second': 8.655}
195 DEBERT-L 791 1e-05 1500
Model seed is: 791, total, training informative and uninf samples: 4508 ,             1822, 1784


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-large and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6086
200,0.5094
300,0.3497
400,0.3004
500,0.2117
600,0.1637
700,0.159
800,0.055
900,0.0787
1000,0.0294


for ratio of 195, model of DEBERT-L, seed of 791 performance is:
 0.5158576051779935  and  {'test_loss': 1.3031014204025269, 'test_accuracy': 0.8237250554323725, 'test_f1': 0.8250825082508251, 'test_precision': 0.7861635220125787, 'test_recall': 0.8680555555555556, 'test_AUC': 0.8780043341213554, 'test_runtime': 1.7455, 'test_samples_per_second': 516.751, 'test_steps_per_second': 8.593}
195 DEBERT-L 5 1e-05 1500
Model seed is: 5, total, training informative and uninf samples: 4508 ,             1789, 1817


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-large and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6404
200,0.54
300,0.3786
400,0.3722
500,0.2904
600,0.2223
700,0.2197
800,0.1454
900,0.1541
1000,0.0662


for ratio of 195, model of DEBERT-L, seed of 5 performance is:
 0.46601941747572817  and  {'test_loss': 1.20796799659729, 'test_accuracy': 0.8270509977827051, 'test_f1': 0.837160751565762, 'test_precision': 0.8133874239350912, 'test_recall': 0.8623655913978494, 'test_AUC': 0.8914101523092444, 'test_runtime': 1.7273, 'test_samples_per_second': 522.211, 'test_steps_per_second': 8.684}
195 DEBERT-L 1759 1e-05 1500
Model seed is: 1759, total, training informative and uninf samples: 4508 ,             1769, 1837


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-large and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6586
200,0.5457
300,0.4492
400,0.3968
500,0.2862
600,0.2691
700,0.2047
800,0.1326
900,0.1386
1000,0.07


for ratio of 195, model of DEBERT-L, seed of 1759 performance is:
 0.47055016181229775  and  {'test_loss': 1.3397939205169678, 'test_accuracy': 0.8082039911308204, 'test_f1': 0.8264794383149449, 'test_precision': 0.8046875, 'test_recall': 0.8494845360824742, 'test_AUC': 0.8683131845039432, 'test_runtime': 1.7351, 'test_samples_per_second': 519.854, 'test_steps_per_second': 8.645}
195 DEBERT-L 323 1e-05 1500
Model seed is: 323, total, training informative and uninf samples: 4508 ,             1816, 1790


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-large and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.603
200,0.4773
300,0.3648
400,0.3336
500,0.2735
600,0.1815
700,0.1614
800,0.0759
900,0.0965
1000,0.0297


for ratio of 195, model of DEBERT-L, seed of 323 performance is:
 0.4802588996763754  and  {'test_loss': 1.0789984464645386, 'test_accuracy': 0.8503325942350333, 'test_f1': 0.8549946294307198, 'test_precision': 0.8073022312373225, 'test_recall': 0.908675799086758, 'test_AUC': 0.9139603999370177, 'test_runtime': 1.7326, 'test_samples_per_second': 520.603, 'test_steps_per_second': 8.657}
195 DEBERT-L 200 1e-05 1500
Model seed is: 200, total, training informative and uninf samples: 4508 ,             1813, 1793


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-large and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6525
200,0.5696
300,0.4303
400,0.3975
500,0.3122
600,0.2393
700,0.2283
800,0.1697
900,0.1152
1000,0.0596


for ratio of 195, model of DEBERT-L, seed of 200 performance is:
 0.4912621359223301  and  {'test_loss': 0.9550820589065552, 'test_accuracy': 0.8425720620842572, 'test_f1': 0.8473118279569892, 'test_precision': 0.8057259713701431, 'test_recall': 0.8934240362811792, 'test_AUC': 0.9055784280451153, 'test_runtime': 1.7419, 'test_samples_per_second': 517.838, 'test_steps_per_second': 8.612}
195 DEBERT-L 999 1e-05 1500
Model seed is: 999, total, training informative and uninf samples: 4508 ,             1815, 1791


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-large and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6473
200,0.5255
300,0.386
400,0.3606
500,0.2723
600,0.2041
700,0.2105
800,0.1189
900,0.1201
1000,0.0766


for ratio of 195, model of DEBERT-L, seed of 999 performance is:
 0.49449838187702266  and  {'test_loss': 1.0488128662109375, 'test_accuracy': 0.835920177383592, 'test_f1': 0.8401727861771058, 'test_precision': 0.7987679671457906, 'test_recall': 0.8861047835990888, 'test_AUC': 0.9054645104473648, 'test_runtime': 1.7436, 'test_samples_per_second': 517.311, 'test_steps_per_second': 8.603}
195 XLM-B 9741 2e-05 1500
Model seed is: 9741, total, training informative and uninf samples: 4508 ,             1809, 1797


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6712
200,0.5842
300,0.4984
400,0.4836
500,0.4035
600,0.3366
700,0.2899
800,0.2555
900,0.2481
1000,0.1792


for ratio of 195, model of XLM-B, seed of 9741 performance is:
 0.44336569579288027  and  {'test_loss': 0.8043178915977478, 'test_accuracy': 0.8226164079822617, 'test_f1': 0.8283261802575107, 'test_precision': 0.7926078028747433, 'test_recall': 0.8674157303370786, 'test_AUC': 0.9062523049688982, 'test_runtime': 0.5149, 'test_samples_per_second': 1751.744, 'test_steps_per_second': 29.131}
195 XLM-B 1694 2e-05 1500
Model seed is: 1694, total, training informative and uninf samples: 4508 ,             1810, 1796


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6459
200,0.5998
300,0.5031
400,0.4753
500,0.399
600,0.3707
700,0.3167
800,0.2787
900,0.2787
1000,0.2


for ratio of 195, model of XLM-B, seed of 1694 performance is:
 0.47184466019417476  and  {'test_loss': 0.855659544467926, 'test_accuracy': 0.8137472283813747, 'test_f1': 0.8149779735682819, 'test_precision': 0.7974137931034483, 'test_recall': 0.8333333333333334, 'test_AUC': 0.8950293087847672, 'test_runtime': 0.5195, 'test_samples_per_second': 1736.185, 'test_steps_per_second': 28.872}
195 XLM-B 6932 2e-05 1500
Model seed is: 6932, total, training informative and uninf samples: 4508 ,             1792, 1814


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6672
200,0.5725
300,0.482
400,0.4529
500,0.4169
600,0.3323
700,0.3137
800,0.236
900,0.2953
1000,0.1888


for ratio of 195, model of XLM-B, seed of 6932 performance is:
 0.4925566343042071  and  {'test_loss': 1.0114965438842773, 'test_accuracy': 0.7982261640798226, 'test_f1': 0.8076109936575053, 'test_precision': 0.7892561983471075, 'test_recall': 0.8268398268398268, 'test_AUC': 0.8700265643447462, 'test_runtime': 0.5142, 'test_samples_per_second': 1754.268, 'test_steps_per_second': 29.173}
195 XLM-B 94 2e-05 1500
Model seed is: 94, total, training informative and uninf samples: 4508 ,             1789, 1817


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6637
200,0.6134
300,0.5246
400,0.4801
500,0.4148
600,0.378
700,0.3188
800,0.281
900,0.2773
1000,0.216


for ratio of 195, model of XLM-B, seed of 94 performance is:
 0.47055016181229775  and  {'test_loss': 0.7680992484092712, 'test_accuracy': 0.8226164079822617, 'test_f1': 0.8360655737704918, 'test_precision': 0.7984344422700587, 'test_recall': 0.8774193548387097, 'test_AUC': 0.9025811372751655, 'test_runtime': 0.5229, 'test_samples_per_second': 1725.114, 'test_steps_per_second': 28.688}
195 XLM-B 791 2e-05 1500
Model seed is: 791, total, training informative and uninf samples: 4508 ,             1822, 1784


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6579
200,0.6069
300,0.5128
400,0.4596
500,0.3599
600,0.3578
700,0.36
800,0.2572
900,0.2467
1000,0.2122


for ratio of 195, model of XLM-B, seed of 791 performance is:
 0.49967637540453075  and  {'test_loss': 0.9557546377182007, 'test_accuracy': 0.8004434589800443, 'test_f1': 0.8030634573304156, 'test_precision': 0.7614107883817427, 'test_recall': 0.8495370370370371, 'test_AUC': 0.8735569345941686, 'test_runtime': 0.5162, 'test_samples_per_second': 1747.275, 'test_steps_per_second': 29.057}
195 XLM-B 5 2e-05 1500
Model seed is: 5, total, training informative and uninf samples: 4508 ,             1789, 1817


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6529
200,0.5548
300,0.4518
400,0.455
500,0.3856
600,0.3249
700,0.3331
800,0.2185
900,0.2391
1000,0.1716


for ratio of 195, model of XLM-B, seed of 5 performance is:
 0.49320388349514566  and  {'test_loss': 0.9969546794891357, 'test_accuracy': 0.7960088691796009, 'test_f1': 0.8118609406952965, 'test_precision': 0.7738791423001949, 'test_recall': 0.853763440860215, 'test_AUC': 0.8767845279397652, 'test_runtime': 0.515, 'test_samples_per_second': 1751.364, 'test_steps_per_second': 29.125}
195 XLM-B 1759 2e-05 1500
Model seed is: 1759, total, training informative and uninf samples: 4508 ,             1769, 1837


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6564
200,0.5836
300,0.5283
400,0.4936
500,0.4043
600,0.3816
700,0.3187
800,0.2728
900,0.2669
1000,0.1603


for ratio of 195, model of XLM-B, seed of 1759 performance is:
 0.45436893203883494  and  {'test_loss': 0.9358072280883789, 'test_accuracy': 0.7960088691796009, 'test_f1': 0.8156312625250501, 'test_precision': 0.7933723196881092, 'test_recall': 0.8391752577319588, 'test_AUC': 0.8703602066800168, 'test_runtime': 0.5172, 'test_samples_per_second': 1744.062, 'test_steps_per_second': 29.003}
195 XLM-B 323 2e-05 1500
Model seed is: 323, total, training informative and uninf samples: 4508 ,             1816, 1790


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6484
200,0.5665
300,0.506
400,0.4719
500,0.4265
600,0.3787
700,0.313
800,0.2597
900,0.2569
1000,0.1932


for ratio of 195, model of XLM-B, seed of 323 performance is:
 0.4867313915857605  and  {'test_loss': 0.853652834892273, 'test_accuracy': 0.811529933481153, 'test_f1': 0.8199152542372882, 'test_precision': 0.7648221343873518, 'test_recall': 0.8835616438356164, 'test_AUC': 0.8913999763816721, 'test_runtime': 0.5199, 'test_samples_per_second': 1735.098, 'test_steps_per_second': 28.854}
195 XLM-B 200 2e-05 1500
Model seed is: 200, total, training informative and uninf samples: 4508 ,             1813, 1793


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6586
200,0.6001
300,0.5166
400,0.5013
500,0.4319
600,0.3715
700,0.3416
800,0.2931
900,0.2497
1000,0.199


for ratio of 195, model of XLM-B, seed of 200 performance is:
 0.44789644012944985  and  {'test_loss': 0.935314416885376, 'test_accuracy': 0.8159645232815964, 'test_f1': 0.8167770419426048, 'test_precision': 0.7956989247311828, 'test_recall': 0.8390022675736961, 'test_AUC': 0.8681659214662005, 'test_runtime': 0.517, 'test_samples_per_second': 1744.609, 'test_steps_per_second': 29.012}
195 XLM-B 999 2e-05 1500
Model seed is: 999, total, training informative and uninf samples: 4508 ,             1815, 1791


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6685
200,0.5943
300,0.4978
400,0.4873
500,0.4195
600,0.3375
700,0.3391
800,0.242
900,0.247
1000,0.1972


for ratio of 195, model of XLM-B, seed of 999 performance is:
 0.5003236245954693  and  {'test_loss': 0.9022416472434998, 'test_accuracy': 0.8226164079822617, 'test_f1': 0.8268398268398268, 'test_precision': 0.7876288659793814, 'test_recall': 0.8701594533029613, 'test_AUC': 0.8853225227175447, 'test_runtime': 0.5131, 'test_samples_per_second': 1758.078, 'test_steps_per_second': 29.236}
195 XLM-L 9741 8e-06 1500
Model seed is: 9741, total, training informative and uninf samples: 4508 ,             1809, 1797


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-large and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6864
200,0.557
300,0.4788
400,0.4478
500,0.3984
600,0.3422
700,0.3006
800,0.2421
900,0.2311
1000,0.1822


for ratio of 195, model of XLM-L, seed of 9741 performance is:
 0.456957928802589  and  {'test_loss': 0.9155508875846863, 'test_accuracy': 0.8137472283813747, 'test_f1': 0.8212765957446808, 'test_precision': 0.7797979797979798, 'test_recall': 0.8674157303370786, 'test_AUC': 0.9004007572591153, 'test_runtime': 1.5015, 'test_samples_per_second': 600.721, 'test_steps_per_second': 9.99}
195 XLM-L 1694 8e-06 1500
Model seed is: 1694, total, training informative and uninf samples: 4508 ,             1810, 1796


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-large and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.687
200,0.6115
300,0.4972
400,0.4656
500,0.4105
600,0.3413
700,0.3489
800,0.2749
900,0.2519
1000,0.1998


for ratio of 195, model of XLM-L, seed of 1694 performance is:
 0.5119741100323625  and  {'test_loss': 0.8290690779685974, 'test_accuracy': 0.8104212860310421, 'test_f1': 0.8178913738019169, 'test_precision': 0.7757575757575758, 'test_recall': 0.8648648648648649, 'test_AUC': 0.8935441992210553, 'test_runtime': 1.5012, 'test_samples_per_second': 600.845, 'test_steps_per_second': 9.992}
195 XLM-L 6932 8e-06 1500
Model seed is: 6932, total, training informative and uninf samples: 4508 ,             1792, 1814


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-large and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6874
200,0.5868
300,0.4922
400,0.4655
500,0.3987
600,0.3427
700,0.3267
800,0.2562
900,0.2777
1000,0.1919


for ratio of 195, model of XLM-L, seed of 6932 performance is:
 0.47702265372168284  and  {'test_loss': 0.9700421690940857, 'test_accuracy': 0.7982261640798226, 'test_f1': 0.8063829787234043, 'test_precision': 0.7928870292887029, 'test_recall': 0.8203463203463204, 'test_AUC': 0.8741587957497048, 'test_runtime': 1.4923, 'test_samples_per_second': 604.434, 'test_steps_per_second': 10.052}
195 XLM-L 94 8e-06 1500
Model seed is: 94, total, training informative and uninf samples: 4508 ,             1789, 1817


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-large and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6818
200,0.6207
300,0.5952
400,0.5256
500,0.4703
600,0.4557
700,0.386
800,0.3627
900,0.3541
1000,0.2726


for ratio of 195, model of XLM-L, seed of 94 performance is:
 0.48284789644012943  and  {'test_loss': 0.5636366605758667, 'test_accuracy': 0.8248337028824834, 'test_f1': 0.8384458077709611, 'test_precision': 0.7992202729044834, 'test_recall': 0.8817204301075269, 'test_AUC': 0.8924091434758005, 'test_runtime': 1.4995, 'test_samples_per_second': 601.516, 'test_steps_per_second': 10.003}
195 XLM-L 791 8e-06 1500
Model seed is: 791, total, training informative and uninf samples: 4508 ,             1822, 1784


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-large and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.7135
200,0.6259
300,0.5193
400,0.4593
500,0.3792
600,0.3617
700,0.3495
800,0.2595
900,0.2697
1000,0.2196


for ratio of 195, model of XLM-L, seed of 791 performance is:
 0.4970873786407767  and  {'test_loss': 0.8893342018127441, 'test_accuracy': 0.8082039911308204, 'test_f1': 0.807563959955506, 'test_precision': 0.7773019271948608, 'test_recall': 0.8402777777777778, 'test_AUC': 0.8867760047281322, 'test_runtime': 1.493, 'test_samples_per_second': 604.167, 'test_steps_per_second': 10.047}
195 XLM-L 5 8e-06 1500
Model seed is: 5, total, training informative and uninf samples: 4508 ,             1789, 1817


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-large and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.7014
200,0.6438
300,0.5447
400,0.5039
500,0.458
600,0.3958
700,0.4048
800,0.3166
900,0.2965
1000,0.2201


for ratio of 195, model of XLM-L, seed of 5 performance is:
 0.5074433656957928  and  {'test_loss': 0.6860358119010925, 'test_accuracy': 0.8237250554323725, 'test_f1': 0.8355739400206824, 'test_precision': 0.8047808764940239, 'test_recall': 0.8688172043010752, 'test_AUC': 0.8939937501537856, 'test_runtime': 1.4917, 'test_samples_per_second': 604.687, 'test_steps_per_second': 10.056}
195 XLM-L 1759 8e-06 1500
Model seed is: 1759, total, training informative and uninf samples: 4508 ,             1769, 1837


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-large and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.7954
200,0.7432
300,0.7093
400,0.6952
500,0.6412
600,0.5909
700,0.5327
800,0.5225
900,0.4892
1000,0.4193


for ratio of 195, model of XLM-L, seed of 1759 performance is:
 0.5372168284789643  and  {'test_loss': 0.5563607215881348, 'test_accuracy': 0.7926829268292683, 'test_f1': 0.8135593220338984, 'test_precision': 0.7876447876447876, 'test_recall': 0.8412371134020619, 'test_AUC': 0.8596603129867241, 'test_runtime': 1.4999, 'test_samples_per_second': 601.358, 'test_steps_per_second': 10.0}
195 XLM-L 323 8e-06 1500
Model seed is: 323, total, training informative and uninf samples: 4508 ,             1816, 1790


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-large and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6816
200,0.5555
300,0.4906
400,0.4353
500,0.4133
600,0.36
700,0.333
800,0.2865
900,0.2613
1000,0.2107


for ratio of 195, model of XLM-L, seed of 323 performance is:
 0.5203883495145631  and  {'test_loss': 0.784050703048706, 'test_accuracy': 0.8192904656319291, 'test_f1': 0.8282402528977871, 'test_precision': 0.7690802348336595, 'test_recall': 0.8972602739726028, 'test_AUC': 0.9124498110533775, 'test_runtime': 1.4964, 'test_samples_per_second': 602.796, 'test_steps_per_second': 10.024}
195 XLM-L 200 8e-06 1500
Model seed is: 200, total, training informative and uninf samples: 4508 ,             1813, 1793


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-large and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.7024
200,0.6412
300,0.54
400,0.5197
500,0.4442
600,0.3801
700,0.3906
800,0.3292
900,0.267
1000,0.2228


for ratio of 195, model of XLM-L, seed of 200 performance is:
 0.486084142394822  and  {'test_loss': 0.7705063223838806, 'test_accuracy': 0.8181818181818182, 'test_f1': 0.8205689277899344, 'test_precision': 0.7928118393234672, 'test_recall': 0.8503401360544217, 'test_AUC': 0.8859769504331015, 'test_runtime': 1.4986, 'test_samples_per_second': 601.876, 'test_steps_per_second': 10.009}
195 XLM-L 999 8e-06 1500
Model seed is: 999, total, training informative and uninf samples: 4508 ,             1815, 1791


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-large and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6831
200,0.6009
300,0.5455
400,0.5563
500,0.4952
600,0.4374
700,0.461
800,0.3843
900,0.3657
1000,0.3413


for ratio of 195, model of XLM-L, seed of 999 performance is:
 0.5469255663430421  and  {'test_loss': 0.5553362369537354, 'test_accuracy': 0.8226164079822617, 'test_f1': 0.8301486199575373, 'test_precision': 0.7773359840954275, 'test_recall': 0.8906605922551253, 'test_AUC': 0.8981880082850775, 'test_runtime': 1.5012, 'test_samples_per_second': 600.872, 'test_steps_per_second': 9.992}
number of training samples:  (3606, 13)
175 ALBERT-L 9741 8e-06 1200
Model seed is: 9741, total, training informative and uninf samples: 3256 ,             1321, 1284


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6153
200,0.5129
300,0.4224
400,0.3074
500,0.2509
600,0.1512
700,0.1303
800,0.0929
900,0.059
1000,0.0824


for ratio of 175, model of ALBERT-L, seed of 9741 performance is:
 0.5574468085106383  and  {'test_loss': 1.1381642818450928, 'test_accuracy': 0.7772657450076805, 'test_f1': 0.7758887171561053, 'test_precision': 0.7382352941176471, 'test_recall': 0.8175895765472313, 'test_AUC': 0.8455893492917204, 'test_runtime': 1.2162, 'test_samples_per_second': 535.279, 'test_steps_per_second': 9.045}
175 ALBERT-L 1694 8e-06 1200
Model seed is: 1694, total, training informative and uninf samples: 3256 ,             1289, 1316


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6389
200,0.5346
300,0.477
400,0.3559
500,0.3267
600,0.2116
700,0.1631
800,0.1235
900,0.0985
1000,0.0632


for ratio of 175, model of ALBERT-L, seed of 1694 performance is:
 0.4695035460992908  and  {'test_loss': 1.188646912574768, 'test_accuracy': 0.7603686635944701, 'test_f1': 0.7712609970674487, 'test_precision': 0.7667638483965015, 'test_recall': 0.775811209439528, 'test_AUC': 0.8459836623553438, 'test_runtime': 1.2109, 'test_samples_per_second': 537.628, 'test_steps_per_second': 9.084}
175 ALBERT-L 6932 8e-06 1200
Model seed is: 6932, total, training informative and uninf samples: 3256 ,             1293, 1312


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6064
200,0.4903
300,0.3942
400,0.3047
500,0.2547
600,0.1562
700,0.1091
800,0.0946
900,0.0637
1000,0.081


for ratio of 175, model of ALBERT-L, seed of 6932 performance is:
 0.47423167848699765  and  {'test_loss': 1.2014799118041992, 'test_accuracy': 0.7926267281105991, 'test_f1': 0.7900466562986004, 'test_precision': 0.8246753246753247, 'test_recall': 0.7582089552238805, 'test_AUC': 0.8460891743812582, 'test_runtime': 1.21, 'test_samples_per_second': 537.998, 'test_steps_per_second': 9.091}
175 ALBERT-L 94 8e-06 1200
Model seed is: 94, total, training informative and uninf samples: 3256 ,             1275, 1330


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6587
200,0.5211
300,0.4534
400,0.348
500,0.2808
600,0.1733
700,0.1544
800,0.098
900,0.0692
1000,0.0709


for ratio of 175, model of ALBERT-L, seed of 94 performance is:
 0.5054373522458628  and  {'test_loss': 1.071860432624817, 'test_accuracy': 0.794162826420891, 'test_f1': 0.8057971014492754, 'test_precision': 0.8249258160237388, 'test_recall': 0.7875354107648725, 'test_AUC': 0.8688613419016293, 'test_runtime': 1.2153, 'test_samples_per_second': 535.663, 'test_steps_per_second': 9.051}
175 ALBERT-L 791 8e-06 1200
Model seed is: 791, total, training informative and uninf samples: 3256 ,             1287, 1318


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6037
200,0.5576
300,0.4267
400,0.3267
500,0.2893
600,0.1809
700,0.1391
800,0.1341
900,0.084
1000,0.0951


for ratio of 175, model of ALBERT-L, seed of 791 performance is:
 0.5153664302600472  and  {'test_loss': 1.041755199432373, 'test_accuracy': 0.7788018433179723, 'test_f1': 0.7948717948717948, 'test_precision': 0.7728531855955678, 'test_recall': 0.8181818181818182, 'test_AUC': 0.8285592659161858, 'test_runtime': 1.2323, 'test_samples_per_second': 528.281, 'test_steps_per_second': 8.926}
175 ALBERT-L 5 8e-06 1200
Model seed is: 5, total, training informative and uninf samples: 3256 ,             1274, 1331


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6129
200,0.5135
300,0.4245
400,0.3401
500,0.2748
600,0.1953
700,0.1247
800,0.1205
900,0.0598
1000,0.0426


for ratio of 175, model of ALBERT-L, seed of 5 performance is:
 0.5016548463356973  and  {'test_loss': 0.9613151550292969, 'test_accuracy': 0.815668202764977, 'test_f1': 0.8270893371757926, 'test_precision': 0.8441176470588235, 'test_recall': 0.8107344632768362, 'test_AUC': 0.8780650193079571, 'test_runtime': 1.2082, 'test_samples_per_second': 538.832, 'test_steps_per_second': 9.105}
175 ALBERT-L 1759 8e-06 1200
Model seed is: 1759, total, training informative and uninf samples: 3256 ,             1304, 1301


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6359
200,0.5356
300,0.4526
400,0.3619
500,0.3236
600,0.224
700,0.172
800,0.1431
900,0.0902
1000,0.0771


for ratio of 175, model of ALBERT-L, seed of 1759 performance is:
 0.4912529550827423  and  {'test_loss': 1.0338397026062012, 'test_accuracy': 0.7880184331797235, 'test_f1': 0.7940298507462686, 'test_precision': 0.7687861271676301, 'test_recall': 0.8209876543209876, 'test_AUC': 0.8536829388001662, 'test_runtime': 1.2134, 'test_samples_per_second': 536.494, 'test_steps_per_second': 9.065}
175 ALBERT-L 323 8e-06 1200
Model seed is: 323, total, training informative and uninf samples: 3256 ,             1284, 1321


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6301
200,0.5323
300,0.4315
400,0.3253
500,0.2985
600,0.2166
700,0.1679
800,0.1347
900,0.0826
1000,0.0832


for ratio of 175, model of ALBERT-L, seed of 323 performance is:
 0.47044917257683216  and  {'test_loss': 1.180240273475647, 'test_accuracy': 0.7741935483870968, 'test_f1': 0.7860262008733625, 'test_precision': 0.7871720116618076, 'test_recall': 0.7848837209302325, 'test_AUC': 0.8406465419286416, 'test_runtime': 1.2082, 'test_samples_per_second': 538.807, 'test_steps_per_second': 9.104}
175 ALBERT-L 200 8e-06 1200
Model seed is: 200, total, training informative and uninf samples: 3256 ,             1288, 1317


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6375
200,0.5529
300,0.4465
400,0.352
500,0.3047
600,0.199
700,0.1713
800,0.1288
900,0.0985
1000,0.0676


for ratio of 175, model of ALBERT-L, seed of 200 performance is:
 0.4444444444444444  and  {'test_loss': 0.9762758016586304, 'test_accuracy': 0.8187403993855606, 'test_f1': 0.8201219512195123, 'test_precision': 0.8512658227848101, 'test_recall': 0.7911764705882353, 'test_AUC': 0.8686495176848876, 'test_runtime': 1.2147, 'test_samples_per_second': 535.933, 'test_steps_per_second': 9.056}
175 ALBERT-L 999 8e-06 1200
Model seed is: 999, total, training informative and uninf samples: 3256 ,             1301, 1304


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6572
200,0.4983
300,0.495
400,0.3672
500,0.3273
600,0.244
700,0.2173
800,0.1714
900,0.1093
1000,0.1165


for ratio of 175, model of ALBERT-L, seed of 999 performance is:
 0.4695035460992908  and  {'test_loss': 1.014793038368225, 'test_accuracy': 0.7972350230414746, 'test_f1': 0.8041543026706232, 'test_precision': 0.7809798270893372, 'test_recall': 0.8287461773700305, 'test_AUC': 0.8573073583267263, 'test_runtime': 1.2155, 'test_samples_per_second': 535.595, 'test_steps_per_second': 9.05}
175 DISRoBERTa-B 9741 2e-05 1200
Model seed is: 9741, total, training informative and uninf samples: 3256 ,             1321, 1284


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6108
200,0.5067
300,0.4019
400,0.3041
500,0.2824
600,0.2305
700,0.1546
800,0.1271
900,0.0789
1000,0.096


for ratio of 175, model of DISRoBERTa-B, seed of 9741 performance is:
 0.4836879432624113  and  {'test_loss': 1.0884201526641846, 'test_accuracy': 0.7864823348694316, 'test_f1': 0.7831513260530423, 'test_precision': 0.7514970059880239, 'test_recall': 0.8175895765472313, 'test_AUC': 0.8684001212029391, 'test_runtime': 0.2165, 'test_samples_per_second': 3006.805, 'test_steps_per_second': 50.806}
175 DISRoBERTa-B 1694 2e-05 1200
Model seed is: 1694, total, training informative and uninf samples: 3256 ,             1289, 1316


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6305
200,0.4943
300,0.4427
400,0.3114
500,0.2809
600,0.1752
700,0.1671
800,0.1311
900,0.1046
1000,0.0695


for ratio of 175, model of DISRoBERTa-B, seed of 1694 performance is:
 0.4718676122931442  and  {'test_loss': 1.0711830854415894, 'test_accuracy': 0.8079877112135176, 'test_f1': 0.8211731044349071, 'test_precision': 0.7972222222222223, 'test_recall': 0.8466076696165191, 'test_AUC': 0.87809167233946, 'test_runtime': 0.2255, 'test_samples_per_second': 2886.898, 'test_steps_per_second': 48.78}
175 DISRoBERTa-B 6932 2e-05 1200
Model seed is: 6932, total, training informative and uninf samples: 3256 ,             1293, 1312


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6182
200,0.4973
300,0.4036
400,0.3494
500,0.2854
600,0.1937
700,0.1597
800,0.125
900,0.0929
1000,0.0741


for ratio of 175, model of DISRoBERTa-B, seed of 6932 performance is:
 0.48699763593380613  and  {'test_loss': 1.110069751739502, 'test_accuracy': 0.7864823348694316, 'test_f1': 0.7952871870397644, 'test_precision': 0.7848837209302325, 'test_recall': 0.8059701492537313, 'test_AUC': 0.8786132627999246, 'test_runtime': 0.2255, 'test_samples_per_second': 2886.349, 'test_steps_per_second': 48.771}
175 DISRoBERTa-B 94 2e-05 1200
Model seed is: 94, total, training informative and uninf samples: 3256 ,             1275, 1330


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6295
200,0.4651
300,0.4144
400,0.3238
500,0.2849
600,0.1966
700,0.1543
800,0.1485
900,0.0924
1000,0.0732


for ratio of 175, model of DISRoBERTa-B, seed of 94 performance is:
 0.4813238770685579  and  {'test_loss': 1.1127445697784424, 'test_accuracy': 0.804915514592934, 'test_f1': 0.8172661870503596, 'test_precision': 0.8304093567251462, 'test_recall': 0.8045325779036827, 'test_AUC': 0.8532140616384964, 'test_runtime': 0.2185, 'test_samples_per_second': 2979.146, 'test_steps_per_second': 50.339}
175 DISRoBERTa-B 791 2e-05 1200
Model seed is: 791, total, training informative and uninf samples: 3256 ,             1287, 1318


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.5958
200,0.5098
300,0.4075
400,0.3301
500,0.2777
600,0.1932
700,0.1536
800,0.1356
900,0.0867
1000,0.0798


for ratio of 175, model of DISRoBERTa-B, seed of 791 performance is:
 0.4945626477541371  and  {'test_loss': 1.1527910232543945, 'test_accuracy': 0.8095238095238095, 'test_f1': 0.8208092485549134, 'test_precision': 0.8091168091168092, 'test_recall': 0.8328445747800587, 'test_AUC': 0.8739475924699651, 'test_runtime': 0.2189, 'test_samples_per_second': 2974.295, 'test_steps_per_second': 50.257}
175 DISRoBERTa-B 5 2e-05 1200
Model seed is: 5, total, training informative and uninf samples: 3256 ,             1274, 1331


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6205
200,0.4991
300,0.4387
400,0.3338
500,0.3201
600,0.2011
700,0.1869
800,0.1487
900,0.0901
1000,0.0997


for ratio of 175, model of DISRoBERTa-B, seed of 5 performance is:
 0.4524822695035461  and  {'test_loss': 0.9389167428016663, 'test_accuracy': 0.8187403993855606, 'test_f1': 0.830945558739255, 'test_precision': 0.8430232558139535, 'test_recall': 0.8192090395480226, 'test_AUC': 0.8932736023131503, 'test_runtime': 0.2147, 'test_samples_per_second': 3032.33, 'test_steps_per_second': 51.238}
175 DISRoBERTa-B 1759 2e-05 1200
Model seed is: 1759, total, training informative and uninf samples: 3256 ,             1304, 1301


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6352
200,0.5146
300,0.3921
400,0.3304
500,0.3094
600,0.1989
700,0.1554
800,0.1282
900,0.0872
1000,0.0911


for ratio of 175, model of DISRoBERTa-B, seed of 1759 performance is:
 0.51725768321513  and  {'test_loss': 0.9690761566162109, 'test_accuracy': 0.815668202764977, 'test_f1': 0.8245614035087719, 'test_precision': 0.7833333333333333, 'test_recall': 0.8703703703703703, 'test_AUC': 0.8806206818439234, 'test_runtime': 0.217, 'test_samples_per_second': 2999.443, 'test_steps_per_second': 50.682}
175 DISRoBERTa-B 323 2e-05 1200
Model seed is: 323, total, training informative and uninf samples: 3256 ,             1284, 1321


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6165
200,0.5222
300,0.3816
400,0.3144
500,0.2664
600,0.2077
700,0.1395
800,0.0921
900,0.0769
1000,0.0579


for ratio of 175, model of DISRoBERTa-B, seed of 323 performance is:
 0.47848699763593383  and  {'test_loss': 1.2093472480773926, 'test_accuracy': 0.7926267281105991, 'test_f1': 0.8074179743223966, 'test_precision': 0.7927170868347339, 'test_recall': 0.8226744186046512, 'test_AUC': 0.8765813195970003, 'test_runtime': 0.2225, 'test_samples_per_second': 2926.281, 'test_steps_per_second': 49.446}
175 DISRoBERTa-B 200 2e-05 1200
Model seed is: 200, total, training informative and uninf samples: 3256 ,             1288, 1317


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6048
200,0.4926
300,0.3999
400,0.31
500,0.2983
600,0.18
700,0.181
800,0.1189
900,0.1155
1000,0.0827


for ratio of 175, model of DISRoBERTa-B, seed of 200 performance is:
 0.48226950354609927  and  {'test_loss': 0.8635914921760559, 'test_accuracy': 0.837173579109063, 'test_f1': 0.8436578171091446, 'test_precision': 0.8461538461538461, 'test_recall': 0.8411764705882353, 'test_AUC': 0.906941554756951, 'test_runtime': 0.217, 'test_samples_per_second': 3000.264, 'test_steps_per_second': 50.696}
175 DISRoBERTa-B 999 2e-05 1200
Model seed is: 999, total, training informative and uninf samples: 3256 ,             1301, 1304


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6097
200,0.4634
300,0.4273
400,0.2898
500,0.2674
600,0.1689
700,0.1514
800,0.115
900,0.074
1000,0.0746


for ratio of 175, model of DISRoBERTa-B, seed of 999 performance is:
 0.49976359338061466  and  {'test_loss': 1.0721008777618408, 'test_accuracy': 0.8064516129032258, 'test_f1': 0.816860465116279, 'test_precision': 0.778393351800554, 'test_recall': 0.8593272171253823, 'test_AUC': 0.8881243628950052, 'test_runtime': 0.2219, 'test_samples_per_second': 2933.644, 'test_steps_per_second': 49.57}
175 XLNet-B 9741 2e-05 1200
Model seed is: 9741, total, training informative and uninf samples: 3256 ,             1321, 1284


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6338
200,0.549
300,0.4609
400,0.3635
500,0.3156
600,0.2108
700,0.1612
800,0.1133
900,0.0868
1000,0.0878


for ratio of 175, model of XLNet-B, seed of 9741 performance is:
 0.5026004728132387  and  {'test_loss': 1.2309491634368896, 'test_accuracy': 0.7910906298003072, 'test_f1': 0.7848101265822786, 'test_precision': 0.7630769230769231, 'test_recall': 0.8078175895765473, 'test_AUC': 0.8786171502158928, 'test_runtime': 0.5218, 'test_samples_per_second': 1247.642, 'test_steps_per_second': 21.082}
175 XLNet-B 1694 2e-05 1200
Model seed is: 1694, total, training informative and uninf samples: 3256 ,             1289, 1316


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6595
200,0.5136
300,0.4931
400,0.3388
500,0.322
600,0.2284
700,0.1638
800,0.1472
900,0.0744
1000,0.0699


for ratio of 175, model of XLNet-B, seed of 1694 performance is:
 0.5044917257683215  and  {'test_loss': 1.323141098022461, 'test_accuracy': 0.7926267281105991, 'test_f1': 0.8057553956834532, 'test_precision': 0.7865168539325843, 'test_recall': 0.8259587020648967, 'test_AUC': 0.8709155888359428, 'test_runtime': 0.5119, 'test_samples_per_second': 1271.645, 'test_steps_per_second': 21.487}
175 XLNet-B 6932 2e-05 1200
Model seed is: 6932, total, training informative and uninf samples: 3256 ,             1293, 1312


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6477
200,0.5395
300,0.4642
400,0.3599
500,0.3152
600,0.2191
700,0.1557
800,0.1355
900,0.0885
1000,0.0812


for ratio of 175, model of XLNet-B, seed of 6932 performance is:
 0.5479905437352246  and  {'test_loss': 1.349302887916565, 'test_accuracy': 0.7803379416282642, 'test_f1': 0.791848617176128, 'test_precision': 0.7727272727272727, 'test_recall': 0.8119402985074626, 'test_AUC': 0.8720763272246363, 'test_runtime': 0.5237, 'test_samples_per_second': 1243.148, 'test_steps_per_second': 21.006}
175 XLNet-B 94 2e-05 1200
Model seed is: 94, total, training informative and uninf samples: 3256 ,             1275, 1330


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6223
200,0.4635
300,0.4324
400,0.3365
500,0.2603
600,0.1747
700,0.1551
800,0.1235
900,0.0586
1000,0.0615


for ratio of 175, model of XLNet-B, seed of 94 performance is:
 0.4912529550827423  and  {'test_loss': 1.4154200553894043, 'test_accuracy': 0.7818740399385561, 'test_f1': 0.7994350282485877, 'test_precision': 0.7971830985915493, 'test_recall': 0.8016997167138811, 'test_AUC': 0.861646101488678, 'test_runtime': 0.5185, 'test_samples_per_second': 1255.508, 'test_steps_per_second': 21.214}
175 XLNet-B 791 2e-05 1200
Model seed is: 791, total, training informative and uninf samples: 3256 ,             1287, 1318


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6355
200,0.5756
300,0.4746
400,0.3879
500,0.3109
600,0.1993
700,0.1522
800,0.1359
900,0.0788
1000,0.1109


for ratio of 175, model of XLNet-B, seed of 791 performance is:
 0.5333333333333333  and  {'test_loss': 1.38141667842865, 'test_accuracy': 0.7849462365591398, 'test_f1': 0.8071625344352618, 'test_precision': 0.7610389610389611, 'test_recall': 0.8592375366568915, 'test_AUC': 0.8549806073219185, 'test_runtime': 0.5118, 'test_samples_per_second': 1272.082, 'test_steps_per_second': 21.494}
175 XLNet-B 5 2e-05 1200
Model seed is: 5, total, training informative and uninf samples: 3256 ,             1274, 1331


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6381
200,0.5226
300,0.4768
400,0.3559
500,0.309
600,0.2096
700,0.1811
800,0.1324
900,0.0832
1000,0.1039


for ratio of 175, model of XLNet-B, seed of 5 performance is:
 0.49976359338061466  and  {'test_loss': 1.2456495761871338, 'test_accuracy': 0.7818740399385561, 'test_f1': 0.8027777777777778, 'test_precision': 0.7896174863387978, 'test_recall': 0.8163841807909604, 'test_AUC': 0.8674884437596302, 'test_runtime': 0.5193, 'test_samples_per_second': 1253.527, 'test_steps_per_second': 21.181}
175 XLNet-B 1759 2e-05 1200
Model seed is: 1759, total, training informative and uninf samples: 3256 ,             1304, 1301


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6619
200,0.562
300,0.4618
400,0.3654
500,0.3448
600,0.2348
700,0.1784
800,0.1239
900,0.0801
1000,0.0767


for ratio of 175, model of XLNet-B, seed of 1759 performance is:
 0.5702127659574469  and  {'test_loss': 1.196649193763733, 'test_accuracy': 0.7956989247311828, 'test_f1': 0.8080808080808081, 'test_precision': 0.7588075880758808, 'test_recall': 0.8641975308641975, 'test_AUC': 0.8838014875221807, 'test_runtime': 0.5209, 'test_samples_per_second': 1249.833, 'test_steps_per_second': 21.119}
175 XLNet-B 323 2e-05 1200
Model seed is: 323, total, training informative and uninf samples: 3256 ,             1284, 1321


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6321
200,0.5624
300,0.4398
400,0.3308
500,0.2961
600,0.1862
700,0.1333
800,0.1248
900,0.0666
1000,0.07


for ratio of 175, model of XLNet-B, seed of 323 performance is:
 0.5295508274231678  and  {'test_loss': 1.241348147392273, 'test_accuracy': 0.8172043010752689, 'test_f1': 0.8349514563106796, 'test_precision': 0.7984084880636605, 'test_recall': 0.875, 'test_AUC': 0.8807855465495037, 'test_runtime': 0.517, 'test_samples_per_second': 1259.278, 'test_steps_per_second': 21.278}
175 XLNet-B 200 2e-05 1200
Model seed is: 200, total, training informative and uninf samples: 3256 ,             1288, 1317


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6847
200,0.5437
300,0.4938
400,0.4057
500,0.365
600,0.2419
700,0.2062
800,0.1681
900,0.137
1000,0.1074


for ratio of 175, model of XLNet-B, seed of 200 performance is:
 0.5914893617021276  and  {'test_loss': 1.160494089126587, 'test_accuracy': 0.7818740399385561, 'test_f1': 0.804945054945055, 'test_precision': 0.7551546391752577, 'test_recall': 0.861764705882353, 'test_AUC': 0.8747115566483828, 'test_runtime': 0.5159, 'test_samples_per_second': 1261.946, 'test_steps_per_second': 21.323}
175 XLNet-B 999 2e-05 1200
Model seed is: 999, total, training informative and uninf samples: 3256 ,             1301, 1304


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6324
200,0.4915
300,0.4374
400,0.3136
500,0.2898
600,0.1961
700,0.1625
800,0.1427
900,0.0902
1000,0.0878


for ratio of 175, model of XLNet-B, seed of 999 performance is:
 0.5754137115839244  and  {'test_loss': 1.2383424043655396, 'test_accuracy': 0.8064516129032258, 'test_f1': 0.8240223463687152, 'test_precision': 0.7583547557840618, 'test_recall': 0.9021406727828746, 'test_AUC': 0.8769868237248462, 'test_runtime': 0.518, 'test_samples_per_second': 1256.715, 'test_steps_per_second': 21.235}
175 XLNet-L 9741 1e-05 900
Model seed is: 9741, total, training informative and uninf samples: 3256 ,             1321, 1284


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6821
200,0.6069
300,0.5113
400,0.4052
500,0.3209
600,0.2344
700,0.1738
800,0.1306
900,0.1013


for ratio of 175, model of XLNet-L, seed of 9741 performance is:
 0.5673758865248227  and  {'test_loss': 0.748542070388794, 'test_accuracy': 0.7880184331797235, 'test_f1': 0.7876923076923078, 'test_precision': 0.7463556851311953, 'test_recall': 0.8338762214983714, 'test_AUC': 0.872405499583365, 'test_runtime': 1.3953, 'test_samples_per_second': 466.555, 'test_steps_per_second': 7.883}
175 XLNet-L 1694 1e-05 900
Model seed is: 1694, total, training informative and uninf samples: 3256 ,             1289, 1316


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.675
200,0.5583
300,0.4753
400,0.3748
500,0.2803
600,0.1926
700,0.1559
800,0.1184
900,0.0779


for ratio of 175, model of XLNet-L, seed of 1694 performance is:
 0.4628841607565012  and  {'test_loss': 0.7389739751815796, 'test_accuracy': 0.8064516129032258, 'test_f1': 0.8179190751445087, 'test_precision': 0.8016997167138811, 'test_recall': 0.8348082595870207, 'test_AUC': 0.8729861583843885, 'test_runtime': 1.3899, 'test_samples_per_second': 468.389, 'test_steps_per_second': 7.914}
175 XLNet-L 6932 1e-05 900
Model seed is: 6932, total, training informative and uninf samples: 3256 ,             1293, 1312


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6979
200,0.61
300,0.4893
400,0.4171
500,0.335
600,0.2593
700,0.1788
800,0.1517
900,0.1351


for ratio of 175, model of XLNet-L, seed of 6932 performance is:
 0.5408983451536643  and  {'test_loss': 0.6178068518638611, 'test_accuracy': 0.794162826420891, 'test_f1': 0.8023598820058997, 'test_precision': 0.793002915451895, 'test_recall': 0.8119402985074626, 'test_AUC': 0.879548460230493, 'test_runtime': 1.3902, 'test_samples_per_second': 468.262, 'test_steps_per_second': 7.912}
175 XLNet-L 94 1e-05 900
Model seed is: 94, total, training informative and uninf samples: 3256 ,             1275, 1330


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6823
200,0.5687
300,0.4744
400,0.3805
500,0.2886
600,0.2074
700,0.1587
800,0.1107
900,0.0862


for ratio of 175, model of XLNet-L, seed of 94 performance is:
 0.4817966903073286  and  {'test_loss': 0.7457115054130554, 'test_accuracy': 0.7956989247311828, 'test_f1': 0.8023774145616641, 'test_precision': 0.84375, 'test_recall': 0.7648725212464589, 'test_AUC': 0.8668650303249235, 'test_runtime': 1.3848, 'test_samples_per_second': 470.089, 'test_steps_per_second': 7.943}
175 XLNet-L 791 1e-05 900
Model seed is: 791, total, training informative and uninf samples: 3256 ,             1287, 1318


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.7062
200,0.7079
300,0.6991
400,0.632
500,0.5632
600,0.4931
700,0.4246
800,0.3853
900,0.3566


for ratio of 175, model of XLNet-L, seed of 791 performance is:
 0.508274231678487  and  {'test_loss': 0.628106951713562, 'test_accuracy': 0.7634408602150538, 'test_f1': 0.7701492537313434, 'test_precision': 0.78419452887538, 'test_recall': 0.7565982404692082, 'test_AUC': 0.8239428625484817, 'test_runtime': 1.3921, 'test_samples_per_second': 467.63, 'test_steps_per_second': 7.902}
175 XLNet-L 5 1e-05 900
Model seed is: 5, total, training informative and uninf samples: 3256 ,             1274, 1331


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6959
200,0.6529
300,0.5681
400,0.5045
500,0.4357
600,0.3438
700,0.304
800,0.2258
900,0.2173


for ratio of 175, model of XLNet-L, seed of 5 performance is:
 0.5342789598108747  and  {'test_loss': 0.5800862312316895, 'test_accuracy': 0.8003072196620584, 'test_f1': 0.8163841807909603, 'test_precision': 0.8163841807909604, 'test_recall': 0.8163841807909604, 'test_AUC': 0.8625520744164813, 'test_runtime': 1.3957, 'test_samples_per_second': 466.429, 'test_steps_per_second': 7.881}
175 XLNet-L 1759 1e-05 900
Model seed is: 1759, total, training informative and uninf samples: 3256 ,             1304, 1301


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6893
200,0.595
300,0.4969
400,0.3991
500,0.3088
600,0.2071
700,0.1485
800,0.1123
900,0.0851


for ratio of 175, model of XLNet-L, seed of 1759 performance is:
 0.5962174940898345  and  {'test_loss': 0.8139095306396484, 'test_accuracy': 0.7849462365591398, 'test_f1': 0.7965116279069767, 'test_precision': 0.7527472527472527, 'test_recall': 0.845679012345679, 'test_AUC': 0.8687563710499491, 'test_runtime': 1.3892, 'test_samples_per_second': 468.618, 'test_steps_per_second': 7.918}
175 XLNet-L 323 1e-05 900
Model seed is: 323, total, training informative and uninf samples: 3256 ,             1284, 1321


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6979
200,0.6315
300,0.5276
400,0.4234
500,0.3148
600,0.2344
700,0.1709
800,0.1219
900,0.0814


for ratio of 175, model of XLNet-L, seed of 323 performance is:
 0.4359338061465721  and  {'test_loss': 0.9306308627128601, 'test_accuracy': 0.8003072196620584, 'test_f1': 0.8065476190476191, 'test_precision': 0.8262195121951219, 'test_recall': 0.7877906976744186, 'test_AUC': 0.8634383758806151, 'test_runtime': 1.3835, 'test_samples_per_second': 470.539, 'test_steps_per_second': 7.951}
175 XLNet-L 200 1e-05 900
Model seed is: 200, total, training informative and uninf samples: 3256 ,             1288, 1317


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6994
200,0.6237
300,0.5535
400,0.4789
500,0.4173
600,0.3453
700,0.2953
800,0.2618
900,0.2157


for ratio of 175, model of XLNet-L, seed of 200 performance is:
 0.5210401891252955  and  {'test_loss': 0.6150898337364197, 'test_accuracy': 0.783410138248848, 'test_f1': 0.789865871833085, 'test_precision': 0.8006042296072508, 'test_recall': 0.7794117647058824, 'test_AUC': 0.8596179307735955, 'test_runtime': 1.3932, 'test_samples_per_second': 467.271, 'test_steps_per_second': 7.896}
175 XLNet-L 999 1e-05 900
Model seed is: 999, total, training informative and uninf samples: 3256 ,             1301, 1304


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.685
200,0.5667
300,0.4592
400,0.3819
500,0.3036
600,0.2097
700,0.1472
800,0.1229
900,0.0914


for ratio of 175, model of XLNet-L, seed of 999 performance is:
 0.5981087470449172  and  {'test_loss': 0.8085730075836182, 'test_accuracy': 0.783410138248848, 'test_f1': 0.8016877637130801, 'test_precision': 0.7421875, 'test_recall': 0.8715596330275229, 'test_AUC': 0.8716445803601767, 'test_runtime': 1.3907, 'test_samples_per_second': 468.113, 'test_steps_per_second': 7.91}
175 DEBERT-B 9741 3e-05 1200
Model seed is: 9741, total, training informative and uninf samples: 3256 ,             1321, 1284


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6003
200,0.4694
300,0.3605
400,0.2132
500,0.1965
600,0.1059
700,0.0678
800,0.0473
900,0.0163
1000,0.0232


for ratio of 175, model of DEBERT-B, seed of 9741 performance is:
 0.49314420803782505  and  {'test_loss': 1.4204986095428467, 'test_accuracy': 0.794162826420891, 'test_f1': 0.7879746835443039, 'test_precision': 0.7661538461538462, 'test_recall': 0.8110749185667753, 'test_AUC': 0.8596602530111355, 'test_runtime': 0.465, 'test_samples_per_second': 1400.03, 'test_steps_per_second': 23.656}
175 DEBERT-B 1694 3e-05 1200
Model seed is: 1694, total, training informative and uninf samples: 3256 ,             1289, 1316


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6185
200,0.4473
300,0.398
400,0.2481
500,0.1841
600,0.1218
700,0.0891
800,0.0424
900,0.0343
1000,0.013


for ratio of 175, model of DEBERT-B, seed of 1694 performance is:
 0.5068557919621749  and  {'test_loss': 1.2036068439483643, 'test_accuracy': 0.8172043010752689, 'test_f1': 0.8287769784172662, 'test_precision': 0.8089887640449438, 'test_recall': 0.8495575221238938, 'test_AUC': 0.8897303532259285, 'test_runtime': 0.4651, 'test_samples_per_second': 1399.651, 'test_steps_per_second': 23.65}
175 DEBERT-B 6932 3e-05 1200
Model seed is: 6932, total, training informative and uninf samples: 3256 ,             1293, 1312


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6543
200,0.5439
300,0.4026
400,0.2875
500,0.2081
600,0.1379
700,0.0925
800,0.06
900,0.0307
1000,0.017


for ratio of 175, model of DEBERT-B, seed of 6932 performance is:
 0.5200945626477541  and  {'test_loss': 1.3539339303970337, 'test_accuracy': 0.804915514592934, 'test_f1': 0.8167388167388167, 'test_precision': 0.7905027932960894, 'test_recall': 0.844776119402985, 'test_AUC': 0.8764878140940865, 'test_runtime': 0.4783, 'test_samples_per_second': 1360.985, 'test_steps_per_second': 22.997}
175 DEBERT-B 94 3e-05 1200
Model seed is: 94, total, training informative and uninf samples: 3256 ,             1275, 1330


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6264
200,0.4674
300,0.3946
400,0.2854
500,0.2063
600,0.1466
700,0.0915
800,0.0712
900,0.031
1000,0.0224


for ratio of 175, model of DEBERT-B, seed of 94 performance is:
 0.47139479905437354  and  {'test_loss': 1.2120732069015503, 'test_accuracy': 0.8095238095238095, 'test_f1': 0.8233618233618234, 'test_precision': 0.828080229226361, 'test_recall': 0.8186968838526912, 'test_AUC': 0.861513014050231, 'test_runtime': 0.4642, 'test_samples_per_second': 1402.32, 'test_steps_per_second': 23.695}
175 DEBERT-B 791 3e-05 1200
Model seed is: 791, total, training informative and uninf samples: 3256 ,             1287, 1318


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6163
200,0.5026
300,0.3605
400,0.2584
500,0.1699
600,0.0976
700,0.0467
800,0.0527
900,0.0071
1000,0.0176


for ratio of 175, model of DEBERT-B, seed of 791 performance is:
 0.5177304964539007  and  {'test_loss': 1.4151358604431152, 'test_accuracy': 0.7972350230414746, 'test_f1': 0.8191780821917808, 'test_precision': 0.7686375321336761, 'test_recall': 0.8768328445747801, 'test_AUC': 0.8817708826033488, 'test_runtime': 0.4622, 'test_samples_per_second': 1408.633, 'test_steps_per_second': 23.802}
175 DEBERT-B 5 3e-05 1200
Model seed is: 5, total, training informative and uninf samples: 3256 ,             1274, 1331


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6623
200,0.5314
300,0.4634
400,0.3294
500,0.2694
600,0.1766
700,0.1453
800,0.1043
900,0.0542
1000,0.054


for ratio of 175, model of DEBERT-B, seed of 5 performance is:
 0.45673758865248226  and  {'test_loss': 1.0158507823944092, 'test_accuracy': 0.8294930875576036, 'test_f1': 0.8460471567267683, 'test_precision': 0.8310626702997275, 'test_recall': 0.8615819209039548, 'test_AUC': 0.9015674637143565, 'test_runtime': 0.4753, 'test_samples_per_second': 1369.745, 'test_steps_per_second': 23.145}
175 DEBERT-B 1759 3e-05 1200
Model seed is: 1759, total, training informative and uninf samples: 3256 ,             1304, 1301


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6441
200,0.5031
300,0.3865
400,0.2883
500,0.2601
600,0.1697
700,0.1197
800,0.0829
900,0.0276
1000,0.0448


for ratio of 175, model of DEBERT-B, seed of 1759 performance is:
 0.5290780141843971  and  {'test_loss': 1.1963119506835938, 'test_accuracy': 0.8202764976958525, 'test_f1': 0.8306801736613603, 'test_precision': 0.782016348773842, 'test_recall': 0.8858024691358025, 'test_AUC': 0.8738626496016915, 'test_runtime': 0.4759, 'test_samples_per_second': 1368.067, 'test_steps_per_second': 23.116}
175 DEBERT-B 323 3e-05 1200
Model seed is: 323, total, training informative and uninf samples: 3256 ,             1284, 1321


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6444
200,0.5502
300,0.4095
400,0.2875
500,0.2261
600,0.1257
700,0.1049
800,0.0454
900,0.0351
1000,0.0371


for ratio of 175, model of DEBERT-B, seed of 323 performance is:
 0.502127659574468  and  {'test_loss': 1.4173073768615723, 'test_accuracy': 0.8003072196620584, 'test_f1': 0.8169014084507042, 'test_precision': 0.7923497267759563, 'test_recall': 0.8430232558139535, 'test_AUC': 0.8657961518066815, 'test_runtime': 0.475, 'test_samples_per_second': 1370.603, 'test_steps_per_second': 23.159}
175 DEBERT-B 200 3e-05 1200
Model seed is: 200, total, training informative and uninf samples: 3256 ,             1288, 1317


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6457
200,0.5103
300,0.4115
400,0.2944
500,0.2563
600,0.114
700,0.1374
800,0.0849
900,0.0486
1000,0.0401


for ratio of 175, model of DEBERT-B, seed of 200 performance is:
 0.4912529550827423  and  {'test_loss': 1.136521339416504, 'test_accuracy': 0.8064516129032258, 'test_f1': 0.8205128205128206, 'test_precision': 0.7955801104972375, 'test_recall': 0.8470588235294118, 'test_AUC': 0.8884717230943824, 'test_runtime': 0.4716, 'test_samples_per_second': 1380.433, 'test_steps_per_second': 23.325}
175 DEBERT-B 999 3e-05 1200
Model seed is: 999, total, training informative and uninf samples: 3256 ,             1301, 1304


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6714
200,0.52
300,0.4049
400,0.2582
500,0.219
600,0.1154
700,0.0859
800,0.0432
900,0.0238
1000,0.014


for ratio of 175, model of DEBERT-B, seed of 999 performance is:
 0.47139479905437354  and  {'test_loss': 1.333993673324585, 'test_accuracy': 0.8095238095238095, 'test_f1': 0.8213256484149856, 'test_precision': 0.776566757493188, 'test_recall': 0.8715596330275229, 'test_AUC': 0.889455204439914, 'test_runtime': 0.4698, 'test_samples_per_second': 1385.823, 'test_steps_per_second': 23.416}
175 DEBERT-L 9741 1e-05 1200
Model seed is: 9741, total, training informative and uninf samples: 3256 ,             1321, 1284


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-large and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6066
200,0.4551
300,0.3183
400,0.2006
500,0.162
600,0.0787
700,0.0475
800,0.0418
900,0.0169
1000,0.0154


for ratio of 175, model of DEBERT-L, seed of 9741 performance is:
 0.5295508274231678  and  {'test_loss': 1.3167107105255127, 'test_accuracy': 0.8172043010752689, 'test_f1': 0.8155038759689922, 'test_precision': 0.7781065088757396, 'test_recall': 0.8566775244299675, 'test_AUC': 0.8799617453223241, 'test_runtime': 1.2635, 'test_samples_per_second': 515.222, 'test_steps_per_second': 8.706}
175 DEBERT-L 1694 1e-05 1200
Model seed is: 1694, total, training informative and uninf samples: 3256 ,             1289, 1316


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-large and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6758
200,0.5304
300,0.4673
400,0.2873
500,0.2654
600,0.1512
700,0.136
800,0.0818
900,0.0504
1000,0.0135


for ratio of 175, model of DEBERT-L, seed of 1694 performance is:
 0.4794326241134752  and  {'test_loss': 1.2633397579193115, 'test_accuracy': 0.8202764976958525, 'test_f1': 0.832618025751073, 'test_precision': 0.8083333333333333, 'test_recall': 0.8584070796460177, 'test_AUC': 0.8666609938733832, 'test_runtime': 1.2642, 'test_samples_per_second': 514.959, 'test_steps_per_second': 8.701}
175 DEBERT-L 6932 1e-05 1200
Model seed is: 6932, total, training informative and uninf samples: 3256 ,             1293, 1312


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-large and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.644
200,0.5704
300,0.4057
400,0.288
500,0.2089
600,0.138
700,0.0882
800,0.0936
900,0.0327
1000,0.0277


for ratio of 175, model of DEBERT-L, seed of 6932 performance is:
 0.5352245862884161  and  {'test_loss': 1.2085762023925781, 'test_accuracy': 0.8310291858678955, 'test_f1': 0.8377581120943952, 'test_precision': 0.8279883381924198, 'test_recall': 0.8477611940298507, 'test_AUC': 0.8961647458907992, 'test_runtime': 1.2598, 'test_samples_per_second': 516.756, 'test_steps_per_second': 8.732}
175 DEBERT-L 94 1e-05 1200
Model seed is: 94, total, training informative and uninf samples: 3256 ,             1275, 1330


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-large and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6685
200,0.4608
300,0.4013
400,0.2817
500,0.2046
600,0.1281
700,0.0862
800,0.0809
900,0.0349
1000,0.0485


for ratio of 175, model of DEBERT-L, seed of 94 performance is:
 0.4940898345153664  and  {'test_loss': 1.3883609771728516, 'test_accuracy': 0.8033794162826421, 'test_f1': 0.8186968838526912, 'test_precision': 0.8186968838526912, 'test_recall': 0.8186968838526912, 'test_AUC': 0.869403197901021, 'test_runtime': 1.2548, 'test_samples_per_second': 518.798, 'test_steps_per_second': 8.766}
175 DEBERT-L 791 1e-05 1200
Model seed is: 791, total, training informative and uninf samples: 3256 ,             1287, 1318


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-large and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.5887
200,0.4825
300,0.333
400,0.2286
500,0.1656
600,0.1209
700,0.0478
800,0.0455
900,0.0202
1000,0.0126


for ratio of 175, model of DEBERT-L, seed of 791 performance is:
 0.5130023640661938  and  {'test_loss': 1.3742258548736572, 'test_accuracy': 0.8141321044546851, 'test_f1': 0.829817158931083, 'test_precision': 0.7972972972972973, 'test_recall': 0.8651026392961877, 'test_AUC': 0.8796613376217954, 'test_runtime': 1.27, 'test_samples_per_second': 512.607, 'test_steps_per_second': 8.662}
175 DEBERT-L 5 1e-05 1200
Model seed is: 5, total, training informative and uninf samples: 3256 ,             1274, 1331


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-large and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6786
200,0.52
300,0.4612
400,0.2777
500,0.254
600,0.1372
700,0.1081
800,0.0519
900,0.0501
1000,0.0283


for ratio of 175, model of DEBERT-L, seed of 5 performance is:
 0.5002364066193853  and  {'test_loss': 1.0502641201019287, 'test_accuracy': 0.8571428571428571, 'test_f1': 0.873469387755102, 'test_precision': 0.84251968503937, 'test_recall': 0.9067796610169492, 'test_AUC': 0.9075025204968707, 'test_runtime': 1.2621, 'test_samples_per_second': 515.805, 'test_steps_per_second': 8.716}
175 DEBERT-L 1759 1e-05 1200
Model seed is: 1759, total, training informative and uninf samples: 3256 ,             1304, 1301


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-large and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6841
200,0.5335
300,0.3911
400,0.2857
500,0.2521
600,0.1621
700,0.1195
800,0.1147
900,0.0512
1000,0.0497


for ratio of 175, model of DEBERT-L, seed of 1759 performance is:
 0.5068557919621749  and  {'test_loss': 1.2159708738327026, 'test_accuracy': 0.8310291858678955, 'test_f1': 0.8372781065088758, 'test_precision': 0.8039772727272727, 'test_recall': 0.8734567901234568, 'test_AUC': 0.8907860459848228, 'test_runtime': 1.2585, 'test_samples_per_second': 517.267, 'test_steps_per_second': 8.74}
175 DEBERT-L 323 1e-05 1200
Model seed is: 323, total, training informative and uninf samples: 3256 ,             1284, 1321


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-large and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.5934
200,0.4914
300,0.3333
400,0.2338
500,0.1924
600,0.1379
700,0.0812
800,0.0412
900,0.0278
1000,0.0287


for ratio of 175, model of DEBERT-L, seed of 323 performance is:
 0.4765957446808511  and  {'test_loss': 1.4517711400985718, 'test_accuracy': 0.8033794162826421, 'test_f1': 0.8197183098591548, 'test_precision': 0.7950819672131147, 'test_recall': 0.8459302325581395, 'test_AUC': 0.8850276494204984, 'test_runtime': 1.262, 'test_samples_per_second': 515.835, 'test_steps_per_second': 8.716}
175 DEBERT-L 200 1e-05 1200
Model seed is: 200, total, training informative and uninf samples: 3256 ,             1288, 1317


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-large and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6317
200,0.4611
300,0.3649
400,0.2684
500,0.1898
600,0.1178
700,0.1187
800,0.0556
900,0.0383
1000,0.0446


for ratio of 175, model of DEBERT-L, seed of 200 performance is:
 0.5148936170212766  and  {'test_loss': 1.1041404008865356, 'test_accuracy': 0.8310291858678955, 'test_f1': 0.845505617977528, 'test_precision': 0.8091397849462365, 'test_recall': 0.8852941176470588, 'test_AUC': 0.9003688292037073, 'test_runtime': 1.2588, 'test_samples_per_second': 517.171, 'test_steps_per_second': 8.739}
175 DEBERT-L 999 1e-05 1200
Model seed is: 999, total, training informative and uninf samples: 3256 ,             1301, 1304


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-large and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.5957
200,0.5287
300,0.4039
400,0.2982
500,0.2602
600,0.1837
700,0.1587
800,0.1171
900,0.0721
1000,0.0743


for ratio of 175, model of DEBERT-L, seed of 999 performance is:
 0.5101654846335697  and  {'test_loss': 1.134877324104309, 'test_accuracy': 0.8279569892473119, 'test_f1': 0.8390804597701149, 'test_precision': 0.7913279132791328, 'test_recall': 0.8929663608562691, 'test_AUC': 0.8984784988862461, 'test_runtime': 1.2521, 'test_samples_per_second': 519.918, 'test_steps_per_second': 8.785}
175 XLM-B 9741 2e-05 1200
Model seed is: 9741, total, training informative and uninf samples: 3256 ,             1321, 1284


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6234
200,0.5374
300,0.4429
400,0.38
500,0.3306
600,0.2451
700,0.1855
800,0.1767
900,0.1112
1000,0.1211


for ratio of 175, model of XLM-B, seed of 9741 performance is:
 0.4846335697399527  and  {'test_loss': 1.0232150554656982, 'test_accuracy': 0.8079877112135176, 'test_f1': 0.8037676609105181, 'test_precision': 0.7757575757575758, 'test_recall': 0.8338762214983714, 'test_AUC': 0.8665631391561244, 'test_runtime': 0.3821, 'test_samples_per_second': 1703.681, 'test_steps_per_second': 28.787}
175 XLM-B 1694 2e-05 1200
Model seed is: 1694, total, training informative and uninf samples: 3256 ,             1289, 1316


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6739
200,0.5805
300,0.5371
400,0.4182
500,0.3828
600,0.2774
700,0.2595
800,0.1828
900,0.1317
1000,0.1232


for ratio of 175, model of XLM-B, seed of 1694 performance is:
 0.5044917257683215  and  {'test_loss': 1.0217710733413696, 'test_accuracy': 0.7987711213517665, 'test_f1': 0.8167832167832169, 'test_precision': 0.776595744680851, 'test_recall': 0.8613569321533924, 'test_AUC': 0.8710384993570833, 'test_runtime': 0.3816, 'test_samples_per_second': 1705.828, 'test_steps_per_second': 28.824}
175 XLM-B 6932 2e-05 1200
Model seed is: 6932, total, training informative and uninf samples: 3256 ,             1293, 1312


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6491
200,0.5464
300,0.4961
400,0.4099
500,0.3419
600,0.253
700,0.2111
800,0.1765
900,0.1304
1000,0.1175


for ratio of 175, model of XLM-B, seed of 6932 performance is:
 0.47423167848699765  and  {'test_loss': 1.0875868797302246, 'test_accuracy': 0.8003072196620584, 'test_f1': 0.8099415204678362, 'test_precision': 0.7936962750716332, 'test_recall': 0.826865671641791, 'test_AUC': 0.8683733232571321, 'test_runtime': 0.3823, 'test_samples_per_second': 1703.064, 'test_steps_per_second': 28.777}
175 XLM-B 94 2e-05 1200
Model seed is: 94, total, training informative and uninf samples: 3256 ,             1275, 1330


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6885
200,0.5748
300,0.4877
400,0.4152
500,0.3743
600,0.2638
700,0.2067
800,0.2018
900,0.1316
1000,0.1214


for ratio of 175, model of XLM-B, seed of 94 performance is:
 0.49314420803782505  and  {'test_loss': 0.9985716342926025, 'test_accuracy': 0.8233486943164362, 'test_f1': 0.8391608391608391, 'test_precision': 0.8287292817679558, 'test_recall': 0.8498583569405099, 'test_AUC': 0.867245280149058, 'test_runtime': 0.3843, 'test_samples_per_second': 1694.012, 'test_steps_per_second': 28.624}
175 XLM-B 791 2e-05 1200
Model seed is: 791, total, training informative and uninf samples: 3256 ,             1287, 1318


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6427
200,0.5674
300,0.4774
400,0.4081
500,0.3748
600,0.2722
700,0.2432
800,0.2168
900,0.1512
1000,0.1856


for ratio of 175, model of XLM-B, seed of 791 performance is:
 0.4884160756501182  and  {'test_loss': 1.0587923526763916, 'test_accuracy': 0.7956989247311828, 'test_f1': 0.8150208623087623, 'test_precision': 0.7751322751322751, 'test_recall': 0.8592375366568915, 'test_AUC': 0.8763976917983163, 'test_runtime': 0.3837, 'test_samples_per_second': 1696.512, 'test_steps_per_second': 28.666}
175 XLM-B 5 2e-05 1200
Model seed is: 5, total, training informative and uninf samples: 3256 ,             1274, 1331


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6748
200,0.6068
300,0.5352
400,0.4477
500,0.4242
600,0.326
700,0.2804
800,0.2246
900,0.1669
1000,0.1712


for ratio of 175, model of XLM-B, seed of 5 performance is:
 0.4950354609929078  and  {'test_loss': 0.7351127862930298, 'test_accuracy': 0.8387096774193549, 'test_f1': 0.8563611491108072, 'test_precision': 0.830238726790451, 'test_recall': 0.884180790960452, 'test_AUC': 0.9049439783903062, 'test_runtime': 0.38, 'test_samples_per_second': 1713.032, 'test_steps_per_second': 28.945}
175 XLM-B 1759 2e-05 1200
Model seed is: 1759, total, training informative and uninf samples: 3256 ,             1304, 1301


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6667
200,0.5941
300,0.5933
400,0.4991
500,0.4568
600,0.3501
700,0.3274
800,0.2707
900,0.2143
1000,0.2351


for ratio of 175, model of XLM-B, seed of 1759 performance is:
 0.548936170212766  and  {'test_loss': 0.825644850730896, 'test_accuracy': 0.7956989247311828, 'test_f1': 0.8052708638360175, 'test_precision': 0.766016713091922, 'test_recall': 0.8487654320987654, 'test_AUC': 0.8814229622078755, 'test_runtime': 0.3843, 'test_samples_per_second': 1694.136, 'test_steps_per_second': 28.626}
175 XLM-B 323 2e-05 1200
Model seed is: 323, total, training informative and uninf samples: 3256 ,             1284, 1321


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6435
200,0.593
300,0.4763
400,0.3742
500,0.3351
600,0.2377
700,0.1934
800,0.2028
900,0.1146
1000,0.1094


for ratio of 175, model of XLM-B, seed of 323 performance is:
 0.5002364066193853  and  {'test_loss': 1.2165732383728027, 'test_accuracy': 0.804915514592934, 'test_f1': 0.8218793828892005, 'test_precision': 0.7940379403794038, 'test_recall': 0.8517441860465116, 'test_AUC': 0.8646598742519507, 'test_runtime': 0.3784, 'test_samples_per_second': 1720.446, 'test_steps_per_second': 29.071}
175 XLM-B 200 2e-05 1200
Model seed is: 200, total, training informative and uninf samples: 3256 ,             1288, 1317


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6523
200,0.5562
300,0.493
400,0.4038
500,0.3732
600,0.2509
700,0.2511
800,0.1926
900,0.1655
1000,0.1268


for ratio of 175, model of XLM-B, seed of 200 performance is:
 0.4950354609929078  and  {'test_loss': 0.8802228569984436, 'test_accuracy': 0.8172043010752689, 'test_f1': 0.8316831683168315, 'test_precision': 0.8010899182561307, 'test_recall': 0.8647058823529412, 'test_AUC': 0.8871098921883867, 'test_runtime': 0.3799, 'test_samples_per_second': 1713.834, 'test_steps_per_second': 28.959}
175 XLM-B 999 2e-05 1200
Model seed is: 999, total, training informative and uninf samples: 3256 ,             1301, 1304


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6891
200,0.6447
300,0.5862
400,0.5465
500,0.4942
600,0.3922
700,0.3677
800,0.3059
900,0.2283
1000,0.2039


for ratio of 175, model of XLM-B, seed of 999 performance is:
 0.5068557919621749  and  {'test_loss': 0.7700793147087097, 'test_accuracy': 0.7910906298003072, 'test_f1': 0.8062678062678063, 'test_precision': 0.7546666666666667, 'test_recall': 0.8654434250764526, 'test_AUC': 0.8584494280212935, 'test_runtime': 0.3801, 'test_samples_per_second': 1712.676, 'test_steps_per_second': 28.939}
175 XLM-L 9741 8e-06 1200
Model seed is: 9741, total, training informative and uninf samples: 3256 ,             1321, 1284


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-large and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6742
200,0.5702
300,0.4726
400,0.3953
500,0.3682
600,0.306
700,0.2166
800,0.2036
900,0.1765
1000,0.1699


for ratio of 175, model of XLM-L, seed of 9741 performance is:
 0.49030732860520093  and  {'test_loss': 0.9301187992095947, 'test_accuracy': 0.8110599078341014, 'test_f1': 0.8069073783359498, 'test_precision': 0.7787878787878788, 'test_recall': 0.8371335504885994, 'test_AUC': 0.8786550261343837, 'test_runtime': 1.088, 'test_samples_per_second': 598.34, 'test_steps_per_second': 10.11}
175 XLM-L 1694 8e-06 1200
Model seed is: 1694, total, training informative and uninf samples: 3256 ,             1289, 1316


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-large and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6772
200,0.5608
300,0.5219
400,0.3907
500,0.3776
600,0.2872
700,0.2341
800,0.2216
900,0.1694
1000,0.1572


for ratio of 175, model of XLM-L, seed of 1694 performance is:
 0.4950354609929078  and  {'test_loss': 0.948809802532196, 'test_accuracy': 0.8018433179723502, 'test_f1': 0.8175388967468176, 'test_precision': 0.7853260869565217, 'test_recall': 0.8525073746312685, 'test_AUC': 0.8735250737463127, 'test_runtime': 1.0896, 'test_samples_per_second': 597.471, 'test_steps_per_second': 10.096}
175 XLM-L 6932 8e-06 1200
Model seed is: 6932, total, training informative and uninf samples: 3256 ,             1293, 1312


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-large and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6979
200,0.574
300,0.4988
400,0.4057
500,0.3504
600,0.2659
700,0.2191
800,0.1772
900,0.1236
1000,0.1522


for ratio of 175, model of XLM-L, seed of 6932 performance is:
 0.5281323877068558  and  {'test_loss': 0.9358174800872803, 'test_accuracy': 0.8095238095238095, 'test_f1': 0.8176470588235294, 'test_precision': 0.8057971014492754, 'test_recall': 0.8298507462686567, 'test_AUC': 0.8897411675798224, 'test_runtime': 1.0819, 'test_samples_per_second': 601.708, 'test_steps_per_second': 10.167}
175 XLM-L 94 8e-06 1200
Model seed is: 94, total, training informative and uninf samples: 3256 ,             1275, 1330


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-large and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.7117
200,0.6585
300,0.5833
400,0.4836
500,0.4324
600,0.385
700,0.316
800,0.3023
900,0.2186
1000,0.2172


for ratio of 175, model of XLM-L, seed of 94 performance is:
 0.466193853427896  and  {'test_loss': 0.6747894287109375, 'test_accuracy': 0.8064516129032258, 'test_f1': 0.8189655172413793, 'test_precision': 0.8309037900874635, 'test_recall': 0.8073654390934845, 'test_AUC': 0.8817042797117706, 'test_runtime': 1.078, 'test_samples_per_second': 603.917, 'test_steps_per_second': 10.204}
175 XLM-L 791 8e-06 1200
Model seed is: 791, total, training informative and uninf samples: 3256 ,             1287, 1318


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-large and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6859
200,0.582
300,0.4727
400,0.4139
500,0.3683
600,0.3084
700,0.2629
800,0.2221
900,0.2012
1000,0.1639


for ratio of 175, model of XLM-L, seed of 791 performance is:
 0.4907801418439716  and  {'test_loss': 0.8541372418403625, 'test_accuracy': 0.8079877112135176, 'test_f1': 0.8251748251748252, 'test_precision': 0.7887700534759359, 'test_recall': 0.8651026392961877, 'test_AUC': 0.885157506385394, 'test_runtime': 1.0801, 'test_samples_per_second': 602.736, 'test_steps_per_second': 10.184}
175 XLM-L 5 8e-06 1200
Model seed is: 5, total, training informative and uninf samples: 3256 ,             1274, 1331


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-large and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.698
200,0.5773
300,0.5231
400,0.4475
500,0.4067
600,0.3317
700,0.3019
800,0.2424
900,0.1865
1000,0.1948


for ratio of 175, model of XLM-L, seed of 5 performance is:
 0.47754137115839246  and  {'test_loss': 0.7436240315437317, 'test_accuracy': 0.815668202764977, 'test_f1': 0.828080229226361, 'test_precision': 0.8401162790697675, 'test_recall': 0.8163841807909604, 'test_AUC': 0.890724571515532, 'test_runtime': 1.0812, 'test_samples_per_second': 602.126, 'test_steps_per_second': 10.174}
175 XLM-L 1759 8e-06 1200
Model seed is: 1759, total, training informative and uninf samples: 3256 ,             1304, 1301


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-large and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.7308
200,0.6382
300,0.5764
400,0.5096
500,0.4742
600,0.4167
700,0.3668
800,0.3583
900,0.284
1000,0.3088


for ratio of 175, model of XLM-L, seed of 1759 performance is:
 0.5215130023640662  and  {'test_loss': 0.5741868019104004, 'test_accuracy': 0.8003072196620584, 'test_f1': 0.8126801152737751, 'test_precision': 0.7621621621621621, 'test_recall': 0.8703703703703703, 'test_AUC': 0.8861139426888663, 'test_runtime': 1.0909, 'test_samples_per_second': 596.738, 'test_steps_per_second': 10.083}
175 XLM-L 323 8e-06 1200
Model seed is: 323, total, training informative and uninf samples: 3256 ,             1284, 1321


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-large and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6585
200,0.5762
300,0.43
400,0.3511
500,0.3463
600,0.2463
700,0.2094
800,0.1654
900,0.1128
1000,0.142


for ratio of 175, model of XLM-L, seed of 323 performance is:
 0.502127659574468  and  {'test_loss': 1.3145755529403687, 'test_accuracy': 0.7849462365591398, 'test_f1': 0.8060941828254848, 'test_precision': 0.7698412698412699, 'test_recall': 0.8459302325581395, 'test_AUC': 0.874924248163018, 'test_runtime': 1.0796, 'test_samples_per_second': 603.015, 'test_steps_per_second': 10.189}
175 XLM-L 200 8e-06 1200
Model seed is: 200, total, training informative and uninf samples: 3256 ,             1288, 1317


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-large and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6777
200,0.5766
300,0.5333
400,0.471
500,0.4114
600,0.3463
700,0.3169
800,0.2713
900,0.2357
1000,0.1818


for ratio of 175, model of XLM-L, seed of 200 performance is:
 0.5205673758865248  and  {'test_loss': 0.6835460066795349, 'test_accuracy': 0.8248847926267281, 'test_f1': 0.8385269121813032, 'test_precision': 0.8087431693989071, 'test_recall': 0.8705882352941177, 'test_AUC': 0.9028560620389636, 'test_runtime': 1.09, 'test_samples_per_second': 597.238, 'test_steps_per_second': 10.092}
175 XLM-L 999 8e-06 1200
Model seed is: 999, total, training informative and uninf samples: 3256 ,             1301, 1304


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-large and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6894
200,0.5965
300,0.5558
400,0.4562
500,0.4205
600,0.3425
700,0.317
800,0.2876
900,0.2019
1000,0.2084


for ratio of 175, model of XLM-L, seed of 999 performance is:
 0.573049645390071  and  {'test_loss': 0.7847762703895569, 'test_accuracy': 0.794162826420891, 'test_f1': 0.814404432132964, 'test_precision': 0.7443037974683544, 'test_recall': 0.8990825688073395, 'test_AUC': 0.882272435534413, 'test_runtime': 1.0972, 'test_samples_per_second': 593.321, 'test_steps_per_second': 10.025}
number of training samples:  (2605, 13)
110 ALBERT-L 9741 8e-06 1000
Model seed is: 9741, total, training informative and uninf samples: 2778 ,             1107, 1115


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6474
200,0.5309
300,0.4309
400,0.3146
500,0.2297
600,0.196
700,0.1396
800,0.0889
900,0.0725
1000,0.0496


for ratio of 110, model of ALBERT-L, seed of 9741 performance is:
 0.5050462573591253  and  {'test_loss': 1.009264349937439, 'test_accuracy': 0.789568345323741, 'test_f1': 0.7950963222416814, 'test_precision': 0.7854671280276817, 'test_recall': 0.8049645390070922, 'test_AUC': 0.850002588393643, 'test_runtime': 1.0388, 'test_samples_per_second': 535.222, 'test_steps_per_second': 8.664}
110 ALBERT-L 1694 8e-06 1000
Model seed is: 1694, total, training informative and uninf samples: 2778 ,             1100, 1122


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6379
200,0.5014
300,0.4383
400,0.351
500,0.2446
600,0.186
700,0.1469
800,0.0954
900,0.0825
1000,0.0708


for ratio of 110, model of ALBERT-L, seed of 1694 performance is:
 0.47897392767031116  and  {'test_loss': 1.030073881149292, 'test_accuracy': 0.7949640287769785, 'test_f1': 0.8027681660899654, 'test_precision': 0.8027681660899654, 'test_recall': 0.8027681660899654, 'test_AUC': 0.8549563910164197, 'test_runtime': 1.038, 'test_samples_per_second': 535.63, 'test_steps_per_second': 8.67}
110 ALBERT-L 6932 8e-06 1000
Model seed is: 6932, total, training informative and uninf samples: 2778 ,             1110, 1112


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6134
200,0.4492
300,0.3827
400,0.2789
500,0.1667
600,0.1453
700,0.1049
800,0.0513
900,0.0457
1000,0.0397


for ratio of 110, model of ALBERT-L, seed of 6932 performance is:
 0.5407905803195963  and  {'test_loss': 0.9509150981903076, 'test_accuracy': 0.789568345323741, 'test_f1': 0.7986230636833047, 'test_precision': 0.7682119205298014, 'test_recall': 0.8315412186379928, 'test_AUC': 0.8596068993180906, 'test_runtime': 1.0443, 'test_samples_per_second': 532.392, 'test_steps_per_second': 8.618}
110 ALBERT-L 94 8e-06 1000
Model seed is: 94, total, training informative and uninf samples: 2778 ,             1100, 1122


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6759
200,0.5577
300,0.463
400,0.355
500,0.2897
600,0.1964
700,0.1592
800,0.1258
900,0.1063
1000,0.0555


for ratio of 110, model of ALBERT-L, seed of 94 performance is:
 0.44785534062237176  and  {'test_loss': 1.0659383535385132, 'test_accuracy': 0.7967625899280576, 'test_f1': 0.800705467372134, 'test_precision': 0.8165467625899281, 'test_recall': 0.7854671280276817, 'test_AUC': 0.8449775151303086, 'test_runtime': 1.0407, 'test_samples_per_second': 534.23, 'test_steps_per_second': 8.648}
110 ALBERT-L 791 8e-06 1000
Model seed is: 791, total, training informative and uninf samples: 2778 ,             1106, 1116


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6358
200,0.5164
300,0.394
400,0.3123
500,0.2063
600,0.1511
700,0.1024
800,0.0478
900,0.0438
1000,0.0305


for ratio of 110, model of ALBERT-L, seed of 791 performance is:
 0.5012615643397813  and  {'test_loss': 1.2038099765777588, 'test_accuracy': 0.7661870503597122, 'test_f1': 0.7661870503597122, 'test_precision': 0.7802197802197802, 'test_recall': 0.7526501766784452, 'test_AUC': 0.8165780038571557, 'test_runtime': 1.0428, 'test_samples_per_second': 533.19, 'test_steps_per_second': 8.631}
110 ALBERT-L 5 8e-06 1000
Model seed is: 5, total, training informative and uninf samples: 2778 ,             1098, 1124


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.8008
200,0.7991
300,0.6969
400,0.6999
500,0.6988
600,0.6713
700,0.7102
800,0.6811
900,0.6516
1000,0.6202


for ratio of 110, model of ALBERT-L, seed of 5 performance is:
 0.6021867115222876  and  {'test_loss': 0.6326956152915955, 'test_accuracy': 0.6546762589928058, 'test_f1': 0.673469387755102, 'test_precision': 0.6666666666666666, 'test_recall': 0.6804123711340206, 'test_AUC': 0.6941191726642029, 'test_runtime': 1.0483, 'test_samples_per_second': 530.393, 'test_steps_per_second': 8.585}
110 ALBERT-L 1759 8e-06 1000
Model seed is: 1759, total, training informative and uninf samples: 2778 ,             1112, 1110


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6326
200,0.4993
300,0.4071
400,0.3291
500,0.2237
600,0.1667
700,0.1073
800,0.054
900,0.0527
1000,0.0298


for ratio of 110, model of ALBERT-L, seed of 1759 performance is:
 0.48317914213624896  and  {'test_loss': 1.0372871160507202, 'test_accuracy': 0.7715827338129496, 'test_f1': 0.7719928186714543, 'test_precision': 0.7678571428571429, 'test_recall': 0.776173285198556, 'test_AUC': 0.8518691044602307, 'test_runtime': 1.0462, 'test_samples_per_second': 531.461, 'test_steps_per_second': 8.603}
110 ALBERT-L 323 8e-06 1000
Model seed is: 323, total, training informative and uninf samples: 2778 ,             1109, 1113


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6352
200,0.5203
300,0.4332
400,0.3591
500,0.2639
600,0.2063
700,0.1281
800,0.0771
900,0.0554
1000,0.052


for ratio of 110, model of ALBERT-L, seed of 323 performance is:
 0.49957947855340623  and  {'test_loss': 0.8779711723327637, 'test_accuracy': 0.7823741007194245, 'test_f1': 0.7902946273830156, 'test_precision': 0.7676767676767676, 'test_recall': 0.8142857142857143, 'test_AUC': 0.8576863354037266, 'test_runtime': 1.0378, 'test_samples_per_second': 535.73, 'test_steps_per_second': 8.672}
110 ALBERT-L 200 8e-06 1000
Model seed is: 200, total, training informative and uninf samples: 2778 ,             1125, 1097


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6335
200,0.4955
300,0.4068
400,0.3237
500,0.2247
600,0.1876
700,0.1058
800,0.0692
900,0.0543
1000,0.0342


for ratio of 110, model of ALBERT-L, seed of 200 performance is:
 0.4646761984861228  and  {'test_loss': 0.8694810271263123, 'test_accuracy': 0.8201438848920863, 'test_f1': 0.8091603053435115, 'test_precision': 0.8153846153846154, 'test_recall': 0.803030303030303, 'test_AUC': 0.8916173723536738, 'test_runtime': 1.0361, 'test_samples_per_second': 536.608, 'test_steps_per_second': 8.686}
110 ALBERT-L 999 8e-06 1000
Model seed is: 999, total, training informative and uninf samples: 2778 ,             1105, 1117


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-large-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6616
200,0.5159
300,0.4242
400,0.3218
500,0.2424
600,0.1472
700,0.1261
800,0.0877
900,0.047
1000,0.0439


for ratio of 110, model of ALBERT-L, seed of 999 performance is:
 0.5109335576114382  and  {'test_loss': 1.0848090648651123, 'test_accuracy': 0.8039568345323741, 'test_f1': 0.8149405772495756, 'test_precision': 0.7868852459016393, 'test_recall': 0.8450704225352113, 'test_AUC': 0.8411091549295774, 'test_runtime': 1.0448, 'test_samples_per_second': 532.166, 'test_steps_per_second': 8.614}
110 DISRoBERTa-B 9741 2e-05 1000
Model seed is: 9741, total, training informative and uninf samples: 2778 ,             1107, 1115


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6317
200,0.4887
300,0.3778
400,0.2869
500,0.1985
600,0.2021
700,0.1232
800,0.0778
900,0.0694
1000,0.0461


for ratio of 110, model of DISRoBERTa-B, seed of 9741 performance is:
 0.4928511354079058  and  {'test_loss': 0.9766098260879517, 'test_accuracy': 0.814748201438849, 'test_f1': 0.8245315161839863, 'test_precision': 0.7934426229508197, 'test_recall': 0.8581560283687943, 'test_AUC': 0.891481596521199, 'test_runtime': 0.1901, 'test_samples_per_second': 2924.407, 'test_steps_per_second': 47.338}
110 DISRoBERTa-B 1694 2e-05 1000
Model seed is: 1694, total, training informative and uninf samples: 2778 ,             1100, 1122


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6116
200,0.4787
300,0.4085
400,0.3086
500,0.2141
600,0.1541
700,0.1517
800,0.0935
900,0.0808
1000,0.0476


for ratio of 110, model of DISRoBERTa-B, seed of 1694 performance is:
 0.48486122792262404  and  {'test_loss': 1.035205602645874, 'test_accuracy': 0.789568345323741, 'test_f1': 0.8020304568527918, 'test_precision': 0.7847682119205298, 'test_recall': 0.8200692041522492, 'test_AUC': 0.8652722159584256, 'test_runtime': 0.1898, 'test_samples_per_second': 2929.381, 'test_steps_per_second': 47.418}
110 DISRoBERTa-B 6932 2e-05 1000
Model seed is: 6932, total, training informative and uninf samples: 2778 ,             1110, 1112


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6101
200,0.4692
300,0.3983
400,0.2948
500,0.2157
600,0.1448
700,0.1321
800,0.0826
900,0.0483
1000,0.0463


for ratio of 110, model of DISRoBERTa-B, seed of 6932 performance is:
 0.4634146341463415  and  {'test_loss': 1.0770673751831055, 'test_accuracy': 0.7985611510791367, 'test_f1': 0.8, 'test_precision': 0.797153024911032, 'test_recall': 0.8028673835125448, 'test_AUC': 0.8752118835966514, 'test_runtime': 0.1872, 'test_samples_per_second': 2970.807, 'test_steps_per_second': 48.089}
110 DISRoBERTa-B 94 2e-05 1000
Model seed is: 94, total, training informative and uninf samples: 2778 ,             1100, 1122


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.5953
200,0.5067
300,0.3848
400,0.3022
500,0.2162
600,0.1531
700,0.1305
800,0.0829
900,0.0649
1000,0.0475


for ratio of 110, model of DISRoBERTa-B, seed of 94 performance is:
 0.4861227922624054  and  {'test_loss': 1.0170217752456665, 'test_accuracy': 0.7967625899280576, 'test_f1': 0.8094435075885328, 'test_precision': 0.7894736842105263, 'test_recall': 0.8304498269896193, 'test_AUC': 0.8699506240037322, 'test_runtime': 0.1853, 'test_samples_per_second': 3000.368, 'test_steps_per_second': 48.567}
110 DISRoBERTa-B 791 2e-05 1000
Model seed is: 791, total, training informative and uninf samples: 2778 ,             1106, 1116


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6279
200,0.4953
300,0.3866
400,0.2976
500,0.2094
600,0.1685
700,0.1168
800,0.0709
900,0.0703
1000,0.0511


for ratio of 110, model of DISRoBERTa-B, seed of 791 performance is:
 0.4726661059714045  and  {'test_loss': 1.0332274436950684, 'test_accuracy': 0.7931654676258992, 'test_f1': 0.7978910369068541, 'test_precision': 0.7937062937062938, 'test_recall': 0.8021201413427562, 'test_AUC': 0.8685460593587803, 'test_runtime': 0.1895, 'test_samples_per_second': 2934.032, 'test_steps_per_second': 47.493}
110 DISRoBERTa-B 5 2e-05 1000
Model seed is: 5, total, training informative and uninf samples: 2778 ,             1098, 1124


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.605
200,0.4917
300,0.423
400,0.3201
500,0.2282
600,0.1978
700,0.1462
800,0.1044
900,0.0929
1000,0.0719


for ratio of 110, model of DISRoBERTa-B, seed of 5 performance is:
 0.471825063078217  and  {'test_loss': 0.9228127002716064, 'test_accuracy': 0.8111510791366906, 'test_f1': 0.8198970840480275, 'test_precision': 0.8184931506849316, 'test_recall': 0.8213058419243986, 'test_AUC': 0.8867405822472929, 'test_runtime': 0.1849, 'test_samples_per_second': 3006.728, 'test_steps_per_second': 48.67}
110 DISRoBERTa-B 1759 2e-05 1000
Model seed is: 1759, total, training informative and uninf samples: 2778 ,             1112, 1110


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6213
200,0.4814
300,0.3669
400,0.2851
500,0.1868
600,0.1623
700,0.1147
800,0.0699
900,0.0632
1000,0.0517


for ratio of 110, model of DISRoBERTa-B, seed of 1759 performance is:
 0.5164003364171573  and  {'test_loss': 1.1671115159988403, 'test_accuracy': 0.8057553956834532, 'test_f1': 0.8137931034482758, 'test_precision': 0.7788778877887789, 'test_recall': 0.851985559566787, 'test_AUC': 0.8649508947634019, 'test_runtime': 0.1858, 'test_samples_per_second': 2991.965, 'test_steps_per_second': 48.431}
110 DISRoBERTa-B 323 2e-05 1000
Model seed is: 323, total, training informative and uninf samples: 2778 ,             1109, 1113


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6201
200,0.465
300,0.3701
400,0.2792
500,0.1924
600,0.1624
700,0.1239
800,0.0673
900,0.0579
1000,0.0485


for ratio of 110, model of DISRoBERTa-B, seed of 323 performance is:
 0.47224558452481075  and  {'test_loss': 1.0860960483551025, 'test_accuracy': 0.802158273381295, 'test_f1': 0.8129251700680272, 'test_precision': 0.775974025974026, 'test_recall': 0.8535714285714285, 'test_AUC': 0.8753364389233954, 'test_runtime': 0.1933, 'test_samples_per_second': 2876.784, 'test_steps_per_second': 46.567}
110 DISRoBERTa-B 200 2e-05 1000
Model seed is: 200, total, training informative and uninf samples: 2778 ,             1125, 1097


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6022
200,0.4541
300,0.3864
400,0.309
500,0.2167
600,0.1761
700,0.1303
800,0.1049
900,0.0808
1000,0.0474


for ratio of 110, model of DISRoBERTa-B, seed of 200 performance is:
 0.47560975609756095  and  {'test_loss': 0.9109463691711426, 'test_accuracy': 0.8183453237410072, 'test_f1': 0.8173598553345389, 'test_precision': 0.7820069204152249, 'test_recall': 0.8560606060606061, 'test_AUC': 0.9008730282274802, 'test_runtime': 0.1918, 'test_samples_per_second': 2899.439, 'test_steps_per_second': 46.933}
110 DISRoBERTa-B 999 2e-05 1000
Model seed is: 999, total, training informative and uninf samples: 2778 ,             1105, 1117


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.5937
200,0.4557
300,0.3624
400,0.2717
500,0.1989
600,0.1636
700,0.1112
800,0.0999
900,0.0451
1000,0.0591


for ratio of 110, model of DISRoBERTa-B, seed of 999 performance is:
 0.4600504625735913  and  {'test_loss': 0.9518455862998962, 'test_accuracy': 0.8039568345323741, 'test_f1': 0.8123924268502583, 'test_precision': 0.7946127946127947, 'test_recall': 0.8309859154929577, 'test_AUC': 0.8939001657000829, 'test_runtime': 0.1867, 'test_samples_per_second': 2978.525, 'test_steps_per_second': 48.214}
110 XLNet-B 9741 2e-05 1000
Model seed is: 9741, total, training informative and uninf samples: 2778 ,             1107, 1115


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6873
200,0.5517
300,0.4427
400,0.3326
500,0.2308
600,0.1916
700,0.1287
800,0.0812
900,0.0695
1000,0.0432


for ratio of 110, model of XLNet-B, seed of 9741 performance is:
 0.5462573591253154  and  {'test_loss': 1.211991310119629, 'test_accuracy': 0.7985611510791367, 'test_f1': 0.8127090301003345, 'test_precision': 0.7689873417721519, 'test_recall': 0.8617021276595744, 'test_AUC': 0.8749288191748201, 'test_runtime': 0.4356, 'test_samples_per_second': 1276.343, 'test_steps_per_second': 20.66}
110 XLNet-B 1694 2e-05 1000
Model seed is: 1694, total, training informative and uninf samples: 2778 ,             1100, 1122


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6246
200,0.4973
300,0.4538
400,0.3199
500,0.2454
600,0.1622
700,0.1562
800,0.0835
900,0.0712
1000,0.0411


for ratio of 110, model of XLNet-B, seed of 1694 performance is:
 0.5529857022708158  and  {'test_loss': 1.2294920682907104, 'test_accuracy': 0.7805755395683454, 'test_f1': 0.8038585209003215, 'test_precision': 0.7507507507507507, 'test_recall': 0.8650519031141869, 'test_AUC': 0.8574575897774841, 'test_runtime': 0.4359, 'test_samples_per_second': 1275.41, 'test_steps_per_second': 20.645}
110 XLNet-B 6932 2e-05 1000
Model seed is: 6932, total, training informative and uninf samples: 2778 ,             1110, 1112


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6601
200,0.515
300,0.4468
400,0.3426
500,0.2637
600,0.203
700,0.1785
800,0.0932
900,0.0579
1000,0.0557


for ratio of 110, model of XLNet-B, seed of 6932 performance is:
 0.5138772077375946  and  {'test_loss': 1.0518332719802856, 'test_accuracy': 0.7949640287769785, 'test_f1': 0.8006993006993006, 'test_precision': 0.7815699658703071, 'test_recall': 0.8207885304659498, 'test_AUC': 0.8819404008643557, 'test_runtime': 0.4395, 'test_samples_per_second': 1265.143, 'test_steps_per_second': 20.479}
110 XLNet-B 94 2e-05 1000
Model seed is: 94, total, training informative and uninf samples: 2778 ,             1100, 1122


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6165
200,0.5158
300,0.4105
400,0.3191
500,0.2105
600,0.1416
700,0.1214
800,0.0751
900,0.054
1000,0.03


for ratio of 110, model of XLNet-B, seed of 94 performance is:
 0.511354079058032  and  {'test_loss': 1.244087815284729, 'test_accuracy': 0.7859712230215827, 'test_f1': 0.8039538714991764, 'test_precision': 0.7672955974842768, 'test_recall': 0.8442906574394463, 'test_AUC': 0.8669699208169719, 'test_runtime': 0.4317, 'test_samples_per_second': 1287.944, 'test_steps_per_second': 20.848}
110 XLNet-B 791 2e-05 1000
Model seed is: 791, total, training informative and uninf samples: 2778 ,             1106, 1116


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6456
200,0.5724
300,0.4612
400,0.3673
500,0.2592
600,0.189
700,0.1384
800,0.077
900,0.0853
1000,0.052


for ratio of 110, model of XLNet-B, seed of 791 performance is:
 0.5538267451640033  and  {'test_loss': 1.388332486152649, 'test_accuracy': 0.7589928057553957, 'test_f1': 0.7817589576547231, 'test_precision': 0.7250755287009063, 'test_recall': 0.8480565371024735, 'test_AUC': 0.8417012904645413, 'test_runtime': 0.4363, 'test_samples_per_second': 1274.399, 'test_steps_per_second': 20.629}
110 XLNet-B 5 2e-05 1000
Model seed is: 5, total, training informative and uninf samples: 2778 ,             1098, 1124


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6524
200,0.5362
300,0.4766
400,0.3505
500,0.2634
600,0.1976
700,0.1553
800,0.0929
900,0.0606
1000,0.0479


for ratio of 110, model of XLNet-B, seed of 5 performance is:
 0.5492010092514719  and  {'test_loss': 1.1485271453857422, 'test_accuracy': 0.7967625899280576, 'test_f1': 0.8132231404958677, 'test_precision': 0.7834394904458599, 'test_recall': 0.845360824742268, 'test_AUC': 0.8723335278480191, 'test_runtime': 0.4381, 'test_samples_per_second': 1268.994, 'test_steps_per_second': 20.541}
110 XLNet-B 1759 2e-05 1000
Model seed is: 1759, total, training informative and uninf samples: 2778 ,             1112, 1110


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6226
200,0.5215
300,0.402
400,0.3498
500,0.2048
600,0.1809
700,0.122
800,0.0577
900,0.0678
1000,0.0317


for ratio of 110, model of XLNet-B, seed of 1759 performance is:
 0.5109335576114382  and  {'test_loss': 1.225437879562378, 'test_accuracy': 0.7715827338129496, 'test_f1': 0.7829059829059828, 'test_precision': 0.7435064935064936, 'test_recall': 0.8267148014440433, 'test_AUC': 0.8654425941021958, 'test_runtime': 0.435, 'test_samples_per_second': 1278.017, 'test_steps_per_second': 20.687}
110 XLNet-B 323 2e-05 1000
Model seed is: 323, total, training informative and uninf samples: 2778 ,             1109, 1113


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6675
200,0.5151
300,0.4678
400,0.3598
500,0.2592
600,0.1899
700,0.122
800,0.0953
900,0.0565
1000,0.0704


for ratio of 110, model of XLNet-B, seed of 323 performance is:
 0.46719932716568546  and  {'test_loss': 1.1406095027923584, 'test_accuracy': 0.7913669064748201, 'test_f1': 0.8006872852233677, 'test_precision': 0.7715231788079471, 'test_recall': 0.8321428571428572, 'test_AUC': 0.8804347826086956, 'test_runtime': 0.44, 'test_samples_per_second': 1263.687, 'test_steps_per_second': 20.455}
110 XLNet-B 200 2e-05 1000
Model seed is: 200, total, training informative and uninf samples: 2778 ,             1125, 1097


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6138
200,0.4975
300,0.4356
400,0.3316
500,0.2105
600,0.1842
700,0.1163
800,0.0746
900,0.0729
1000,0.0499


for ratio of 110, model of XLNet-B, seed of 200 performance is:
 0.5584524810765349  and  {'test_loss': 1.2364510297775269, 'test_accuracy': 0.8039568345323741, 'test_f1': 0.8149405772495755, 'test_precision': 0.7384615384615385, 'test_recall': 0.9090909090909091, 'test_AUC': 0.8787619344126194, 'test_runtime': 0.4422, 'test_samples_per_second': 1257.304, 'test_steps_per_second': 20.352}
110 XLNet-B 999 2e-05 1000
Model seed is: 999, total, training informative and uninf samples: 2778 ,             1105, 1117


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6395
200,0.4987
300,0.42
400,0.3215
500,0.2231
600,0.1571
700,0.136
800,0.0878
900,0.0448
1000,0.0713


for ratio of 110, model of XLNet-B, seed of 999 performance is:
 0.5811606391925989  and  {'test_loss': 1.1198420524597168, 'test_accuracy': 0.8039568345323741, 'test_f1': 0.8210180623973728, 'test_precision': 0.7692307692307693, 'test_recall': 0.8802816901408451, 'test_AUC': 0.8786246893123446, 'test_runtime': 0.4296, 'test_samples_per_second': 1294.306, 'test_steps_per_second': 20.951}
110 XLNet-L 9741 1e-05 750
Model seed is: 9741, total, training informative and uninf samples: 2778 ,             1107, 1115


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
100,0.6901
200,0.5614
300,0.4609
400,0.3619
500,0.2576
600,0.1846
700,0.1361


for ratio of 110, model of XLNet-L, seed of 9741 performance is:
 0.5794785534062237  and  {'test_loss': 0.8476578593254089, 'test_accuracy': 0.7589928057553957, 'test_f1': 0.7774086378737541, 'test_precision': 0.73125, 'test_recall': 0.8297872340425532, 'test_AUC': 0.853742817207641, 'test_runtime': 1.1769, 'test_samples_per_second': 472.416, 'test_steps_per_second': 7.647}
110 XLNet-L 1694 1e-05 750
Model seed is: 1694, total, training informative and uninf samples: 2778 ,             1100, 1122


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-large-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss


In [None]:

############### google drive  ###############
# Persist the experiment results to Google Drive so they outlive the Colab session.
# Drive is expected to be mounted at /content/drive (see the drive.mount cell at the top).
drive_root = '/content/drive/MyDrive'
results_dir = os.path.join(drive_root, 'paper1')

# Fail loudly when Drive is not mounted, rather than silently creating the folder
# on the ephemeral Colab disk where the results would vanish with the session.
if not os.path.isdir(drive_root):
    raise FileNotFoundError(f'Google Drive is not mounted at {drive_root}; run drive.mount first.')

# Neither DataFrame.to_pickle nor open() creates missing parent directories,
# so make sure the results folder exists before writing anything into it.
os.makedirs(results_dir, exist_ok=True)

# Save each result DataFrame as a pickle file (one file per DataFrame, saved in order
# so that earlier results are still written if a later variable is missing).
path_df = os.path.join(results_dir, 'df_result')
df_result.to_pickle(path_df)

path_df = os.path.join(results_dir, 'df_result_exc')
df_result_exc.to_pickle(path_df)

path_df = os.path.join(results_dir, 'df_result_gb')
df_result_gb.to_pickle(path_df)

# Specify the path in Google Drive
file_path = os.path.join(results_dir, 'all_datasets_seed_fold_results')
# Serialize and save the dictionary
with open(file_path, 'wb') as handle:
    pickle.dump(all_datasets_ratio_models_seed_results, handle, protocol=pickle.HIGHEST_PROTOCOL)