<a href="https://colab.research.google.com/github/marco-siino/text_preprocessing_impact/blob/main/IMDB_DS/RoBERTa_IMDB_TextPreProImpact_NB_PART_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Text preprocessing worth the time: A comparative survey on the impact of common techniques on NLP model performances. 
- - - 
RoBERTa ON IMDB DS EXPERIMENTS NOTEBOOK 
- - -
RoBERTa on the Internet Movie Database (IMDB) dataset.
Code by M. Siino. 

From the paper: "Text preprocessing worth the time: A comparative survey on the impact of common techniques on NLP model performances." by M.Siino et al.



## Importing modules.

In [None]:
!pip install simpletransformers
!pip install tensorboardx

import matplotlib.pyplot as plt
import os
import re
import shutil
import string
import tensorflow as tf
import numpy as np
import torch
import nltk
import pandas as pd

from tensorflow.keras import layers
from tensorflow.keras import losses
from tensorflow.keras import preprocessing
from keras.models import Model
from tensorflow.keras.layers.experimental.preprocessing import TextVectorization
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize 
from nltk.stem import PorterStemmer
from textblob import TextBlob
nltk.download('stopwords')
nltk.download('punkt')
from io import open
from pathlib import Path
from simpletransformers.classification import ClassificationModel, ClassificationArgs




  from .autonotebook import tqdm as notebook_tqdm
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Domenico\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Domenico\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


## Downloading the dataset and extracting it into the current working directory.

In [None]:
# Download the IMDB archive and unpack it next to the notebook.
url = "https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz"

dataset = tf.keras.utils.get_file("aclImdb_v1", url,
                                    untar=True, cache_dir='.',
                                    cache_subdir='')

dataset_dir = os.path.join(os.path.dirname(dataset), 'aclImdb')

train_set_dir = os.path.join(dataset_dir, 'train')
test_set_dir = os.path.join(dataset_dir, 'test')

# Drop the extra 'unsup' folder from the training directory so that only the
# class folders remain when the directory is loaded as a labelled dataset.
# The existence check keeps the cell re-runnable: on a second execution the
# folder is already gone and an unconditional rmtree would raise.
remove_dir = os.path.join(train_set_dir, 'unsup')
if os.path.isdir(remove_dir):
  shutil.rmtree(remove_dir)

## Building the dataset.

In [None]:
# Build the train/test subsets: read each split in directory order, shuffle it
# once with a fixed seed, and keep the first 5000 samples.
batch_size = 1
seed = 1

train_ds = tf.keras.preprocessing.text_dataset_from_directory(
    'aclImdb/train', batch_size=batch_size, shuffle=False, seed=seed)
test_ds = tf.keras.preprocessing.text_dataset_from_directory(
    'aclImdb/test', batch_size=batch_size, shuffle=False, seed=seed)

# reshuffle_each_iteration=False freezes the shuffled order, so every pass over
# the dataset (one per preprocessing pipeline) sees the same samples.
train_ds = train_ds.shuffle(25000, seed=1, reshuffle_each_iteration=False).take(5000)
test_ds = test_ds.shuffle(25000, seed=1, reshuffle_each_iteration=False).take(5000)

train_ds_size, test_ds_size = len(train_ds), len(test_ds)

Found 25000 files belonging to 2 classes.
Found 25000 files belonging to 2 classes.


## Functions to preprocess the source text. (A detailed discussion is given in our paper.)

In [None]:
# Do-Nothing preprocessing function.
def DON(input_data):
  """Baseline ("do nothing") preprocessing: strip wrapper markup only.

  Removes CDATA delimiters and the <author lang="en"> / <documents> wrapper
  tags from the text and leaves the actual content untouched. Every other
  preprocessing function in this notebook starts from this output.

  Args:
    input_data: the text to clean, in any form accepted by
      tf.strings.regex_replace (e.g. a bytes string or a string tensor).

  Returns:
    The result of the chained tf.strings.regex_replace calls.
  """
  # (pattern, replacement) pairs, applied in order. Patterns containing regex
  # escapes are raw strings so that Python does not treat "\!", "\[" or "\]"
  # as (invalid) string escape sequences; the characters handed to the regex
  # engine are unchanged.
  markup_rules = (
      (r'<\!\[CDATA\[', ' '),
      (r'\]{1,}>', ' '),
      ('<author lang="en">', ' '),
      ('</author>', ' '),
      ('<documents>\n(\t){0,2}', ''),
      ('</documents>\n(\t){0,2}', ' '),
  )

  output_data = input_data
  for pattern, replacement in markup_rules:
    output_data = tf.strings.regex_replace(output_data, pattern, replacement)
  return output_data

# Lowercasing preprocessing function.
def LOW(input_data):
  """Apply the DON clean-up, then lowercase the text with tf.strings.lower."""
  cleaned = DON(input_data)
  return tf.strings.lower(cleaned)

# Shared driver for the word-level preprocessing functions (RSW and STM).
def _apply_word_transform(input_data, transform_words):
  """Clean the input with DON, transform its words, and return a string tensor.

  Two call patterns are supported:
    * a single text (scalar tensor), as passed by the experiment loop: the
      result is a scalar string tensor;
    * an indexable (batched) tensor: only its first entry is processed and the
      result has shape (1, 1).

  Args:
    input_data: the text to preprocess (anything DON accepts).
    transform_words: callable that receives the TextBlob word list and returns
      the list of words to keep.

  Returns:
    A tf.constant with the transformed words joined by single spaces, or the
    DON output unchanged when the tensor value cannot be read with .numpy().
  """
  output_data = DON(input_data)

  # Indexing fails on a scalar tensor; that is how a single text is told
  # apart from a batched tensor.
  try:
    text_source = output_data[0]
    is_single_text = False
  except Exception:
    text_source = output_data
    is_single_text = True

  # Without a readable value there is nothing to tokenize: fall back to the
  # DON output.
  try:
    raw_value = text_source.numpy()
  except Exception:
    return output_data

  # A batched entry may still be an array of strings: use its first element.
  if not isinstance(raw_value, (bytes, str)):
    flattened = np.ravel(raw_value)
    if flattened.size == 0:
      return output_data
    raw_value = flattened[0]

  # Decode the bytes properly. Slicing the repr of the bytes object instead
  # would leave escape sequences (for newlines, quotes and non-ASCII
  # characters) inside the text.
  if isinstance(raw_value, bytes):
    text = raw_value.decode('utf-8', errors='replace')
  else:
    text = str(raw_value)

  output_string = ' '.join(transform_words(TextBlob(text).words))

  if is_single_text:
    return tf.constant(output_string)
  return tf.constant([[output_string]])


# Removing Stop Words function.
def RSW(input_data):
  """Remove English stop words from the text (after the DON clean-up).

  Words are compared by exact match against NLTK's English stop-word list, so
  the result depends on whether the text was lowercased beforehand.

  Args:
    input_data: the text to preprocess (anything DON accepts).

  Returns:
    A string tensor with the remaining words joined by single spaces; see
    _apply_word_transform for the output shape and the fallback case.
  """
  def _drop_stopwords(words):
    # Load the stop-word list once per document instead of once per word.
    english_stopwords = frozenset(stopwords.words('english'))
    return [word for word in words if word not in english_stopwords]

  return _apply_word_transform(input_data, _drop_stopwords)


# Porter Stemmer preprocessing function.
def STM(input_data):
  """Replace every word of the text by its Porter stem (after the DON clean-up).

  Args:
    input_data: the text to preprocess (anything DON accepts).

  Returns:
    A string tensor with the stemmed words joined by single spaces; see
    _apply_word_transform for the output shape and the fallback case.
  """
  stemmer = PorterStemmer()

  def _stem_words(words):
    return [stemmer.stem(word) for word in words]

  return _apply_word_transform(input_data, _stem_words)

## Define the combined preprocessing functions. (The base functions are: DON, LOW, RSW and STM).

In [None]:
## SECTION WITH PAIRS OF PREPRO FUNCTIONS. APPLICATION ORDER MATTERS (...IN FOLLOWING SECTIONS TOO).
#...5
def LOW_RSW(input_data):
  """Lowercase the text, then remove stop words."""
  lowered = LOW(input_data)
  return RSW(lowered)

# 6
def LOW_STM(input_data):
  """Lowercase the text, then apply Porter stemming."""
  lowered = LOW(input_data)
  return STM(lowered)

# 7
def RSW_LOW(input_data):
  """Remove stop words, then lowercase the text."""
  without_stopwords = RSW(input_data)
  return LOW(without_stopwords)

# 8
def RSW_STM(input_data):
  """Remove stop words, then apply Porter stemming."""
  without_stopwords = RSW(input_data)
  return STM(without_stopwords)

# 9
def STM_LOW(input_data):
  """Apply Porter stemming, then lowercase the text."""
  stemmed = STM(input_data)
  return LOW(stemmed)

# 10
def STM_RSW(input_data):
  """Apply Porter stemming, then remove stop words."""
  stemmed = STM(input_data)
  return RSW(stemmed)
  
# 11
def LOW_STM_RSW(input_data):
  """Lowercase, then stem, then remove stop words."""
  lowered = LOW(input_data)
  stemmed = STM(lowered)
  return RSW(stemmed)

# 12
def LOW_RSW_STM(input_data):
  """Lowercase, then remove stop words, then stem."""
  lowered = LOW(input_data)
  without_stopwords = RSW(lowered)
  return STM(without_stopwords)

# 13
def STM_LOW_RSW(input_data):
  """Stem, then lowercase, then remove stop words."""
  stemmed = STM(input_data)
  lowered = LOW(stemmed)
  return RSW(lowered)

# 14
def STM_RSW_LOW(input_data):
  """Stem, then remove stop words, then lowercase."""
  stemmed = STM(input_data)
  without_stopwords = RSW(stemmed)
  return LOW(without_stopwords)

# 15
def RSW_LOW_STM(input_data):
  """Remove stop words, then lowercase, then stem."""
  without_stopwords = RSW(input_data)
  lowered = LOW(without_stopwords)
  return STM(lowered)

# 16
def RSW_STM_LOW(input_data):
  """Remove stop words, then stem, then lowercase."""
  without_stopwords = RSW(input_data)
  stemmed = STM(without_stopwords)
  return LOW(stemmed)

## Define a dictionary mapping function names to preprocessing functions, and a dictionary to store the model results.

In [None]:
# Accuracy summaries, keyed by the name of the preprocessing pipeline.
model_results = dict()

# The four elementary preprocessing functions.
prepro_functions_dict_base = dict(DON=DON, LOW=LOW, RSW=RSW, STM=STM)

# Every pipeline under test: the 15 ordered combinations of the three real
# preprocessing steps (3 singles + 6 pairs + 6 triples) plus the do-nothing
# baseline. Names list the steps in application order.
prepro_functions_dict_comb = dict(
    DON=DON,                  # 1. Do nothing
    LOW=LOW,                  # 2. Lowercasing
    RSW=RSW,                  # 3. Removing Stopwords
    STM=STM,                  # 4. Porter Stemming
    LOW_RSW=LOW_RSW,          # 5. LOW->RSW
    LOW_STM=LOW_STM,          # 6. LOW->STM
    RSW_LOW=RSW_LOW,          # 7. RSW->LOW
    RSW_STM=RSW_STM,          # 8. RSW->STM
    STM_LOW=STM_LOW,          # 9. STM->LOW
    STM_RSW=STM_RSW,          # 10. STM->RSW
    LOW_STM_RSW=LOW_STM_RSW,  # 11. LOW->STM->RSW
    LOW_RSW_STM=LOW_RSW_STM,  # 12. LOW->RSW->STM
    STM_LOW_RSW=STM_LOW_RSW,  # 13. STM->LOW->RSW
    STM_RSW_LOW=STM_RSW_LOW,  # 14. STM->RSW->LOW
    RSW_LOW_STM=RSW_LOW_STM,  # 15. RSW->LOW->STM
    RSW_STM_LOW=RSW_STM_LOW,  # 16. RSW->STM->LOW
)

# List the pipeline names and give each one an empty result list.
for key in prepro_functions_dict_comb.keys():
  print(key)
  model_results[key] = list()

DON
LOW
RSW
STM
LOW_RSW
LOW_STM
RSW_LOW
RSW_STM
STM_LOW
STM_RSW
LOW_STM_RSW
LOW_RSW_STM
STM_LOW_RSW
STM_RSW_LOW
RSW_LOW_STM
RSW_STM_LOW


## Function to convert the datasets to pandas DataFrames

In [None]:
def preprocess_and_convert_ds(preprocessing_function, train_dataset=None, test_dataset=None):
  """Preprocess the train and test datasets and convert them to DataFrames.

  Args:
    preprocessing_function: callable applied to the raw text (bytes) of every
      sample; it must return an object whose .numpy() yields UTF-8 bytes.
    train_dataset: iterable of (texts, labels) batches of size 1. Defaults to
      the notebook-level train_ds.
    test_dataset: iterable of (texts, labels) batches of size 1. Defaults to
      the notebook-level test_ds.

  Returns:
    (train_df, test_df): two pandas DataFrames with the columns 'text'
    (preprocessed string) and 'label' (int), in that order.
  """
  def _to_dataframe(dataset):
    # Collect plain records first and build the frame once at the end.
    records = []
    for texts, labels in dataset:
      text = preprocessing_function(texts[0].numpy()).numpy().decode('UTF-8')
      # Index the single label before converting: int() on a 1-element array
      # is deprecated in recent NumPy versions.
      records.append({
          'text': text,
          'label': int(labels[0].numpy())
      })
    # Explicit columns keep the frame layout stable even for an empty dataset.
    return pd.DataFrame(records, columns=['text', 'label'])

  # Resolve the defaults at call time so the notebook datasets are only
  # required when no explicit dataset is given.
  if train_dataset is None:
    train_dataset = train_ds
  if test_dataset is None:
    test_dataset = test_ds

  train_df = _to_dataframe(train_dataset)
  test_df = _to_dataframe(test_dataset)

  return train_df, test_df


## Print some RAW and preprocessed samples (No need to execute)

In [None]:
# Show the first two training samples before and after preprocessing.
# LOW is used for the demonstration; any function stored in
# prepro_functions_dict_comb can be substituted.
for element in train_ds.take(2):
  authorDocument=element[0]
  temp = LOW(authorDocument[0].numpy()).numpy().decode('UTF-8')
  print("Not-Preprocessed samples: \n",authorDocument)
  print("Preprocessed samples: \n",temp)

NameError: ignored

## Some parameters definition...

In [None]:
# Use the GPU for RoBERTa only when CUDA is actually available.
cuda_available = torch.cuda.is_available()
print('Cuda available? ',cuda_available)

# Experiment schedule: epochs trained within one run, and number of
# independent runs per preprocessing pipeline.
num_epochs_per_run, num_runs = 10, 5

Cuda available?  False


## Training and evaluation of the model

In [None]:
# Start from an empty result list for every preprocessing pipeline.
for key in prepro_functions_dict_comb:
  model_results[key]=[]

for key in prepro_functions_dict_comb:

  # One training epoch per train_model() call, so that the model can be
  # evaluated on the test set after every epoch.
  model_args = ClassificationArgs(num_train_epochs=1,
                                      no_save=True,
                                      no_cache=True,
                                      silent=True,
                                      overwrite_output_dir=True)

  runs_accuracy = []

  print("\n\n* * * * EVALUATION USING", key, "AS PREPROCESSING FUNCTION * * * *")

  # Preprocess train and test set and convert to DFs.
  train_df,test_df = preprocess_and_convert_ds(prepro_functions_dict_comb[key])

  for run in range(1,(num_runs+1)):
    print("\nRUN NUMBER: ", run)
    epochs_accuracy=[]
    # Every run starts from a freshly loaded roberta-base model. (No model is
    # built outside this loop: it would be discarded before being used.)
    model = ClassificationModel("roberta",
                                  'roberta-base',
                                  args = model_args,
                                  num_labels=2,
                                  use_cuda=cuda_available)
    for epoch in range (0,num_epochs_per_run):
      print("\nEPOCH NUMBER: ", epoch)
      # train model
      print("\nNOW TRAIN THE MODEL.")
      model.train_model(train_df,show_running_loss=False)
      print("\nNOW EVALUATE THE TEST DF.")
      result, model_outputs, wrong_predictions = model.eval_model(test_df)
      # Results on test set.
      print(result)
      correct_predictions = result['tp']+result['tn']
      print("Correct predictions are: ",correct_predictions)
      total_predictions = result['tp']+result['tn']+result['fp']+result['fn']
      print("Total predictions are: ",total_predictions)
      accuracy = correct_predictions/total_predictions
      print("Accuracy on test set is:",accuracy,"\n\n")
      epochs_accuracy.append(accuracy)

    print(epochs_accuracy)
    # A run is summarized by its best epoch.
    runs_accuracy.append(max(epochs_accuracy))

  runs_accuracy.sort()
  print("\n\n Over all runs maximum accuracies are:", runs_accuracy)

  # Median over the sorted run results, valid for any number of runs
  # (for an even count, the mean of the two middle values).
  runs_count = len(runs_accuracy)
  middle = runs_count // 2
  if runs_count % 2:
    median_accuracy = runs_accuracy[middle]
  else:
    median_accuracy = (runs_accuracy[middle-1]+runs_accuracy[middle])/2
  print("The median is:",median_accuracy)

  # Report the larger of the two distances between the median and the extremes.
  max_range_from_median = max(median_accuracy-runs_accuracy[0],
                              runs_accuracy[-1]-median_accuracy)
  final_result = str(median_accuracy)+" +/- "+ str(max_range_from_median)
  model_results[key].append(final_result)
  print("RoBERTa Accuracy Score on Test set -> ",model_results[key])


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.



* * * * EVALUATION USING DON AS PREPROCESSING FUNCTION * * * *

RUN NUMBER:  1


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.


EPOCH NUMBER:  0

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.754938108622098, 'tp': 2263, 'tn': 2122, 'fp': 371, 'fn': 244, 'auroc': 0.9529713112950806, 'auprc': 0.9505973707213176, 'eval_loss': 0.3132915409252048}
Correct predictions are:  4385
Total predictions are:  5000
Accuracy on test set is: 0.877 



EPOCH NUMBER:  1

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7682038972214543, 'tp': 2247, 'tn': 2173, 'fp': 320, 'fn': 260, 'auroc': 0.9551056480282805, 'auprc': 0.953263324095195, 'eval_loss': 0.38187613041996954}
Correct predictions are:  4420
Total predictions are:  5000
Accuracy on test set is: 0.884 



EPOCH NUMBER:  2

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7577270405533774, 'tp': 2266, 'tn': 2126, 'fp': 367, 'fn': 241, 'auroc': 0.9535175555776357, 'auprc': 0.9528859963228092, 'eval_loss': 0.4419690307378769}
Correct predictions are:  4392
Total predictions are:  5000
Accuracy on test set is: 0.8784 



EPOCH NUMBER:  3

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7687515446699587, 'tp': 2286, 'tn': 2133, 'fp': 360, 'fn': 221, 'auroc': 0.9547054048903744, 'auprc': 0.9520383092005978, 'eval_loss': 0.530794188869372}
Correct predictions are:  4419
Total predictions are:  5000
Accuracy on test set is: 0.8838 



EPOCH NUMBER:  4

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7620025783413479, 'tp': 2215, 'tn': 2190, 'fp': 303, 'fn': 292, 'auroc': 0.9536068362775963, 'auprc': 0.9524511204160455, 'eval_loss': 0.5554404302123934}
Correct predictions are:  4405
Total predictions are:  5000
Accuracy on test set is: 0.881 



EPOCH NUMBER:  5

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7554917762003196, 'tp': 2237, 'tn': 2151, 'fp': 342, 'fn': 270, 'auroc': 0.9500728885714465, 'auprc': 0.945976993459097, 'eval_loss': 0.6426424967063591}
Correct predictions are:  4388
Total predictions are:  5000
Accuracy on test set is: 0.8776 



EPOCH NUMBER:  6

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7503201626658003, 'tp': 2250, 'tn': 2124, 'fp': 369, 'fn': 257, 'auroc': 0.947987992225859, 'auprc': 0.9424164892867258, 'eval_loss': 0.7041104320526123}
Correct predictions are:  4374
Total predictions are:  5000
Accuracy on test set is: 0.8748 



EPOCH NUMBER:  7

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7465185184809474, 'tp': 2144, 'tn': 2221, 'fp': 272, 'fn': 363, 'auroc': 0.9479680720696849, 'auprc': 0.9438770776345335, 'eval_loss': 0.72091151313968}
Correct predictions are:  4365
Total predictions are:  5000
Accuracy on test set is: 0.873 



EPOCH NUMBER:  8

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7510091602280606, 'tp': 2167, 'tn': 2210, 'fp': 283, 'fn': 340, 'auroc': 0.9481839937625111, 'auprc': 0.9455342441280421, 'eval_loss': 0.8010648155923933}
Correct predictions are:  4377
Total predictions are:  5000
Accuracy on test set is: 0.8754 



EPOCH NUMBER:  9

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7644279405828643, 'tp': 2255, 'tn': 2155, 'fp': 338, 'fn': 252, 'auroc': 0.9495454444362844, 'auprc': 0.943595746299547, 'eval_loss': 0.7286065812002868}
Correct predictions are:  4410
Total predictions are:  5000
Accuracy on test set is: 0.882 


[0.877, 0.884, 0.8784, 0.8838, 0.881, 0.8776, 0.8748, 0.873, 0.8754, 0.882]

RUN NUMBER:  2


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.


EPOCH NUMBER:  0

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7597425068180992, 'tp': 2232, 'tn': 2167, 'fp': 326, 'fn': 275, 'auroc': 0.9536881169148365, 'auprc': 0.9522786422717738, 'eval_loss': 0.3237849683135748}
Correct predictions are:  4399
Total predictions are:  5000
Accuracy on test set is: 0.8798 



EPOCH NUMBER:  1

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7672769161743739, 'tp': 2235, 'tn': 2183, 'fp': 310, 'fn': 272, 'auroc': 0.9541198002992343, 'auprc': 0.9543134808931724, 'eval_loss': 0.3949850876659155}
Correct predictions are:  4418
Total predictions are:  5000
Accuracy on test set is: 0.8836 



EPOCH NUMBER:  2

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7621453480820294, 'tp': 2186, 'tn': 2219, 'fp': 274, 'fn': 321, 'auroc': 0.9522911459625845, 'auprc': 0.9513414533807589, 'eval_loss': 0.4962596222508699}
Correct predictions are:  4405
Total predictions are:  5000
Accuracy on test set is: 0.881 



EPOCH NUMBER:  3

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7688523343218947, 'tp': 2299, 'tn': 2119, 'fp': 374, 'fn': 208, 'auroc': 0.9547293250779086, 'auprc': 0.9546490778482303, 'eval_loss': 0.503380943993479}
Correct predictions are:  4418
Total predictions are:  5000
Accuracy on test set is: 0.8836 



EPOCH NUMBER:  4

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7647978739003947, 'tp': 2214, 'tn': 2198, 'fp': 295, 'fn': 293, 'auroc': 0.9543485220924133, 'auprc': 0.9522588158095158, 'eval_loss': 0.5865491500595584}
Correct predictions are:  4412
Total predictions are:  5000
Accuracy on test set is: 0.8824 



EPOCH NUMBER:  5

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7690667757855546, 'tp': 2186, 'tn': 2236, 'fp': 257, 'fn': 321, 'auroc': 0.9539785991922176, 'auprc': 0.9520029202074825, 'eval_loss': 0.6433624504799024}
Correct predictions are:  4422
Total predictions are:  5000
Accuracy on test set is: 0.8844 



EPOCH NUMBER:  6

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7643979501580198, 'tp': 2213, 'tn': 2198, 'fp': 295, 'fn': 294, 'auroc': 0.9377184717128183, 'auprc': 0.9439043253814954, 'eval_loss': 0.7456477667503059}
Correct predictions are:  4411
Total predictions are:  5000
Accuracy on test set is: 0.8822 



EPOCH NUMBER:  7

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7605700671943327, 'tp': 2182, 'tn': 2219, 'fp': 274, 'fn': 325, 'auroc': 0.9487554382426358, 'auprc': 0.9413627930969906, 'eval_loss': 0.6696175861876458}
Correct predictions are:  4401
Total predictions are:  5000
Accuracy on test set is: 0.8802 



EPOCH NUMBER:  8

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7533500986026186, 'tp': 2247, 'tn': 2135, 'fp': 358, 'fn': 260, 'auroc': 0.9490260003638429, 'auprc': 0.9469624535491705, 'eval_loss': 0.7044237973585725}
Correct predictions are:  4382
Total predictions are:  5000
Accuracy on test set is: 0.8764 



EPOCH NUMBER:  9

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.757204799094917, 'tp': 2210, 'tn': 2183, 'fp': 310, 'fn': 297, 'auroc': 0.9490968009189191, 'auprc': 0.9427910642837064, 'eval_loss': 0.7450629519903101}
Correct predictions are:  4393
Total predictions are:  5000
Accuracy on test set is: 0.8786 


[0.8798, 0.8836, 0.881, 0.8836, 0.8824, 0.8844, 0.8822, 0.8802, 0.8764, 0.8786]

RUN NUMBER:  3


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.


EPOCH NUMBER:  0

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7591780029924174, 'tp': 2303, 'tn': 2089, 'fp': 404, 'fn': 204, 'auroc': 0.9521460248248347, 'auprc': 0.9486782723359195, 'eval_loss': 0.3235691763475537}
Correct predictions are:  4392
Total predictions are:  5000
Accuracy on test set is: 0.8784 



EPOCH NUMBER:  1

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7645215784434829, 'tp': 2274, 'tn': 2135, 'fp': 358, 'fn': 233, 'auroc': 0.9539001185769296, 'auprc': 0.950901911307503, 'eval_loss': 0.37107261291146276}
Correct predictions are:  4409
Total predictions are:  5000
Accuracy on test set is: 0.8818 



EPOCH NUMBER:  2

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7688097903315592, 'tp': 2226, 'tn': 2196, 'fp': 297, 'fn': 281, 'auroc': 0.953703157032751, 'auprc': 0.9505905440867591, 'eval_loss': 0.446359068647027}
Correct predictions are:  4422
Total predictions are:  5000
Accuracy on test set is: 0.8844 



EPOCH NUMBER:  3

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7668652239427695, 'tp': 2200, 'tn': 2217, 'fp': 276, 'fn': 307, 'auroc': 0.9557759732836306, 'auprc': 0.9543837677347025, 'eval_loss': 0.507333926102519}
Correct predictions are:  4417
Total predictions are:  5000
Accuracy on test set is: 0.8834 



EPOCH NUMBER:  4

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7631987502213726, 'tp': 2214, 'tn': 2194, 'fp': 299, 'fn': 293, 'auroc': 0.9479520719442439, 'auprc': 0.9264832309450399, 'eval_loss': 0.5979872757006437}
Correct predictions are:  4408
Total predictions are:  5000
Accuracy on test set is: 0.8816 



EPOCH NUMBER:  5

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7726127253086389, 'tp': 2253, 'tn': 2178, 'fp': 315, 'fn': 254, 'auroc': 0.9549266866252231, 'auprc': 0.9526866411920065, 'eval_loss': 0.5972542064115405}
Correct predictions are:  4431
Total predictions are:  5000
Accuracy on test set is: 0.8862 



EPOCH NUMBER:  6

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7626310842176793, 'tp': 2242, 'tn': 2164, 'fp': 329, 'fn': 265, 'auroc': 0.9529073907939436, 'auprc': 0.9519404054406664, 'eval_loss': 0.6266137860540301}
Correct predictions are:  4406
Total predictions are:  5000
Accuracy on test set is: 0.8812 



EPOCH NUMBER:  7

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7632002105606741, 'tp': 2171, 'tn': 2236, 'fp': 257, 'fn': 336, 'auroc': 0.9509582555127231, 'auprc': 0.9498857846421064, 'eval_loss': 0.6323644897442311}
Correct predictions are:  4407
Total predictions are:  5000
Accuracy on test set is: 0.8814 



EPOCH NUMBER:  8

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7581126135359263, 'tp': 2266, 'tn': 2127, 'fp': 366, 'fn': 241, 'auroc': 0.898493684190484, 'auprc': 0.9117107536203523, 'eval_loss': 0.7279770409049466}
Correct predictions are:  4393
Total predictions are:  5000
Accuracy on test set is: 0.8786 



EPOCH NUMBER:  9

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7516045979334833, 'tp': 2203, 'tn': 2176, 'fp': 317, 'fn': 304, 'auroc': 0.9481359933861881, 'auprc': 0.9462182331407994, 'eval_loss': 0.6991746229579672}
Correct predictions are:  4379
Total predictions are:  5000
Accuracy on test set is: 0.8758 


[0.8784, 0.8818, 0.8844, 0.8834, 0.8816, 0.8862, 0.8812, 0.8814, 0.8786, 0.8758]

RUN NUMBER:  4


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.


EPOCH NUMBER:  0

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7648016403773279, 'tp': 2218, 'tn': 2194, 'fp': 299, 'fn': 289, 'auroc': 0.9530345117905723, 'auprc': 0.9496935432283458, 'eval_loss': 0.3092350295126438}
Correct predictions are:  4412
Total predictions are:  5000
Accuracy on test set is: 0.8824 



EPOCH NUMBER:  1

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7511210736523918, 'tp': 2251, 'tn': 2125, 'fp': 368, 'fn': 256, 'auroc': 0.9519101829758345, 'auprc': 0.9504678628569521, 'eval_loss': 0.4324251752607524}
Correct predictions are:  4376
Total predictions are:  5000
Accuracy on test set is: 0.8752 



EPOCH NUMBER:  2

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7632762523342344, 'tp': 2230, 'tn': 2178, 'fp': 315, 'fn': 277, 'auroc': 0.9515397800718756, 'auprc': 0.9493874927327175, 'eval_loss': 0.477472404935956}
Correct predictions are:  4408
Total predictions are:  5000
Accuracy on test set is: 0.8816 



EPOCH NUMBER:  3

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7497578393242537, 'tp': 2221, 'tn': 2153, 'fp': 340, 'fn': 286, 'auroc': 0.9501096088593335, 'auprc': 0.9484888533113941, 'eval_loss': 0.543762079654634}
Correct predictions are:  4374
Total predictions are:  5000
Accuracy on test set is: 0.8748 



EPOCH NUMBER:  4

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.760022656320734, 'tp': 2198, 'tn': 2202, 'fp': 291, 'fn': 309, 'auroc': 0.9519711434537647, 'auprc': 0.9525057393377707, 'eval_loss': 0.5998582600500435}
Correct predictions are:  4400
Total predictions are:  5000
Accuracy on test set is: 0.88 



EPOCH NUMBER:  5

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7477451308869199, 'tp': 2240, 'tn': 2128, 'fp': 365, 'fn': 267, 'auroc': 0.9505893726206813, 'auprc': 0.9513174086653766, 'eval_loss': 0.6176820873912424}
Correct predictions are:  4368
Total predictions are:  5000
Accuracy on test set is: 0.8736 



EPOCH NUMBER:  6

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7572860920832915, 'tp': 2249, 'tn': 2143, 'fp': 350, 'fn': 258, 'auroc': 0.9499126473151549, 'auprc': 0.94920555753996, 'eval_loss': 0.6196344303788617}
Correct predictions are:  4392
Total predictions are:  5000
Accuracy on test set is: 0.8784 



EPOCH NUMBER:  7

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7555978706052944, 'tp': 2202, 'tn': 2187, 'fp': 306, 'fn': 305, 'auroc': 0.9479071915923821, 'auprc': 0.9444745902464448, 'eval_loss': 0.6572746099311858}
Correct predictions are:  4389
Total predictions are:  5000
Accuracy on test set is: 0.8778 



EPOCH NUMBER:  8

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7575550106955297, 'tp': 2243, 'tn': 2150, 'fp': 343, 'fn': 264, 'auroc': 0.9486463173871283, 'auprc': 0.9445491388539877, 'eval_loss': 0.680593849513121}
Correct predictions are:  4393
Total predictions are:  5000
Accuracy on test set is: 0.8786 



EPOCH NUMBER:  9

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7348717434856539, 'tp': 2159, 'tn': 2178, 'fp': 315, 'fn': 348, 'auroc': 0.9364719819403384, 'auprc': 0.9109427564387452, 'eval_loss': 0.7813654023980722}
Correct predictions are:  4337
Total predictions are:  5000
Accuracy on test set is: 0.8674 


[0.8824, 0.8752, 0.8816, 0.8748, 0.88, 0.8736, 0.8784, 0.8778, 0.8786, 0.8674]

RUN NUMBER:  5


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.


EPOCH NUMBER:  0

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7544510842879878, 'tp': 2216, 'tn': 2170, 'fp': 323, 'fn': 291, 'auroc': 0.9528751505411802, 'auprc': 0.9512042400204942, 'eval_loss': 0.33566144507080314}
Correct predictions are:  4386
Total predictions are:  5000
Accuracy on test set is: 0.8772 



EPOCH NUMBER:  1

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7713517788240334, 'tp': 2247, 'tn': 2181, 'fp': 312, 'fn': 260, 'auroc': 0.9557815733275349, 'auprc': 0.953585405792302, 'eval_loss': 0.3510309385582805}
Correct predictions are:  4428
Total predictions are:  5000
Accuracy on test set is: 0.8856 



EPOCH NUMBER:  2

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7658504561130164, 'tp': 2316, 'tn': 2092, 'fp': 401, 'fn': 191, 'auroc': 0.9545238034666192, 'auprc': 0.951917471941198, 'eval_loss': 0.4263353682488203}
Correct predictions are:  4408
Total predictions are:  5000
Accuracy on test set is: 0.8816 



EPOCH NUMBER:  3

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7713396955472032, 'tp': 2246, 'tn': 2182, 'fp': 311, 'fn': 261, 'auroc': 0.9514564194183283, 'auprc': 0.9485819792426995, 'eval_loss': 0.5136554895877838}
Correct predictions are:  4428
Total predictions are:  5000
Accuracy on test set is: 0.8856 



EPOCH NUMBER:  4

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7662853599060031, 'tp': 2268, 'tn': 2146, 'fp': 347, 'fn': 239, 'auroc': 0.951677701153177, 'auprc': 0.9482988628622041, 'eval_loss': 0.5784692725084722}
Correct predictions are:  4414
Total predictions are:  5000
Accuracy on test set is: 0.8828 



EPOCH NUMBER:  5

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7561474845580967, 'tp': 2228, 'tn': 2162, 'fp': 331, 'fn': 279, 'auroc': 0.951494579717505, 'auprc': 0.9481788959732541, 'eval_loss': 0.6615008772522211}
Correct predictions are:  4390
Total predictions are:  5000
Accuracy on test set is: 0.878 



EPOCH NUMBER:  6

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7604006338797329, 'tp': 2212, 'tn': 2189, 'fp': 304, 'fn': 295, 'auroc': 0.9494229634760337, 'auprc': 0.9475935621989278, 'eval_loss': 0.6181033768525347}
Correct predictions are:  4401
Total predictions are:  5000
Accuracy on test set is: 0.8802 



EPOCH NUMBER:  7

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7492241624087278, 'tp': 2152, 'tn': 2220, 'fp': 273, 'fn': 355, 'auroc': 0.9459513362584763, 'auprc': 0.944526487539929, 'eval_loss': 0.703925025989674}
Correct predictions are:  4372
Total predictions are:  5000
Accuracy on test set is: 0.8744 



EPOCH NUMBER:  8

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7420619766766573, 'tp': 2141, 'tn': 2213, 'fp': 280, 'fn': 366, 'auroc': 0.916560065830916, 'auprc': 0.8548768923457768, 'eval_loss': 0.7014652471166104}
Correct predictions are:  4354
Total predictions are:  5000
Accuracy on test set is: 0.8708 



EPOCH NUMBER:  9

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.740740494733726, 'tp': 2308, 'tn': 2034, 'fp': 459, 'fn': 199, 'auroc': 0.9346524476751898, 'auprc': 0.8921191987585892, 'eval_loss': 0.7218879540137947}
Correct predictions are:  4342
Total predictions are:  5000
Accuracy on test set is: 0.8684 


[0.8772, 0.8856, 0.8816, 0.8856, 0.8828, 0.878, 0.8802, 0.8744, 0.8708, 0.8684]


 Over all runs maximum accuracies are: [0.8824, 0.884, 0.8844, 0.8856, 0.8862]
The median is: 0.8844
RoBERTa Accuracy Score on Test set ->  ['0.8844 +/- 0.0020000000000000018']


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.



* * * * EVALUATION USING LOW AS PREPROCESSING FUNCTION * * * *

RUN NUMBER:  1


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.


EPOCH NUMBER:  0

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.755544591082547, 'tp': 2240, 'tn': 2148, 'fp': 345, 'fn': 267, 'auroc': 0.9504087312044526, 'auprc': 0.947690070251191, 'eval_loss': 0.33618136206418275}
Correct predictions are:  4388
Total predictions are:  5000
Accuracy on test set is: 0.8776 



EPOCH NUMBER:  1

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7416735394218827, 'tp': 2257, 'tn': 2094, 'fp': 399, 'fn': 250, 'auroc': 0.949823606617076, 'auprc': 0.9482792998814706, 'eval_loss': 0.40799795610010625}
Correct predictions are:  4351
Total predictions are:  5000
Accuracy on test set is: 0.8702 



EPOCH NUMBER:  2

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7605548518702583, 'tp': 2234, 'tn': 2167, 'fp': 326, 'fn': 273, 'auroc': 0.9503757709460443, 'auprc': 0.946978581779973, 'eval_loss': 0.4232261982858181}
Correct predictions are:  4401
Total predictions are:  5000
Accuracy on test set is: 0.8802 



EPOCH NUMBER:  3

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7539982017540109, 'tp': 2202, 'tn': 2183, 'fp': 310, 'fn': 305, 'auroc': 0.9511224968003749, 'auprc': 0.950220770849169, 'eval_loss': 0.5081599104002118}
Correct predictions are:  4385
Total predictions are:  5000
Accuracy on test set is: 0.877 



EPOCH NUMBER:  4

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.749781939658, 'tp': 2244, 'tn': 2129, 'fp': 364, 'fn': 263, 'auroc': 0.9503130504543155, 'auprc': 0.9488933576716698, 'eval_loss': 0.5650728192046285}
Correct predictions are:  4373
Total predictions are:  5000
Accuracy on test set is: 0.8746 



EPOCH NUMBER:  5

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7474875264273246, 'tp': 2279, 'tn': 2085, 'fp': 408, 'fn': 228, 'auroc': 0.9489634398733686, 'auprc': 0.9462481725378848, 'eval_loss': 0.6276882143758238}
Correct predictions are:  4364
Total predictions are:  5000
Accuracy on test set is: 0.8728 



EPOCH NUMBER:  6

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.747998485128049, 'tp': 2195, 'tn': 2175, 'fp': 318, 'fn': 312, 'auroc': 0.9489380796745447, 'auprc': 0.9453636016704875, 'eval_loss': 0.656616228915751}
Correct predictions are:  4370
Total predictions are:  5000
Accuracy on test set is: 0.874 



EPOCH NUMBER:  7

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.736411727600146, 'tp': 2171, 'tn': 2170, 'fp': 323, 'fn': 336, 'auroc': 0.9455998135025379, 'auprc': 0.9434266395931046, 'eval_loss': 0.6957638150811195}
Correct predictions are:  4341
Total predictions are:  5000
Accuracy on test set is: 0.8682 



EPOCH NUMBER:  8

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7433869365169252, 'tp': 2159, 'tn': 2199, 'fp': 294, 'fn': 348, 'auroc': 0.9403532123691849, 'auprc': 0.9415696564113749, 'eval_loss': 0.6894046801302582}
Correct predictions are:  4358
Total predictions are:  5000
Accuracy on test set is: 0.8716 



EPOCH NUMBER:  9

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7425652814481098, 'tp': 2249, 'tn': 2105, 'fp': 388, 'fn': 258, 'auroc': 0.9452596508356624, 'auprc': 0.9402412548879399, 'eval_loss': 0.702355285038054}
Correct predictions are:  4354
Total predictions are:  5000
Accuracy on test set is: 0.8708 


[0.8776, 0.8702, 0.8802, 0.877, 0.8746, 0.8728, 0.874, 0.8682, 0.8716, 0.8708]

RUN NUMBER:  2


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.


EPOCH NUMBER:  0

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7524046266708311, 'tp': 2204, 'tn': 2177, 'fp': 316, 'fn': 303, 'auroc': 0.9494293635262099, 'auprc': 0.9481557846357894, 'eval_loss': 0.36128750175237656}
Correct predictions are:  4381
Total predictions are:  5000
Accuracy on test set is: 0.8762 



EPOCH NUMBER:  1

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.753858266382477, 'tp': 2233, 'tn': 2151, 'fp': 342, 'fn': 274, 'auroc': 0.9500623284886553, 'auprc': 0.948287943863943, 'eval_loss': 0.4463342790275812}
Correct predictions are:  4384
Total predictions are:  5000
Accuracy on test set is: 0.8768 



EPOCH NUMBER:  2

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7495320718584272, 'tp': 2232, 'tn': 2141, 'fp': 352, 'fn': 275, 'auroc': 0.9488623990812088, 'auprc': 0.9460927711900419, 'eval_loss': 0.45786396456807854}
Correct predictions are:  4373
Total predictions are:  5000
Accuracy on test set is: 0.8746 



EPOCH NUMBER:  3

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7524823052361913, 'tp': 2243, 'tn': 2137, 'fp': 356, 'fn': 264, 'auroc': 0.9468464632762722, 'auprc': 0.9445029058795043, 'eval_loss': 0.5265922792971134}
Correct predictions are:  4380
Total predictions are:  5000
Accuracy on test set is: 0.876 



EPOCH NUMBER:  4

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.749300286788036, 'tp': 2255, 'tn': 2116, 'fp': 377, 'fn': 252, 'auroc': 0.94747734822241, 'auprc': 0.9446969465496895, 'eval_loss': 0.5918124804571271}
Correct predictions are:  4371
Total predictions are:  5000
Accuracy on test set is: 0.8742 



EPOCH NUMBER:  5

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7412333778046004, 'tp': 2197, 'tn': 2156, 'fp': 337, 'fn': 310, 'auroc': 0.9441313219895644, 'auprc': 0.9409015841753449, 'eval_loss': 0.6146714581944048}
Correct predictions are:  4353
Total predictions are:  5000
Accuracy on test set is: 0.8706 



EPOCH NUMBER:  6

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7508022441206641, 'tp': 2201, 'tn': 2176, 'fp': 317, 'fn': 306, 'auroc': 0.9440080410230416, 'auprc': 0.9405967722650311, 'eval_loss': 0.6482548541948199}
Correct predictions are:  4377
Total predictions are:  5000
Accuracy on test set is: 0.8754 



EPOCH NUMBER:  7

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7472457898946838, 'tp': 2178, 'tn': 2190, 'fp': 303, 'fn': 329, 'auroc': 0.9466846220074365, 'auprc': 0.9429516986066807, 'eval_loss': 0.6860254952343181}
Correct predictions are:  4368
Total predictions are:  5000
Accuracy on test set is: 0.8736 



EPOCH NUMBER:  8

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.730806070309698, 'tp': 2112, 'tn': 2213, 'fp': 280, 'fn': 395, 'auroc': 0.9408098559492706, 'auprc': 0.9371649617607838, 'eval_loss': 0.7314385266229511}
Correct predictions are:  4325
Total predictions are:  5000
Accuracy on test set is: 0.865 



EPOCH NUMBER:  9

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7373359051291936, 'tp': 2227, 'tn': 2115, 'fp': 378, 'fn': 280, 'auroc': 0.9426522703937997, 'auprc': 0.9358685653372962, 'eval_loss': 0.7768301411334425}
Correct predictions are:  4342
Total predictions are:  5000
Accuracy on test set is: 0.8684 


[0.8762, 0.8768, 0.8746, 0.876, 0.8742, 0.8706, 0.8754, 0.8736, 0.865, 0.8684]

RUN NUMBER:  3


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.


EPOCH NUMBER:  0

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7525434012162666, 'tp': 2135, 'tn': 2244, 'fp': 249, 'fn': 372, 'auroc': 0.9520199438363596, 'auprc': 0.9501771502301808, 'eval_loss': 0.3469879975497723}
Correct predictions are:  4379
Total predictions are:  5000
Accuracy on test set is: 0.8758 



EPOCH NUMBER:  1

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7506310050505647, 'tp': 2290, 'tn': 2081, 'fp': 412, 'fn': 217, 'auroc': 0.9532165932180908, 'auprc': 0.950048833753542, 'eval_loss': 0.4745464895848185}
Correct predictions are:  4371
Total predictions are:  5000
Accuracy on test set is: 0.8742 



EPOCH NUMBER:  2

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7582523390288064, 'tp': 2238, 'tn': 2157, 'fp': 336, 'fn': 269, 'auroc': 0.9540969201198537, 'auprc': 0.9504432051690076, 'eval_loss': 0.5155596199855208}
Correct predictions are:  4395
Total predictions are:  5000
Accuracy on test set is: 0.879 



EPOCH NUMBER:  3

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.764397911629469, 'tp': 2214, 'tn': 2197, 'fp': 296, 'fn': 293, 'auroc': 0.9546247642581518, 'auprc': 0.9528134985738902, 'eval_loss': 0.5359511402349919}
Correct predictions are:  4411
Total predictions are:  5000
Accuracy on test set is: 0.8822 



EPOCH NUMBER:  4

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7479978527832064, 'tp': 2194, 'tn': 2176, 'fp': 317, 'fn': 313, 'auroc': 0.9494841639558453, 'auprc': 0.9465753031954607, 'eval_loss': 0.617453955027461}
Correct predictions are:  4370
Total predictions are:  5000
Accuracy on test set is: 0.874 



EPOCH NUMBER:  5

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7422105295852961, 'tp': 2236, 'tn': 2118, 'fp': 375, 'fn': 271, 'auroc': 0.9487467181742706, 'auprc': 0.9450545727268589, 'eval_loss': 0.6386598668288439}
Correct predictions are:  4354
Total predictions are:  5000
Accuracy on test set is: 0.8708 



EPOCH NUMBER:  6

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7532288194533806, 'tp': 2211, 'tn': 2172, 'fp': 321, 'fn': 296, 'auroc': 0.9504380114340096, 'auprc': 0.945496673741393, 'eval_loss': 0.7068623177208007}
Correct predictions are:  4383
Total predictions are:  5000
Accuracy on test set is: 0.8766 



EPOCH NUMBER:  7

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.753858266382477, 'tp': 2233, 'tn': 2151, 'fp': 342, 'fn': 274, 'auroc': 0.9512552178409079, 'auprc': 0.9502406845407215, 'eval_loss': 0.7086620261851698}
Correct predictions are:  4384
Total predictions are:  5000
Accuracy on test set is: 0.8768 



EPOCH NUMBER:  8

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7456100140126574, 'tp': 2183, 'tn': 2181, 'fp': 312, 'fn': 324, 'auroc': 0.9469975844610621, 'auprc': 0.9394310330653102, 'eval_loss': 0.7861946324986405}
Correct predictions are:  4364
Total predictions are:  5000
Accuracy on test set is: 0.8728 



EPOCH NUMBER:  9

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7540201067951672, 'tp': 2258, 'tn': 2125, 'fp': 368, 'fn': 249, 'auroc': 0.9445387651839192, 'auprc': 0.9329575027080576, 'eval_loss': 0.7599136639865115}
Correct predictions are:  4383
Total predictions are:  5000
Accuracy on test set is: 0.8766 


[0.8758, 0.8742, 0.879, 0.8822, 0.874, 0.8708, 0.8766, 0.8768, 0.8728, 0.8766]

RUN NUMBER:  4


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.


EPOCH NUMBER:  0

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7350033939270988, 'tp': 2227, 'tn': 2109, 'fp': 384, 'fn': 280, 'auroc': 0.9441856424154365, 'auprc': 0.9420287338135092, 'eval_loss': 0.3620738444760442}
Correct predictions are:  4336
Total predictions are:  5000
Accuracy on test set is: 0.8672 



EPOCH NUMBER:  1

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7263125633248632, 'tp': 2129, 'tn': 2186, 'fp': 307, 'fn': 378, 'auroc': 0.9411924989491919, 'auprc': 0.9397193095812026, 'eval_loss': 0.44632428987324235}
Correct predictions are:  4315
Total predictions are:  5000
Accuracy on test set is: 0.863 



EPOCH NUMBER:  2

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7365622457608624, 'tp': 2272, 'tn': 2064, 'fp': 429, 'fn': 235, 'auroc': 0.9436464381880754, 'auprc': 0.9410553076264091, 'eval_loss': 0.4220385948389769}
Correct predictions are:  4336
Total predictions are:  5000
Accuracy on test set is: 0.8672 



EPOCH NUMBER:  3

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7249029576744712, 'tp': 2143, 'tn': 2169, 'fp': 324, 'fn': 364, 'auroc': 0.9414304208144991, 'auprc': 0.9399353407009803, 'eval_loss': 0.5183736719317734}
Correct predictions are:  4312
Total predictions are:  5000
Accuracy on test set is: 0.8624 



EPOCH NUMBER:  4

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7441006448475392, 'tp': 2209, 'tn': 2151, 'fp': 342, 'fn': 298, 'auroc': 0.9447374867418961, 'auprc': 0.9427300383526334, 'eval_loss': 0.5432258655719459}
Correct predictions are:  4360
Total predictions are:  5000
Accuracy on test set is: 0.872 



EPOCH NUMBER:  5

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7380896374758878, 'tp': 2161, 'tn': 2184, 'fp': 309, 'fn': 346, 'auroc': 0.9421761866613034, 'auprc': 0.9375129555393753, 'eval_loss': 0.5967135988123715}
Correct predictions are:  4345
Total predictions are:  5000
Accuracy on test set is: 0.869 



EPOCH NUMBER:  6

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7280241062856395, 'tp': 2179, 'tn': 2141, 'fp': 352, 'fn': 328, 'auroc': 0.9445683654159848, 'auprc': 0.9405807539425518, 'eval_loss': 0.6216686956569553}
Correct predictions are:  4320
Total predictions are:  5000
Accuracy on test set is: 0.864 



EPOCH NUMBER:  7

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7279987097537036, 'tp': 2166, 'tn': 2154, 'fp': 339, 'fn': 341, 'auroc': 0.939215123446568, 'auprc': 0.9370172475157174, 'eval_loss': 0.681729870352149}
Correct predictions are:  4320
Total predictions are:  5000
Accuracy on test set is: 0.864 



EPOCH NUMBER:  8

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7428532029137097, 'tp': 2202, 'tn': 2155, 'fp': 338, 'fn': 305, 'auroc': 0.944628125884507, 'auprc': 0.9419174790112841, 'eval_loss': 0.6963198376288637}
Correct predictions are:  4357
Total predictions are:  5000
Accuracy on test set is: 0.8714 



EPOCH NUMBER:  9

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7292095811271613, 'tp': 2177, 'tn': 2146, 'fp': 347, 'fn': 330, 'auroc': 0.941112178319478, 'auprc': 0.9359660932103488, 'eval_loss': 0.7057149167446419}
Correct predictions are:  4323
Total predictions are:  5000
Accuracy on test set is: 0.8646 


[0.8672, 0.863, 0.8672, 0.8624, 0.872, 0.869, 0.864, 0.864, 0.8714, 0.8646]

RUN NUMBER:  5


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.


EPOCH NUMBER:  0

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7511152514384157, 'tp': 2233, 'tn': 2144, 'fp': 349, 'fn': 274, 'auroc': 0.949089680863098, 'auprc': 0.9462502815170927, 'eval_loss': 0.35026032426804304}
Correct predictions are:  4377
Total predictions are:  5000
Accuracy on test set is: 0.8754 



EPOCH NUMBER:  1

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7442934689631975, 'tp': 2255, 'tn': 2103, 'fp': 390, 'fn': 252, 'auroc': 0.9493088825816394, 'auprc': 0.946843455721955, 'eval_loss': 0.3784707838445902}
Correct predictions are:  4358
Total predictions are:  5000
Accuracy on test set is: 0.8716 



EPOCH NUMBER:  2

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.753110161135388, 'tp': 2253, 'tn': 2128, 'fp': 365, 'fn': 254, 'auroc': 0.9505983326909284, 'auprc': 0.944608941997401, 'eval_loss': 0.4899028937742114}
Correct predictions are:  4381
Total predictions are:  5000
Accuracy on test set is: 0.8762 



EPOCH NUMBER:  3

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7516187291715369, 'tp': 2207, 'tn': 2172, 'fp': 321, 'fn': 300, 'auroc': 0.947825990955769, 'auprc': 0.9453107663186231, 'eval_loss': 0.5107393462054431}
Correct predictions are:  4379
Total predictions are:  5000
Accuracy on test set is: 0.8758 



EPOCH NUMBER:  4

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7422941688602452, 'tp': 2273, 'tn': 2078, 'fp': 415, 'fn': 234, 'auroc': 0.9467831827801529, 'auprc': 0.937721251870462, 'eval_loss': 0.6267895316001028}
Correct predictions are:  4351
Total predictions are:  5000
Accuracy on test set is: 0.8702 



EPOCH NUMBER:  5

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.745624894532398, 'tp': 2232, 'tn': 2131, 'fp': 362, 'fn': 275, 'auroc': 0.9477616704514964, 'auprc': 0.9440835272950213, 'eval_loss': 0.6065647342439741}
Correct predictions are:  4363
Total predictions are:  5000
Accuracy on test set is: 0.8726 



EPOCH NUMBER:  6

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.744244645741139, 'tp': 2156, 'tn': 2204, 'fp': 289, 'fn': 351, 'auroc': 0.9468330231709016, 'auprc': 0.9414396962045122, 'eval_loss': 0.6526010587573051}
Correct predictions are:  4360
Total predictions are:  5000
Accuracy on test set is: 0.872 



EPOCH NUMBER:  7

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7304937458390891, 'tp': 2151, 'tn': 2175, 'fp': 318, 'fn': 356, 'auroc': 0.90443685078491, 'auprc': 0.817381778329882, 'eval_loss': 0.6467704930521547}
Correct predictions are:  4326
Total predictions are:  5000
Accuracy on test set is: 0.8652 



EPOCH NUMBER:  8

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.
{'mcc': 0.736241056498489, 'tp': 2230, 'tn': 2109, 'fp': 384, 'fn': 277, 'auroc': 0.9433863561490323, 'auprc': 0.9397430396541538, 'eval_loss': 0.6792863594057038}
Correct predictions are:  4339
Total predictions are:  5000
Accuracy on test set is: 0.8678 



EPOCH NUMBER:  9

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7431976492579876, 'tp': 2187, 'tn': 2171, 'fp': 322, 'fn': 320, 'auroc': 0.9437730791809407, 'auprc': 0.9377147053160899, 'eval_loss': 0.7153602475833147}
Correct predictions are:  4358
Total predictions are:  5000
Accuracy on test set is: 0.8716 


[0.8754, 0.8716, 0.8762, 0.8758, 0.8702, 0.8726, 0.872, 0.8652, 0.8678, 0.8716]


 Over all runs maximum accuracies are: [0.872, 0.8762, 0.8768, 0.8802, 0.8822]
The median is: 0.8768
RoBERTa Accuracy Score on Test set ->  ['0.8768 +/- 0.00539999999999996']


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.



* * * * EVALUATION USING RSW AS PREPROCESSING FUNCTION * * * *

RUN NUMBER:  1


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.


EPOCH NUMBER:  0

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7293743619541724, 'tp': 2219, 'tn': 2103, 'fp': 390, 'fn': 288, 'auroc': 0.9397681677824354, 'auprc': 0.9367041449693223, 'eval_loss': 0.34115753884613514}
Correct predictions are:  4322
Total predictions are:  5000
Accuracy on test set is: 0.8644 



EPOCH NUMBER:  1

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7605333727980336, 'tp': 2255, 'tn': 2145, 'fp': 348, 'fn': 252, 'auroc': 0.9508824949187603, 'auprc': 0.9468960548246449, 'eval_loss': 0.3727127305522561}
Correct predictions are:  4400
Total predictions are:  5000
Accuracy on test set is: 0.88 



EPOCH NUMBER:  2

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7536269421955518, 'tp': 2270, 'tn': 2111, 'fp': 382, 'fn': 237, 'auroc': 0.9483312749171954, 'auprc': 0.9442384198642823, 'eval_loss': 0.4892018730096519}
Correct predictions are:  4381
Total predictions are:  5000
Accuracy on test set is: 0.8762 



EPOCH NUMBER:  3

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7584265628850578, 'tp': 2217, 'tn': 2179, 'fp': 314, 'fn': 290, 'auroc': 0.9492508821269159, 'auprc': 0.9476288828112392, 'eval_loss': 0.4862672903157771}
Correct predictions are:  4396
Total predictions are:  5000
Accuracy on test set is: 0.8792 



EPOCH NUMBER:  4

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7490956671717809, 'tp': 2317, 'tn': 2046, 'fp': 447, 'fn': 190, 'auroc': 0.9475535088195093, 'auprc': 0.943909704307898, 'eval_loss': 0.5797138031922281}
Correct predictions are:  4363
Total predictions are:  5000
Accuracy on test set is: 0.8726 



EPOCH NUMBER:  5

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7596054607320758, 'tp': 2202, 'tn': 2197, 'fp': 296, 'fn': 305, 'auroc': 0.9469287839216659, 'auprc': 0.9442992839093326, 'eval_loss': 0.5746318032756448}
Correct predictions are:  4399
Total predictions are:  5000
Accuracy on test set is: 0.8798 



EPOCH NUMBER:  6

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7591978156597706, 'tp': 2207, 'tn': 2191, 'fp': 302, 'fn': 300, 'auroc': 0.9468895836143356, 'auprc': 0.9438998811856423, 'eval_loss': 0.5948348782308399}
Correct predictions are:  4398
Total predictions are:  5000
Accuracy on test set is: 0.8796 



EPOCH NUMBER:  7

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7616519887715262, 'tp': 2225, 'tn': 2179, 'fp': 314, 'fn': 282, 'auroc': 0.948580556871566, 'auprc': 0.9461888248483467, 'eval_loss': 0.6165391846235841}
Correct predictions are:  4404
Total predictions are:  5000
Accuracy on test set is: 0.8808 



EPOCH NUMBER:  8

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7526704956274047, 'tp': 2165, 'tn': 2216, 'fp': 277, 'fn': 342, 'auroc': 0.9472659865653347, 'auprc': 0.9458898220894552, 'eval_loss': 0.6475440511304885}
Correct predictions are:  4381
Total predictions are:  5000
Accuracy on test set is: 0.8762 



EPOCH NUMBER:  9

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7588569981219225, 'tp': 2191, 'tn': 2206, 'fp': 287, 'fn': 316, 'auroc': 0.9446450860174743, 'auprc': 0.9413404175719355, 'eval_loss': 0.6502805613163859}
Correct predictions are:  4397
Total predictions are:  5000
Accuracy on test set is: 0.8794 


[0.8644, 0.88, 0.8762, 0.8792, 0.8726, 0.8798, 0.8796, 0.8808, 0.8762, 0.8794]

RUN NUMBER:  2


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.


EPOCH NUMBER:  0

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7621553191432634, 'tp': 2236, 'tn': 2169, 'fp': 324, 'fn': 271, 'auroc': 0.9516497009336553, 'auprc': 0.9490154512000302, 'eval_loss': 0.3220504490166903}
Correct predictions are:  4405
Total predictions are:  5000
Accuracy on test set is: 0.881 



EPOCH NUMBER:  1

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7780309485435963, 'tp': 2242, 'tn': 2203, 'fp': 290, 'fn': 265, 'auroc': 0.9544033225220485, 'auprc': 0.9512035661277802, 'eval_loss': 0.3455075358271599}
Correct predictions are:  4445
Total predictions are:  5000
Accuracy on test set is: 0.889 



EPOCH NUMBER:  2

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7588048565696124, 'tp': 2212, 'tn': 2185, 'fp': 308, 'fn': 295, 'auroc': 0.9518261823172695, 'auprc': 0.9501749723310771, 'eval_loss': 0.4884618790850043}
Correct predictions are:  4397
Total predictions are:  5000
Accuracy on test set is: 0.8794 



EPOCH NUMBER:  3

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7540029556973795, 'tp': 2196, 'tn': 2189, 'fp': 304, 'fn': 311, 'auroc': 0.9471530256797214, 'auprc': 0.9452522643868929, 'eval_loss': 0.48962019372433424}
Correct predictions are:  4385
Total predictions are:  5000
Accuracy on test set is: 0.877 



EPOCH NUMBER:  4

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.762955552779766, 'tp': 2237, 'tn': 2170, 'fp': 323, 'fn': 270, 'auroc': 0.9512084974746202, 'auprc': 0.949313042347969, 'eval_loss': 0.531495255420357}
Correct predictions are:  4407
Total predictions are:  5000
Accuracy on test set is: 0.8814 



EPOCH NUMBER:  5

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7479492878668187, 'tp': 2231, 'tn': 2138, 'fp': 355, 'fn': 276, 'auroc': 0.9471640657662757, 'auprc': 0.9448489960194395, 'eval_loss': 0.6315239693775773}
Correct predictions are:  4369
Total predictions are:  5000
Accuracy on test set is: 0.8738 



EPOCH NUMBER:  6

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7612049427816553, 'tp': 2215, 'tn': 2188, 'fp': 305, 'fn': 292, 'auroc': 0.9494405636140187, 'auprc': 0.9466400599460391, 'eval_loss': 0.624727178736031}
Correct predictions are:  4403
Total predictions are:  5000
Accuracy on test set is: 0.8806 



EPOCH NUMBER:  7

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7464767018145363, 'tp': 2173, 'tn': 2193, 'fp': 300, 'fn': 334, 'auroc': 0.9466015013557707, 'auprc': 0.9432073089186891, 'eval_loss': 0.665994849903509}
Correct predictions are:  4366
Total predictions are:  5000
Accuracy on test set is: 0.8732 



EPOCH NUMBER:  8

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.0, 'tp': 0, 'tn': 2493, 'fp': 0, 'fn': 2507, 'auroc': 0.8352287881936994, 'auprc': 0.8453422305419194, 'eval_loss': 0.6935038820266723}
Correct predictions are:  2493
Total predictions are:  5000
Accuracy on test set is: 0.4986 



EPOCH NUMBER:  9

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.0, 'tp': 0, 'tn': 2493, 'fp': 0, 'fn': 2507, 'auroc': 0.8226105292665494, 'auprc': 0.8279869618026969, 'eval_loss': 0.6931739237785339}
Correct predictions are:  2493
Total predictions are:  5000
Accuracy on test set is: 0.4986 


[0.881, 0.889, 0.8794, 0.877, 0.8814, 0.8738, 0.8806, 0.8732, 0.4986, 0.4986]

RUN NUMBER:  3


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.


EPOCH NUMBER:  0

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7489214941497685, 'tp': 2301, 'tn': 2064, 'fp': 429, 'fn': 206, 'auroc': 0.948307754732797, 'auprc': 0.945615547542877, 'eval_loss': 0.33381724987626077}
Correct predictions are:  4365
Total predictions are:  5000
Accuracy on test set is: 0.873 



EPOCH NUMBER:  1

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7624068975719083, 'tp': 2205, 'tn': 2201, 'fp': 292, 'fn': 302, 'auroc': 0.9518615425944941, 'auprc': 0.9501071452326669, 'eval_loss': 0.38468108099251985}
Correct predictions are:  4406
Total predictions are:  5000
Accuracy on test set is: 0.8812 



EPOCH NUMBER:  2

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7612633808067417, 'tp': 2226, 'tn': 2177, 'fp': 316, 'fn': 281, 'auroc': 0.9522487456301658, 'auprc': 0.9512410385948376, 'eval_loss': 0.5022588644206524}
Correct predictions are:  4403
Total predictions are:  5000
Accuracy on test set is: 0.8806 



EPOCH NUMBER:  3

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7610778221021248, 'tp': 2243, 'tn': 2159, 'fp': 334, 'fn': 264, 'auroc': 0.951414179087164, 'auprc': 0.9495113992941444, 'eval_loss': 0.5297269295856356}
Correct predictions are:  4402
Total predictions are:  5000
Accuracy on test set is: 0.8804 



EPOCH NUMBER:  4

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7594007007534703, 'tp': 2236, 'tn': 2162, 'fp': 331, 'fn': 271, 'auroc': 0.9492574421783467, 'auprc': 0.9459478549302269, 'eval_loss': 0.5488987899541855}
Correct predictions are:  4398
Total predictions are:  5000
Accuracy on test set is: 0.8796 



EPOCH NUMBER:  5

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.741316981270864, 'tp': 2231, 'tn': 2121, 'fp': 372, 'fn': 276, 'auroc': 0.9452570908155921, 'auprc': 0.9418820770256108, 'eval_loss': 0.6006645163103939}
Correct predictions are:  4352
Total predictions are:  5000
Accuracy on test set is: 0.8704 



EPOCH NUMBER:  6

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7376546588978943, 'tp': 2119, 'tn': 2223, 'fp': 270, 'fn': 388, 'auroc': 0.944717966588858, 'auprc': 0.9426702089638124, 'eval_loss': 0.6081529001742602}
Correct predictions are:  4342
Total predictions are:  5000
Accuracy on test set is: 0.8684 



EPOCH NUMBER:  7

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7513344166987258, 'tp': 2221, 'tn': 2157, 'fp': 336, 'fn': 286, 'auroc': 0.9415839420181056, 'auprc': 0.9395724683704286, 'eval_loss': 0.5537850591644645}
Correct predictions are:  4378
Total predictions are:  5000
Accuracy on test set is: 0.8756 



EPOCH NUMBER:  8

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7419016983471671, 'tp': 2221, 'tn': 2133, 'fp': 360, 'fn': 286, 'auroc': 0.943091153834646, 'auprc': 0.9414077748727216, 'eval_loss': 0.6184026280805468}
Correct predictions are:  4354
Total predictions are:  5000
Accuracy on test set is: 0.8708 



EPOCH NUMBER:  9

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7366504410019687, 'tp': 2146, 'tn': 2195, 'fp': 298, 'fn': 361, 'auroc': 0.9331335557670772, 'auprc': 0.9207825857737053, 'eval_loss': 0.6236876645244658}
Correct predictions are:  4341
Total predictions are:  5000
Accuracy on test set is: 0.8682 


[0.873, 0.8812, 0.8806, 0.8804, 0.8796, 0.8704, 0.8684, 0.8756, 0.8708, 0.8682]

RUN NUMBER:  4


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.


EPOCH NUMBER:  0

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7584399917311166, 'tp': 2264, 'tn': 2130, 'fp': 363, 'fn': 243, 'auroc': 0.946219098357731, 'auprc': 0.9444951238539083, 'eval_loss': 0.30262148480117324}
Correct predictions are:  4394
Total predictions are:  5000
Accuracy on test set is: 0.8788 



EPOCH NUMBER:  1

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7704016001117364, 'tp': 2261, 'tn': 2164, 'fp': 329, 'fn': 246, 'auroc': 0.9531125124020973, 'auprc': 0.9506728522625816, 'eval_loss': 0.36307967142909764}
Correct predictions are:  4425
Total predictions are:  5000
Accuracy on test set is: 0.885 



EPOCH NUMBER:  2

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7612571004637398, 'tp': 2194, 'tn': 2209, 'fp': 284, 'fn': 313, 'auroc': 0.9527085892353395, 'auprc': 0.9491786199251743, 'eval_loss': 0.4409249721094966}
Correct predictions are:  4403
Total predictions are:  5000
Accuracy on test set is: 0.8806 



EPOCH NUMBER:  3

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7488687653692514, 'tp': 2238, 'tn': 2133, 'fp': 360, 'fn': 269, 'auroc': 0.9519426632304798, 'auprc': 0.9514683526300309, 'eval_loss': 0.5400539877392352}
Correct predictions are:  4371
Total predictions are:  5000
Accuracy on test set is: 0.8742 



EPOCH NUMBER:  4

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7528605728279822, 'tp': 2183, 'tn': 2199, 'fp': 294, 'fn': 324, 'auroc': 0.9492265619362455, 'auprc': 0.9477787950705592, 'eval_loss': 0.5615691037885845}
Correct predictions are:  4382
Total predictions are:  5000
Accuracy on test set is: 0.8764 



EPOCH NUMBER:  5

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7550668204235277, 'tp': 2235, 'tn': 2152, 'fp': 341, 'fn': 272, 'auroc': 0.94727670664938, 'auprc': 0.936543164652927, 'eval_loss': 0.5820928734473884}
Correct predictions are:  4387
Total predictions are:  5000
Accuracy on test set is: 0.8774 



EPOCH NUMBER:  6

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7449327274672131, 'tp': 2213, 'tn': 2149, 'fp': 344, 'fn': 294, 'auroc': 0.9469453440514973, 'auprc': 0.9447260101431891, 'eval_loss': 0.609362117189914}
Correct predictions are:  4362
Total predictions are:  5000
Accuracy on test set is: 0.8724 



EPOCH NUMBER:  7

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7478229574004067, 'tp': 2162, 'tn': 2207, 'fp': 286, 'fn': 345, 'auroc': 0.9464734203516156, 'auprc': 0.9371074941712151, 'eval_loss': 0.5569398116566241}
Correct predictions are:  4369
Total predictions are:  5000
Accuracy on test set is: 0.8738 



EPOCH NUMBER:  8

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.0, 'tp': 0, 'tn': 2493, 'fp': 0, 'fn': 2507, 'auroc': 0.7123438247755862, 'auprc': 0.6523870425891141, 'eval_loss': 0.6933046461105347}
Correct predictions are:  2493
Total predictions are:  5000
Accuracy on test set is: 0.4986 



EPOCH NUMBER:  9

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.0, 'tp': 2507, 'tn': 0, 'fp': 2493, 'fn': 0, 'auroc': 0.38230043723542795, 'auprc': 0.4473548956272306, 'eval_loss': 0.6938814094543457}
Correct predictions are:  2507
Total predictions are:  5000
Accuracy on test set is: 0.5014 


[0.8788, 0.885, 0.8806, 0.8742, 0.8764, 0.8774, 0.8724, 0.8738, 0.4986, 0.5014]

RUN NUMBER:  5


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.


EPOCH NUMBER:  0

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7561521876947223, 'tp': 2310, 'tn': 2073, 'fp': 420, 'fn': 197, 'auroc': 0.9491180010851284, 'auprc': 0.9445719110276249, 'eval_loss': 0.33389622770547867}
Correct predictions are:  4383
Total predictions are:  5000
Accuracy on test set is: 0.8766 



EPOCH NUMBER:  1

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7677235784525273, 'tp': 2308, 'tn': 2106, 'fp': 387, 'fn': 199, 'auroc': 0.951775621920876, 'auprc': 0.9481609440159169, 'eval_loss': 0.38593227799534796}
Correct predictions are:  4414
Total predictions are:  5000
Accuracy on test set is: 0.8828 



EPOCH NUMBER:  2

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7741897708063701, 'tp': 2317, 'tn': 2113, 'fp': 380, 'fn': 190, 'auroc': 0.9548430059691668, 'auprc': 0.9534783415546977, 'eval_loss': 0.4509643016181886}
Correct predictions are:  4430
Total predictions are:  5000
Accuracy on test set is: 0.886 



EPOCH NUMBER:  3

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7728619618541134, 'tp': 2304, 'tn': 2124, 'fp': 369, 'fn': 203, 'auroc': 0.9542958016790851, 'auprc': 0.9522247485781313, 'eval_loss': 0.49927187352329494}
Correct predictions are:  4428
Total predictions are:  5000
Accuracy on test set is: 0.8856 



EPOCH NUMBER:  4

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7609620713190846, 'tp': 2271, 'tn': 2129, 'fp': 364, 'fn': 236, 'auroc': 0.9510660963581955, 'auprc': 0.9490071024806123, 'eval_loss': 0.514724832431227}
Correct predictions are:  4400
Total predictions are:  5000
Accuracy on test set is: 0.88 



EPOCH NUMBER:  5

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7443980535274449, 'tp': 2190, 'tn': 2171, 'fp': 322, 'fn': 317, 'auroc': 0.9476714297440091, 'auprc': 0.9440794945626995, 'eval_loss': 0.6118063838850707}
Correct predictions are:  4361
Total predictions are:  5000
Accuracy on test set is: 0.8722 



EPOCH NUMBER:  6

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7652084590866538, 'tp': 2208, 'tn': 2205, 'fp': 288, 'fn': 299, 'auroc': 0.9530277917378873, 'auprc': 0.9516929918662352, 'eval_loss': 0.6132755567330868}
Correct predictions are:  4413
Total predictions are:  5000
Accuracy on test set is: 0.8826 



EPOCH NUMBER:  7

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7695434494713134, 'tp': 2257, 'tn': 2166, 'fp': 327, 'fn': 250, 'auroc': 0.9318746658973807, 'auprc': 0.94090165806307, 'eval_loss': 0.6310512205287814}
Correct predictions are:  4423
Total predictions are:  5000
Accuracy on test set is: 0.8846 



EPOCH NUMBER:  8

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7623578721098851, 'tp': 2249, 'tn': 2156, 'fp': 337, 'fn': 258, 'auroc': 0.9508007342777568, 'auprc': 0.9474442313072299, 'eval_loss': 0.6915590371951461}
Correct predictions are:  4405
Total predictions are:  5000
Accuracy on test set is: 0.881 



EPOCH NUMBER:  9

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7597450374619739, 'tp': 2269, 'tn': 2128, 'fp': 365, 'fn': 238, 'auroc': 0.9500167281311486, 'auprc': 0.9474014870507035, 'eval_loss': 0.6811334231186658}
Correct predictions are:  4397
Total predictions are:  5000
Accuracy on test set is: 0.8794 


[0.8766, 0.8828, 0.886, 0.8856, 0.88, 0.8722, 0.8826, 0.8846, 0.881, 0.8794]


 Over all runs maximum accuracies are: [0.8808, 0.8812, 0.885, 0.886, 0.889]
The median is: 0.885
RoBERTa Accuracy Score on Test set ->  ['0.885 +/- 0.0041999999999999815']


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.



* * * * EVALUATION USING STM AS PREPROCESSING FUNCTION * * * *

RUN NUMBER:  1


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.


EPOCH NUMBER:  0

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7009546002150977, 'tp': 2108, 'tn': 2144, 'fp': 349, 'fn': 399, 'auroc': 0.9296906487746863, 'auprc': 0.9283358488505881, 'eval_loss': 0.35517575298547743}
Correct predictions are:  4252
Total predictions are:  5000
Accuracy on test set is: 0.8504 



EPOCH NUMBER:  1

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.696405055133256, 'tp': 2135, 'tn': 2106, 'fp': 387, 'fn': 372, 'auroc': 0.9334620383423805, 'auprc': 0.9314048525087081, 'eval_loss': 0.4020637815028429}
Correct predictions are:  4241
Total predictions are:  5000
Accuracy on test set is: 0.8482 



EPOCH NUMBER:  2

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7044820342233097, 'tp': 2062, 'tn': 2196, 'fp': 297, 'fn': 445, 'auroc': 0.931941546421724, 'auprc': 0.9298181402437191, 'eval_loss': 0.5172499976791441}
Correct predictions are:  4258
Total predictions are:  5000
Accuracy on test set is: 0.8516 



EPOCH NUMBER:  3

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6953303074943498, 'tp': 2152, 'tn': 2086, 'fp': 407, 'fn': 355, 'auroc': 0.9297746494332517, 'auprc': 0.9299414071312071, 'eval_loss': 0.6238221030883491}
Correct predictions are:  4238
Total predictions are:  5000
Accuracy on test set is: 0.8476 



EPOCH NUMBER:  4

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7030425716608381, 'tp': 2170, 'tn': 2087, 'fp': 406, 'fn': 337, 'auroc': 0.9299552908494804, 'auprc': 0.9280296004969757, 'eval_loss': 0.7086980499651283}
Correct predictions are:  4257
Total predictions are:  5000
Accuracy on test set is: 0.8514 



EPOCH NUMBER:  5

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.703274541866861, 'tp': 2021, 'tn': 2230, 'fp': 263, 'fn': 486, 'auroc': 0.9284495990448565, 'auprc': 0.9279497740296514, 'eval_loss': 0.7409537275820971}
Correct predictions are:  4251
Total predictions are:  5000
Accuracy on test set is: 0.8502 



EPOCH NUMBER:  6

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6940562352018441, 'tp': 2171, 'tn': 2063, 'fp': 430, 'fn': 336, 'auroc': 0.9267606258033062, 'auprc': 0.9214999548786971, 'eval_loss': 0.830249633333087}
Correct predictions are:  4234
Total predictions are:  5000
Accuracy on test set is: 0.8468 



EPOCH NUMBER:  7

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6996349642039268, 'tp': 2146, 'tn': 2103, 'fp': 390, 'fn': 361, 'auroc': 0.9261887013194182, 'auprc': 0.9217685378818329, 'eval_loss': 0.8971391244677827}
Correct predictions are:  4249
Total predictions are:  5000
Accuracy on test set is: 0.8498 



EPOCH NUMBER:  8

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6976733981407301, 'tp': 2149, 'tn': 2095, 'fp': 398, 'fn': 358, 'auroc': 0.9270366279671632, 'auprc': 0.9243911484904347, 'eval_loss': 0.88292425124906}
Correct predictions are:  4244
Total predictions are:  5000
Accuracy on test set is: 0.8488 



EPOCH NUMBER:  9

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7004476084412089, 'tp': 2149, 'tn': 2102, 'fp': 391, 'fn': 358, 'auroc': 0.9267535057474849, 'auprc': 0.9209734307075881, 'eval_loss': 0.9100701664317399}
Correct predictions are:  4251
Total predictions are:  5000
Accuracy on test set is: 0.8502 


[0.8504, 0.8482, 0.8516, 0.8476, 0.8514, 0.8502, 0.8468, 0.8498, 0.8488, 0.8502]

RUN NUMBER:  2


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.


EPOCH NUMBER:  0

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6962775661115856, 'tp': 2164, 'tn': 2076, 'fp': 417, 'fn': 343, 'auroc': 0.9290663238799793, 'auprc': 0.9276368707366908, 'eval_loss': 0.35113682252168654}
Correct predictions are:  4240
Total predictions are:  5000
Accuracy on test set is: 0.848 



EPOCH NUMBER:  1

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7119977420622978, 'tp': 2147, 'tn': 2133, 'fp': 360, 'fn': 360, 'auroc': 0.9302396930791939, 'auprc': 0.9273486054182158, 'eval_loss': 0.4655189885869622}
Correct predictions are:  4280
Total predictions are:  5000
Accuracy on test set is: 0.856 



EPOCH NUMBER:  2

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6868346788603431, 'tp': 2104, 'tn': 2113, 'fp': 380, 'fn': 403, 'auroc': 0.9259822197006025, 'auprc': 0.9236615295322363, 'eval_loss': 0.5458720180243254}
Correct predictions are:  4217
Total predictions are:  5000
Accuracy on test set is: 0.8434 



EPOCH NUMBER:  3

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.700364955182013, 'tp': 2174, 'tn': 2076, 'fp': 417, 'fn': 333, 'auroc': 0.9276859930581856, 'auprc': 0.9259644414853757, 'eval_loss': 0.5473596209585667}
Correct predictions are:  4250
Total predictions are:  5000
Accuracy on test set is: 0.85 



EPOCH NUMBER:  4

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7092196539041776, 'tp': 2135, 'tn': 2138, 'fp': 355, 'fn': 372, 'auroc': 0.9312048206457939, 'auprc': 0.9297316119388876, 'eval_loss': 0.6599725711461156}
Correct predictions are:  4273
Total predictions are:  5000
Accuracy on test set is: 0.8546 



EPOCH NUMBER:  5

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6971628205846693, 'tp': 2170, 'tn': 2072, 'fp': 421, 'fn': 337, 'auroc': 0.9289692031185524, 'auprc': 0.9280193447701873, 'eval_loss': 0.6293992947548628}
Correct predictions are:  4242
Total predictions are:  5000
Accuracy on test set is: 0.8484 



EPOCH NUMBER:  6

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6922302860752764, 'tp': 2156, 'tn': 2074, 'fp': 419, 'fn': 351, 'auroc': 0.9253427746873536, 'auprc': 0.9222921923760108, 'eval_loss': 0.7689650994103402}
Correct predictions are:  4230
Total predictions are:  5000
Accuracy on test set is: 0.846 



EPOCH NUMBER:  7

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7042980590965099, 'tp': 2175, 'tn': 2085, 'fp': 408, 'fn': 332, 'auroc': 0.9280813561578323, 'auprc': 0.9251409208839824, 'eval_loss': 0.7518028452455997}
Correct predictions are:  4260
Total predictions are:  5000
Accuracy on test set is: 0.852 



EPOCH NUMBER:  8

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6878073617342549, 'tp': 2149, 'tn': 2070, 'fp': 423, 'fn': 358, 'auroc': 0.9221956300137393, 'auprc': 0.9195219704449988, 'eval_loss': 0.7957986779883504}
Correct predictions are:  4219
Total predictions are:  5000
Accuracy on test set is: 0.8438 



EPOCH NUMBER:  9

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6800038400203736, 'tp': 2103, 'tn': 2097, 'fp': 396, 'fn': 404, 'auroc': 0.9157981398574165, 'auprc': 0.9143947825636858, 'eval_loss': 0.7444862623527646}
Correct predictions are:  4200
Total predictions are:  5000
Accuracy on test set is: 0.84 


[0.848, 0.856, 0.8434, 0.85, 0.8546, 0.8484, 0.846, 0.852, 0.8438, 0.84]

RUN NUMBER:  3


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.


EPOCH NUMBER:  0

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7076701657599239, 'tp': 2125, 'tn': 2144, 'fp': 349, 'fn': 382, 'auroc': 0.9309162583834657, 'auprc': 0.9224820966966296, 'eval_loss': 0.372013553661108}
Correct predictions are:  4269
Total predictions are:  5000
Accuracy on test set is: 0.8538 



EPOCH NUMBER:  1

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.705386553445088, 'tp': 2111, 'tn': 2152, 'fp': 341, 'fn': 396, 'auroc': 0.9312653811205879, 'auprc': 0.9294984467673207, 'eval_loss': 0.4668429206892848}
Correct predictions are:  4263
Total predictions are:  5000
Accuracy on test set is: 0.8526 



EPOCH NUMBER:  2

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7152604525640639, 'tp': 2169, 'tn': 2119, 'fp': 374, 'fn': 338, 'auroc': 0.9330393150282299, 'auprc': 0.9309726845288907, 'eval_loss': 0.5124172521457077}
Correct predictions are:  4288
Total predictions are:  5000
Accuracy on test set is: 0.8576 



EPOCH NUMBER:  3

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6985005539082422, 'tp': 2110, 'tn': 2136, 'fp': 357, 'fn': 397, 'auroc': 0.9296596085313309, 'auprc': 0.9280420182805817, 'eval_loss': 0.6094039006084204}
Correct predictions are:  4246
Total predictions are:  5000
Accuracy on test set is: 0.8492 



EPOCH NUMBER:  4

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6915092794555154, 'tp': 2085, 'tn': 2143, 'fp': 350, 'fn': 422, 'auroc': 0.9296803286937769, 'auprc': 0.9283682371584827, 'eval_loss': 0.6954971206389368}
Correct predictions are:  4228
Total predictions are:  5000
Accuracy on test set is: 0.8456 



EPOCH NUMBER:  5

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6906154320204043, 'tp': 2153, 'tn': 2073, 'fp': 420, 'fn': 354, 'auroc': 0.925655657140352, 'auprc': 0.9226817758012226, 'eval_loss': 0.7645437953293324}
Correct predictions are:  4226
Total predictions are:  5000
Accuracy on test set is: 0.8452 



EPOCH NUMBER:  6

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6940348545456171, 'tp': 2113, 'tn': 2122, 'fp': 371, 'fn': 394, 'auroc': 0.9300775318078494, 'auprc': 0.9296277436114715, 'eval_loss': 0.7579326307587325}
Correct predictions are:  4235
Total predictions are:  5000
Accuracy on test set is: 0.847 



EPOCH NUMBER:  7

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6949364813708959, 'tp': 2102, 'tn': 2135, 'fp': 358, 'fn': 405, 'auroc': 0.9284921593785296, 'auprc': 0.9277345059230407, 'eval_loss': 0.7751234385397285}
Correct predictions are:  4237
Total predictions are:  5000
Accuracy on test set is: 0.8474 



EPOCH NUMBER:  8

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6768755211780177, 'tp': 1970, 'tn': 2213, 'fp': 280, 'fn': 537, 'auroc': 0.924603888894489, 'auprc': 0.9194859809017122, 'eval_loss': 0.8327676745507866}
Correct predictions are:  4183
Total predictions are:  5000
Accuracy on test set is: 0.8366 



EPOCH NUMBER:  9

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6993104768189277, 'tp': 2110, 'tn': 2138, 'fp': 355, 'fn': 397, 'auroc': 0.9215885052538813, 'auprc': 0.9216635614757556, 'eval_loss': 0.849721671531722}
Correct predictions are:  4248
Total predictions are:  5000
Accuracy on test set is: 0.8496 


[0.8538, 0.8526, 0.8576, 0.8492, 0.8456, 0.8452, 0.847, 0.8474, 0.8366, 0.8496]

RUN NUMBER:  4


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.


EPOCH NUMBER:  0

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6988241491909721, 'tp': 2143, 'tn': 2104, 'fp': 389, 'fn': 364, 'auroc': 0.9302612132479119, 'auprc': 0.928554425830902, 'eval_loss': 0.35312154245376587}
Correct predictions are:  4247
Total predictions are:  5000
Accuracy on test set is: 0.8494 



EPOCH NUMBER:  1

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6868107158855882, 'tp': 2125, 'tn': 2092, 'fp': 401, 'fn': 382, 'auroc': 0.9247371699394121, 'auprc': 0.9225465939386073, 'eval_loss': 0.4177997039586306}
Correct predictions are:  4217
Total predictions are:  5000
Accuracy on test set is: 0.8434 



EPOCH NUMBER:  2

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6967975921821167, 'tp': 2131, 'tn': 2111, 'fp': 382, 'fn': 376, 'auroc': 0.9307864973661393, 'auprc': 0.9308617685329467, 'eval_loss': 0.6382534563928842}
Correct predictions are:  4242
Total predictions are:  5000
Accuracy on test set is: 0.8484 



EPOCH NUMBER:  3

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7019649013850646, 'tp': 2227, 'tn': 2023, 'fp': 470, 'fn': 280, 'auroc': 0.9313784220068284, 'auprc': 0.9307334149455241, 'eval_loss': 0.6641140896454454}
Correct predictions are:  4250
Total predictions are:  5000
Accuracy on test set is: 0.85 



EPOCH NUMBER:  4

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6923972506335465, 'tp': 2125, 'tn': 2106, 'fp': 387, 'fn': 382, 'auroc': 0.9269584673543841, 'auprc': 0.926594233633662, 'eval_loss': 0.693100958045572}
Correct predictions are:  4231
Total predictions are:  5000
Accuracy on test set is: 0.8462 



EPOCH NUMBER:  5

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6939606859873255, 'tp': 2166, 'tn': 2068, 'fp': 425, 'fn': 341, 'auroc': 0.923511240328124, 'auprc': 0.903446087014565, 'eval_loss': 0.7262590364042669}
Correct predictions are:  4234
Total predictions are:  5000
Accuracy on test set is: 0.8468 



EPOCH NUMBER:  6

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6817086813596699, 'tp': 2045, 'tn': 2157, 'fp': 336, 'fn': 462, 'auroc': 0.926267021933452, 'auprc': 0.9261743226489563, 'eval_loss': 0.8025477691512555}
Correct predictions are:  4202
Total predictions are:  5000
Accuracy on test set is: 0.8404 



EPOCH NUMBER:  7

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6820227069040519, 'tp': 2122, 'tn': 2083, 'fp': 410, 'fn': 385, 'auroc': 0.9228515551561923, 'auprc': 0.9189284737980516, 'eval_loss': 0.8780655530236662}
Correct predictions are:  4205
Total predictions are:  5000
Accuracy on test set is: 0.841 



EPOCH NUMBER:  8

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6889640747014868, 'tp': 2018, 'tn': 2199, 'fp': 294, 'fn': 489, 'auroc': 0.9257015775003676, 'auprc': 0.92219235951203, 'eval_loss': 0.8596686394749209}
Correct predictions are:  4217
Total predictions are:  5000
Accuracy on test set is: 0.8434 



EPOCH NUMBER:  9

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6888825524921174, 'tp': 2166, 'tn': 2055, 'fp': 438, 'fn': 341, 'auroc': 0.9208268992828904, 'auprc': 0.9190500208682562, 'eval_loss': 0.8644303023923189}
Correct predictions are:  4221
Total predictions are:  5000
Accuracy on test set is: 0.8442 


[0.8494, 0.8434, 0.8484, 0.85, 0.8462, 0.8468, 0.8404, 0.841, 0.8434, 0.8442]

RUN NUMBER:  5


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.


EPOCH NUMBER:  0

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6909159797520287, 'tp': 2170, 'tn': 2056, 'fp': 437, 'fn': 337, 'auroc': 0.9266607050199273, 'auprc': 0.9248023448310652, 'eval_loss': 0.39109266243577}
Correct predictions are:  4226
Total predictions are:  5000
Accuracy on test set is: 0.8452 



EPOCH NUMBER:  1

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7009547893862694, 'tp': 2161, 'tn': 2091, 'fp': 402, 'fn': 346, 'auroc': 0.9329351542116089, 'auprc': 0.9337760656964379, 'eval_loss': 0.3986792846277356}
Correct predictions are:  4252
Total predictions are:  5000
Accuracy on test set is: 0.8504 



EPOCH NUMBER:  2

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7052862471574212, 'tp': 2186, 'tn': 2076, 'fp': 417, 'fn': 321, 'auroc': 0.9307823373335248, 'auprc': 0.9273755929957788, 'eval_loss': 0.5415198541864753}
Correct predictions are:  4262
Total predictions are:  5000
Accuracy on test set is: 0.8524 



EPOCH NUMBER:  3

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7068153496112695, 'tp': 2133, 'tn': 2134, 'fp': 359, 'fn': 374, 'auroc': 0.9304708148911887, 'auprc': 0.9306045420087765, 'eval_loss': 0.6351320939809084}
Correct predictions are:  4267
Total predictions are:  5000
Accuracy on test set is: 0.8534 



EPOCH NUMBER:  4

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7007976619435176, 'tp': 2136, 'tn': 2116, 'fp': 377, 'fn': 371, 'auroc': 0.9293420060413273, 'auprc': 0.9288903495899561, 'eval_loss': 0.7087833135116846}
Correct predictions are:  4252
Total predictions are:  5000
Accuracy on test set is: 0.8504 



EPOCH NUMBER:  5

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6990713011104902, 'tp': 2097, 'tn': 2150, 'fp': 343, 'fn': 410, 'auroc': 0.9299738509949919, 'auprc': 0.9282225095645498, 'eval_loss': 0.7674694405358284}
Correct predictions are:  4247
Total predictions are:  5000
Accuracy on test set is: 0.8494 



EPOCH NUMBER:  6

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7051607317206725, 'tp': 2247, 'tn': 2009, 'fp': 484, 'fn': 260, 'auroc': 0.9297445691974224, 'auprc': 0.9266793828772375, 'eval_loss': 0.7475960257872939}
Correct predictions are:  4256
Total predictions are:  5000
Accuracy on test set is: 0.8512 



EPOCH NUMBER:  7

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6980393876357227, 'tp': 2175, 'tn': 2069, 'fp': 424, 'fn': 332, 'auroc': 0.927553032015771, 'auprc': 0.9240689551125425, 'eval_loss': 0.7920675362428651}
Correct predictions are:  4244
Total predictions are:  5000
Accuracy on test set is: 0.8488 



EPOCH NUMBER:  8

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6988195299346134, 'tp': 2122, 'tn': 2125, 'fp': 368, 'fn': 385, 'auroc': 0.9303500939447366, 'auprc': 0.928058357011987, 'eval_loss': 0.8396800988413393}
Correct predictions are:  4247
Total predictions are:  5000
Accuracy on test set is: 0.8494 



EPOCH NUMBER:  9

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.698629400440855, 'tp': 2050, 'tn': 2193, 'fp': 300, 'fn': 457, 'auroc': 0.9299159305408954, 'auprc': 0.9276935456480653, 'eval_loss': 0.8790585098681971}
Correct predictions are:  4243
Total predictions are:  5000
Accuracy on test set is: 0.8486 


[0.8452, 0.8504, 0.8524, 0.8534, 0.8504, 0.8494, 0.8512, 0.8488, 0.8494, 0.8486]


 Over all runs maximum accuracies are: [0.85, 0.8516, 0.8534, 0.856, 0.8576]
The median is: 0.8534
RoBERTa Accuracy Score on Test set ->  ['0.8534 +/- 0.0041999999999999815']


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.



* * * * EVALUATION USING LOW_RSW AS PREPROCESSING FUNCTION * * * *

RUN NUMBER:  1


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.


EPOCH NUMBER:  0

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7341243572787844, 'tp': 2199, 'tn': 2136, 'fp': 357, 'fn': 308, 'auroc': 0.9423163477601666, 'auprc': 0.9383727734422463, 'eval_loss': 0.3345088079690933}
Correct predictions are:  4335
Total predictions are:  5000
Accuracy on test set is: 0.867 



EPOCH NUMBER:  1

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7456057081104052, 'tp': 2196, 'tn': 2168, 'fp': 325, 'fn': 311, 'auroc': 0.9465490209443241, 'auprc': 0.9439555123959165, 'eval_loss': 0.3699890933960676}
Correct predictions are:  4364
Total predictions are:  5000
Accuracy on test set is: 0.8728 



EPOCH NUMBER:  2

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7520660415063473, 'tp': 2215, 'tn': 2165, 'fp': 328, 'fn': 292, 'auroc': 0.9491184810888916, 'auprc': 0.9456156295764655, 'eval_loss': 0.4319468670256436}
Correct predictions are:  4380
Total predictions are:  5000
Accuracy on test set is: 0.876 



EPOCH NUMBER:  3

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7516425891967645, 'tp': 2184, 'tn': 2195, 'fp': 298, 'fn': 323, 'auroc': 0.9473693473756835, 'auprc': 0.9462362630804652, 'eval_loss': 0.535880296067521}
Correct predictions are:  4379
Total predictions are:  5000
Accuracy on test set is: 0.8758 



EPOCH NUMBER:  4

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7473375507788497, 'tp': 2168, 'tn': 2200, 'fp': 293, 'fn': 339, 'auroc': 0.9448067672850555, 'auprc': 0.9417821200253258, 'eval_loss': 0.6323609350856394}
Correct predictions are:  4368
Total predictions are:  5000
Accuracy on test set is: 0.8736 



EPOCH NUMBER:  5

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7477064517385711, 'tp': 2214, 'tn': 2155, 'fp': 338, 'fn': 293, 'auroc': 0.9456288537302132, 'auprc': 0.9438833058013976, 'eval_loss': 0.5800719235114753}
Correct predictions are:  4369
Total predictions are:  5000
Accuracy on test set is: 0.8738 



EPOCH NUMBER:  6

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7395840670336766, 'tp': 2252, 'tn': 2094, 'fp': 399, 'fn': 255, 'auroc': 0.9448942879712177, 'auprc': 0.9439322718342874, 'eval_loss': 0.6461800488857552}
Correct predictions are:  4346
Total predictions are:  5000
Accuracy on test set is: 0.8692 



EPOCH NUMBER:  7

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7397459738585096, 'tp': 2245, 'tn': 2102, 'fp': 391, 'fn': 262, 'auroc': 0.9455003727229223, 'auprc': 0.9430837558245283, 'eval_loss': 0.7432030852727592}
Correct predictions are:  4347
Total predictions are:  5000
Accuracy on test set is: 0.8694 



EPOCH NUMBER:  8

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7400945216073265, 'tp': 2163, 'tn': 2187, 'fp': 306, 'fn': 344, 'auroc': 0.9448086072994812, 'auprc': 0.9437937603873269, 'eval_loss': 0.7003502524271608}
Correct predictions are:  4350
Total predictions are:  5000
Accuracy on test set is: 0.87 



EPOCH NUMBER:  9

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7264111688614852, 'tp': 2174, 'tn': 2142, 'fp': 351, 'fn': 333, 'auroc': 0.9415513017622058, 'auprc': 0.9403072194350772, 'eval_loss': 0.7375486401893199}
Correct predictions are:  4316
Total predictions are:  5000
Accuracy on test set is: 0.8632 


[0.867, 0.8728, 0.876, 0.8758, 0.8736, 0.8738, 0.8692, 0.8694, 0.87, 0.8632]

RUN NUMBER:  2


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.


EPOCH NUMBER:  0

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7295855829836899, 'tp': 2260, 'tn': 2059, 'fp': 434, 'fn': 247, 'auroc': 0.943775879202893, 'auprc': 0.9409997755477406, 'eval_loss': 0.3260174299508333}
Correct predictions are:  4319
Total predictions are:  5000
Accuracy on test set is: 0.8638 



EPOCH NUMBER:  1

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7340175561989405, 'tp': 2185, 'tn': 2150, 'fp': 343, 'fn': 322, 'auroc': 0.9398664085526429, 'auprc': 0.9369576098379686, 'eval_loss': 0.35381327896863224}
Correct predictions are:  4335
Total predictions are:  5000
Accuracy on test set is: 0.867 



EPOCH NUMBER:  2

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7200338193931858, 'tp': 2044, 'tn': 2249, 'fp': 244, 'fn': 463, 'auroc': 0.9424101084952506, 'auprc': 0.9402547680885579, 'eval_loss': 0.42473590780049564}
Correct predictions are:  4293
Total predictions are:  5000
Accuracy on test set is: 0.8586 



EPOCH NUMBER:  3

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.739597979414162, 'tp': 2184, 'tn': 2165, 'fp': 328, 'fn': 323, 'auroc': 0.9427041908008559, 'auprc': 0.9389847151102002, 'eval_loss': 0.458896859651804}
Correct predictions are:  4349
Total predictions are:  5000
Accuracy on test set is: 0.8698 



EPOCH NUMBER:  4

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7300673803465341, 'tp': 2278, 'tn': 2040, 'fp': 453, 'fn': 229, 'auroc': 0.9387948001512332, 'auprc': 0.9354046579797612, 'eval_loss': 0.5032958198145032}
Correct predictions are:  4318
Total predictions are:  5000
Accuracy on test set is: 0.8636 



EPOCH NUMBER:  5

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7356000287415669, 'tp': 2181, 'tn': 2158, 'fp': 335, 'fn': 326, 'auroc': 0.9409785772720458, 'auprc': 0.9388551512614829, 'eval_loss': 0.5258682541213929}
Correct predictions are:  4339
Total predictions are:  5000
Accuracy on test set is: 0.8678 



EPOCH NUMBER:  6

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7464162180499373, 'tp': 2200, 'tn': 2166, 'fp': 327, 'fn': 307, 'auroc': 0.9407185752336299, 'auprc': 0.927765517847167, 'eval_loss': 0.5193791669294238}
Correct predictions are:  4366
Total predictions are:  5000
Accuracy on test set is: 0.8732 



EPOCH NUMBER:  7

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7412898804457511, 'tp': 2165, 'tn': 2188, 'fp': 305, 'fn': 342, 'auroc': 0.9440555613956014, 'auprc': 0.9412703082196113, 'eval_loss': 0.5935181762456894}
Correct predictions are:  4353
Total predictions are:  5000
Accuracy on test set is: 0.8706 



EPOCH NUMBER:  8

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7307999116180509, 'tp': 2175, 'tn': 2152, 'fp': 341, 'fn': 332, 'auroc': 0.939736327532808, 'auprc': 0.9355945432313888, 'eval_loss': 0.6646334159016609}
Correct predictions are:  4327
Total predictions are:  5000
Accuracy on test set is: 0.8654 



EPOCH NUMBER:  9

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7292131752815791, 'tp': 2178, 'tn': 2145, 'fp': 348, 'fn': 329, 'auroc': 0.9395954464282998, 'auprc': 0.9397250014815584, 'eval_loss': 0.6526595565244555}
Correct predictions are:  4323
Total predictions are:  5000
Accuracy on test set is: 0.8646 


[0.8638, 0.867, 0.8586, 0.8698, 0.8636, 0.8678, 0.8732, 0.8706, 0.8654, 0.8646]

RUN NUMBER:  3


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.


EPOCH NUMBER:  0

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7408009913953555, 'tp': 2188, 'tn': 2164, 'fp': 329, 'fn': 319, 'auroc': 0.9450250089960703, 'auprc': 0.943263691918152, 'eval_loss': 0.3692324765279889}
Correct predictions are:  4352
Total predictions are:  5000
Accuracy on test set is: 0.8704 



EPOCH NUMBER:  1

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7522278827004487, 'tp': 2109, 'tn': 2267, 'fp': 226, 'fn': 398, 'auroc': 0.9498354467099022, 'auprc': 0.9465699696871225, 'eval_loss': 0.38212521783560516}
Correct predictions are:  4376
Total predictions are:  5000
Accuracy on test set is: 0.8752 



EPOCH NUMBER:  2

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7450105909466725, 'tp': 2138, 'tn': 2223, 'fp': 270, 'fn': 369, 'auroc': 0.9477751105568667, 'auprc': 0.9461082048102347, 'eval_loss': 0.4823951026245952}
Correct predictions are:  4361
Total predictions are:  5000
Accuracy on test set is: 0.8722 



EPOCH NUMBER:  3

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7551979507046576, 'tp': 2203, 'tn': 2185, 'fp': 308, 'fn': 304, 'auroc': 0.9487338380732905, 'auprc': 0.9465606603656023, 'eval_loss': 0.4922068895615637}
Correct predictions are:  4388
Total predictions are:  5000
Accuracy on test set is: 0.8776 



EPOCH NUMBER:  4

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7460183559529835, 'tp': 2200, 'tn': 2165, 'fp': 328, 'fn': 307, 'auroc': 0.9464731003491067, 'auprc': 0.9439303546744323, 'eval_loss': 0.5491516532219947}
Correct predictions are:  4365
Total predictions are:  5000
Accuracy on test set is: 0.873 



EPOCH NUMBER:  5

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7521841917022959, 'tp': 2247, 'tn': 2132, 'fp': 361, 'fn': 260, 'auroc': 0.930824977667825, 'auprc': 0.8921664916182929, 'eval_loss': 0.6169035782974214}
Correct predictions are:  4379
Total predictions are:  5000
Accuracy on test set is: 0.8758 



EPOCH NUMBER:  6

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.735205300409309, 'tp': 2183, 'tn': 2155, 'fp': 338, 'fn': 324, 'auroc': 0.9455240529085747, 'auprc': 0.9431797414617378, 'eval_loss': 0.6809640691865235}
Correct predictions are:  4338
Total predictions are:  5000
Accuracy on test set is: 0.8676 



EPOCH NUMBER:  7

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7291841665581065, 'tp': 2210, 'tn': 2112, 'fp': 381, 'fn': 297, 'auroc': 0.9403983327229286, 'auprc': 0.9338202943431317, 'eval_loss': 0.7461736776124686}
Correct predictions are:  4322
Total predictions are:  5000
Accuracy on test set is: 0.8644 



EPOCH NUMBER:  8

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7372547292561481, 'tp': 2239, 'tn': 2102, 'fp': 391, 'fn': 268, 'auroc': 0.943870759946758, 'auprc': 0.9412927468601584, 'eval_loss': 0.7823781983880326}
Correct predictions are:  4341
Total predictions are:  5000
Accuracy on test set is: 0.8682 



EPOCH NUMBER:  9

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7409733515582246, 'tp': 2157, 'tn': 2195, 'fp': 298, 'fn': 350, 'auroc': 0.9385155179616608, 'auprc': 0.9236831009062467, 'eval_loss': 0.8362263663223944}
Correct predictions are:  4352
Total predictions are:  5000
Accuracy on test set is: 0.8704 


[0.8704, 0.8752, 0.8722, 0.8776, 0.873, 0.8758, 0.8676, 0.8644, 0.8682, 0.8704]

RUN NUMBER:  4


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.


EPOCH NUMBER:  0

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7310020387475248, 'tp': 2082, 'tn': 2241, 'fp': 252, 'fn': 425, 'auroc': 0.9428504319473865, 'auprc': 0.9406763969826795, 'eval_loss': 0.3778928537100554}
Correct predictions are:  4323
Total predictions are:  5000
Accuracy on test set is: 0.8646 



EPOCH NUMBER:  1

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.750297601335516, 'tp': 2127, 'tn': 2246, 'fp': 247, 'fn': 380, 'auroc': 0.9491084010098639, 'auprc': 0.9457075451825375, 'eval_loss': 0.3860444620192051}
Correct predictions are:  4373
Total predictions are:  5000
Accuracy on test set is: 0.8746 



EPOCH NUMBER:  2

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7416723393723587, 'tp': 2140, 'tn': 2213, 'fp': 280, 'fn': 367, 'auroc': 0.9447756470410728, 'auprc': 0.9422020049413902, 'eval_loss': 0.4847668361708522}
Correct predictions are:  4353
Total predictions are:  5000
Accuracy on test set is: 0.8706 



EPOCH NUMBER:  3

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7505749447745594, 'tp': 2169, 'tn': 2207, 'fp': 286, 'fn': 338, 'auroc': 0.948524396431268, 'auprc': 0.9437964603704768, 'eval_loss': 0.533191186171025}
Correct predictions are:  4376
Total predictions are:  5000
Accuracy on test set is: 0.8752 



EPOCH NUMBER:  4

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7356053589714888, 'tp': 2172, 'tn': 2167, 'fp': 326, 'fn': 335, 'auroc': 0.9446785262796461, 'auprc': 0.9415675777154349, 'eval_loss': 0.6058280516766011}
Correct predictions are:  4339
Total predictions are:  5000
Accuracy on test set is: 0.8678 



EPOCH NUMBER:  5

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7165714928313701, 'tp': 2075, 'tn': 2213, 'fp': 280, 'fn': 432, 'auroc': 0.9398172881675394, 'auprc': 0.9361588716859792, 'eval_loss': 0.6647594139352441}
Correct predictions are:  4288
Total predictions are:  5000
Accuracy on test set is: 0.8576 



EPOCH NUMBER:  6

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7389795213581515, 'tp': 2154, 'tn': 2193, 'fp': 300, 'fn': 353, 'auroc': 0.945380851785878, 'auprc': 0.941891364696302, 'eval_loss': 0.6664861137799919}
Correct predictions are:  4347
Total predictions are:  5000
Accuracy on test set is: 0.8694 



EPOCH NUMBER:  7

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7293410202825475, 'tp': 2145, 'tn': 2178, 'fp': 315, 'fn': 362, 'auroc': 0.941990985209324, 'auprc': 0.938790946144013, 'eval_loss': 0.7031800151269882}
Correct predictions are:  4323
Total predictions are:  5000
Accuracy on test set is: 0.8646 



EPOCH NUMBER:  8

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7438019527590193, 'tp': 2217, 'tn': 2142, 'fp': 351, 'fn': 290, 'auroc': 0.9431748344907025, 'auprc': 0.9396491447149004, 'eval_loss': 0.7033521355275064}
Correct predictions are:  4359
Total predictions are:  5000
Accuracy on test set is: 0.8718 



EPOCH NUMBER:  9

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7447979992163138, 'tp': 2188, 'tn': 2174, 'fp': 319, 'fn': 319, 'auroc': 0.9472667865716067, 'auprc': 0.9456073074266542, 'eval_loss': 0.7025346083782613}
Correct predictions are:  4362
Total predictions are:  5000
Accuracy on test set is: 0.8724 


[0.8646, 0.8746, 0.8706, 0.8752, 0.8678, 0.8576, 0.8694, 0.8646, 0.8718, 0.8724]

RUN NUMBER:  5


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.


EPOCH NUMBER:  0

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6744505281501109, 'tp': 2024, 'tn': 2159, 'fp': 334, 'fn': 483, 'auroc': 0.9169227086740359, 'auprc': 0.9156373392127211, 'eval_loss': 0.38831289092302324}
Correct predictions are:  4183
Total predictions are:  5000
Accuracy on test set is: 0.8366 



EPOCH NUMBER:  1

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.665015625758705, 'tp': 2219, 'tn': 1934, 'fp': 559, 'fn': 288, 'auroc': 0.916927028707905, 'auprc': 0.9145568513172213, 'eval_loss': 0.4521172504723072}
Correct predictions are:  4153
Total predictions are:  5000
Accuracy on test set is: 0.8306 



EPOCH NUMBER:  2

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.650776094862397, 'tp': 1902, 'tn': 2211, 'fp': 282, 'fn': 605, 'auroc': 0.9128384366533434, 'auprc': 0.9091098061213512, 'eval_loss': 0.46338995801210403}
Correct predictions are:  4113
Total predictions are:  5000
Accuracy on test set is: 0.8226 



EPOCH NUMBER:  3

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6264625467805716, 'tp': 1861, 'tn': 2190, 'fp': 303, 'fn': 646, 'auroc': 0.9012245856007511, 'auprc': 0.899814337966927, 'eval_loss': 0.4872444494485855}
Correct predictions are:  4051
Total predictions are:  5000
Accuracy on test set is: 0.8102 



EPOCH NUMBER:  4

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6256251411160129, 'tp': 2171, 'tn': 1884, 'fp': 609, 'fn': 336, 'auroc': 0.895147097953248, 'auprc': 0.8973057518587271, 'eval_loss': 0.4738025684118271}
Correct predictions are:  4055
Total predictions are:  5000
Accuracy on test set is: 0.811 



EPOCH NUMBER:  5

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6716438084221517, 'tp': 2113, 'tn': 2066, 'fp': 427, 'fn': 394, 'auroc': 0.9061091038953746, 'auprc': 0.8995535158297131, 'eval_loss': 0.4608134299874306}
Correct predictions are:  4179
Total predictions are:  5000
Accuracy on test set is: 0.8358 



EPOCH NUMBER:  6

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6736628114765332, 'tp': 2196, 'tn': 1983, 'fp': 510, 'fn': 311, 'auroc': 0.9042342891968275, 'auprc': 0.8893470991891286, 'eval_loss': 0.4590101644814014}
Correct predictions are:  4179
Total predictions are:  5000
Accuracy on test set is: 0.8358 



EPOCH NUMBER:  7

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6684401064145428, 'tp': 2080, 'tn': 2091, 'fp': 402, 'fn': 427, 'auroc': 0.9129196372899564, 'auprc': 0.9117906988695907, 'eval_loss': 0.4769076623797417}
Correct predictions are:  4171
Total predictions are:  5000
Accuracy on test set is: 0.8342 



EPOCH NUMBER:  8

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6415890052488151, 'tp': 1960, 'tn': 2139, 'fp': 354, 'fn': 547, 'auroc': 0.8799040984481318, 'auprc': 0.8947132953238721, 'eval_loss': 0.49931569855213165}
Correct predictions are:  4099
Total predictions are:  5000
Accuracy on test set is: 0.8198 



EPOCH NUMBER:  9

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6360209698957415, 'tp': 2043, 'tn': 2047, 'fp': 446, 'fn': 464, 'auroc': 0.8988157667156111, 'auprc': 0.8786380169595102, 'eval_loss': 0.5437996091723442}
Correct predictions are:  4090
Total predictions are:  5000
Accuracy on test set is: 0.818 


[0.8366, 0.8306, 0.8226, 0.8102, 0.811, 0.8358, 0.8358, 0.8342, 0.8198, 0.818]


 Over all runs maximum accuracies are: [0.8366, 0.8732, 0.8752, 0.876, 0.8776]
The median is: 0.8752
RoBERTa Accuracy Score on Test set ->  ['0.8752 +/- 0.03859999999999997']


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.



* * * * EVALUATION USING LOW_STM AS PREPROCESSING FUNCTION * * * *

RUN NUMBER:  1


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.


EPOCH NUMBER:  0

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.697939642490096, 'tp': 2201, 'tn': 2041, 'fp': 452, 'fn': 306, 'auroc': 0.9301868126646111, 'auprc': 0.9280803073932836, 'eval_loss': 0.37794847138524057}
Correct predictions are:  4242
Total predictions are:  5000
Accuracy on test set is: 0.8484 



EPOCH NUMBER:  1

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7051578170209544, 'tp': 2197, 'tn': 2064, 'fp': 429, 'fn': 310, 'auroc': 0.9291928848722173, 'auprc': 0.9282954587240778, 'eval_loss': 0.37816526057720184}
Correct predictions are:  4261
Total predictions are:  5000
Accuracy on test set is: 0.8522 



EPOCH NUMBER:  2

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.701178105254517, 'tp': 2206, 'tn': 2044, 'fp': 449, 'fn': 301, 'auroc': 0.9297458492074577, 'auprc': 0.9267381669798889, 'eval_loss': 0.573217882759124}
Correct predictions are:  4250
Total predictions are:  5000
Accuracy on test set is: 0.85 



EPOCH NUMBER:  3

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6962708928546999, 'tp': 2183, 'tn': 2056, 'fp': 437, 'fn': 324, 'auroc': 0.9290900040656319, 'auprc': 0.9265922686774322, 'eval_loss': 0.6081291947722435}
Correct predictions are:  4239
Total predictions are:  5000
Accuracy on test set is: 0.8478 



EPOCH NUMBER:  4

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.691082714844828, 'tp': 2158, 'tn': 2069, 'fp': 424, 'fn': 349, 'auroc': 0.9261040606558355, 'auprc': 0.9246272904885913, 'eval_loss': 0.7032160024374724}
Correct predictions are:  4227
Total predictions are:  5000
Accuracy on test set is: 0.8454 



EPOCH NUMBER:  5

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6935016815162578, 'tp': 2088, 'tn': 2145, 'fp': 348, 'fn': 419, 'auroc': 0.9270599081496798, 'auprc': 0.9253387277112008, 'eval_loss': 0.7649449002362788}
Correct predictions are:  4233
Total predictions are:  5000
Accuracy on test set is: 0.8466 



EPOCH NUMBER:  6

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6750195761976816, 'tp': 2048, 'tn': 2138, 'fp': 355, 'fn': 459, 'auroc': 0.9208544994992761, 'auprc': 0.9093046206310815, 'eval_loss': 0.8884706037135794}
Correct predictions are:  4186
Total predictions are:  5000
Accuracy on test set is: 0.8372 



EPOCH NUMBER:  7

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.680520873356668, 'tp': 2133, 'tn': 2068, 'fp': 425, 'fn': 374, 'auroc': 0.9228189149002928, 'auprc': 0.9209873486616785, 'eval_loss': 0.8223840953726321}
Correct predictions are:  4201
Total predictions are:  5000
Accuracy on test set is: 0.8402 



EPOCH NUMBER:  8

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7021374313756602, 'tp': 2111, 'tn': 2144, 'fp': 349, 'fn': 396, 'auroc': 0.9266188646918994, 'auprc': 0.9239263247248025, 'eval_loss': 0.8647957823074423}
Correct predictions are:  4255
Total predictions are:  5000
Accuracy on test set is: 0.851 



EPOCH NUMBER:  9

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6897475002453505, 'tp': 2170, 'tn': 2053, 'fp': 440, 'fn': 337, 'auroc': 0.9238081226556817, 'auprc': 0.9230956532587395, 'eval_loss': 0.9073354321380146}
Correct predictions are:  4223
Total predictions are:  5000
Accuracy on test set is: 0.8446 


[0.8484, 0.8522, 0.85, 0.8478, 0.8454, 0.8466, 0.8372, 0.8402, 0.851, 0.8446]

RUN NUMBER:  2


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.


EPOCH NUMBER:  0

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.680605134290686, 'tp': 2188, 'tn': 2010, 'fp': 483, 'fn': 319, 'auroc': 0.919321767482657, 'auprc': 0.9182443033949295, 'eval_loss': 0.4140148013591766}
Correct predictions are:  4198
Total predictions are:  5000
Accuracy on test set is: 0.8396 



EPOCH NUMBER:  1

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6878236498658695, 'tp': 2086, 'tn': 2133, 'fp': 360, 'fn': 421, 'auroc': 0.9162182231508695, 'auprc': 0.9106968485534953, 'eval_loss': 0.4458799112856388}
Correct predictions are:  4219
Total predictions are:  5000
Accuracy on test set is: 0.8438 



EPOCH NUMBER:  2

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6869378078521714, 'tp': 2067, 'tn': 2149, 'fp': 344, 'fn': 440, 'auroc': 0.9216616258271464, 'auprc': 0.9171935292862463, 'eval_loss': 0.4506551401346922}
Correct predictions are:  4216
Total predictions are:  5000
Accuracy on test set is: 0.8432 



EPOCH NUMBER:  3

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6856306825137456, 'tp': 2128, 'tn': 2086, 'fp': 407, 'fn': 379, 'auroc': 0.9236980417926477, 'auprc': 0.9206879664649394, 'eval_loss': 0.579861435636878}
Correct predictions are:  4214
Total predictions are:  5000
Accuracy on test set is: 0.8428 



EPOCH NUMBER:  4

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6786341295368674, 'tp': 2247, 'tn': 1938, 'fp': 555, 'fn': 260, 'auroc': 0.9228461151135424, 'auprc': 0.9203882457731191, 'eval_loss': 0.6619789973333478}
Correct predictions are:  4185
Total predictions are:  5000
Accuracy on test set is: 0.837 



EPOCH NUMBER:  5

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6808625772273603, 'tp': 2127, 'tn': 2075, 'fp': 418, 'fn': 380, 'auroc': 0.923771562369049, 'auprc': 0.9207259329035973, 'eval_loss': 0.7272482770498842}
Correct predictions are:  4202
Total predictions are:  5000
Accuracy on test set is: 0.8404 



EPOCH NUMBER:  6

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6920062388787117, 'tp': 2130, 'tn': 2100, 'fp': 393, 'fn': 377, 'auroc': 0.9254270153478004, 'auprc': 0.9242828584796375, 'eval_loss': 0.7081544047176838}
Correct predictions are:  4230
Total predictions are:  5000
Accuracy on test set is: 0.846 



EPOCH NUMBER:  7

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6794044998327535, 'tp': 2160, 'tn': 2037, 'fp': 456, 'fn': 347, 'auroc': 0.9225261926053502, 'auprc': 0.921695808133271, 'eval_loss': 0.724329424612224}
Correct predictions are:  4197
Total predictions are:  5000
Accuracy on test set is: 0.8394 



EPOCH NUMBER:  8

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6656716723899319, 'tp': 2072, 'tn': 2092, 'fp': 401, 'fn': 435, 'auroc': 0.9171976708297394, 'auprc': 0.9154248287068955, 'eval_loss': 0.8111863598320633}
Correct predictions are:  4164
Total predictions are:  5000
Accuracy on test set is: 0.8328 



EPOCH NUMBER:  9

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6777835792959832, 'tp': 2135, 'tn': 2059, 'fp': 434, 'fn': 372, 'auroc': 0.8884975258206025, 'auprc': 0.8285460484464701, 'eval_loss': 0.7589626997958868}
Correct predictions are:  4194
Total predictions are:  5000
Accuracy on test set is: 0.8388 


[0.8396, 0.8438, 0.8432, 0.8428, 0.837, 0.8404, 0.846, 0.8394, 0.8328, 0.8388]

RUN NUMBER:  3


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.


EPOCH NUMBER:  0

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6838668620169408, 'tp': 2078, 'tn': 2131, 'fp': 362, 'fn': 429, 'auroc': 0.9193412876356951, 'auprc': 0.9182791080326442, 'eval_loss': 0.3748847302377224}
Correct predictions are:  4209
Total predictions are:  5000
Accuracy on test set is: 0.8418 



EPOCH NUMBER:  1

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6905475225169172, 'tp': 2055, 'tn': 2169, 'fp': 324, 'fn': 452, 'auroc': 0.9260626203309433, 'auprc': 0.9253438546226593, 'eval_loss': 0.41324351912140844}
Correct predictions are:  4224
Total predictions are:  5000
Accuracy on test set is: 0.8448 



EPOCH NUMBER:  2

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6899989160681649, 'tp': 2124, 'tn': 2101, 'fp': 392, 'fn': 383, 'auroc': 0.9263293424220447, 'auprc': 0.9258106851884799, 'eval_loss': 0.4834902587994933}
Correct predictions are:  4225
Total predictions are:  5000
Accuracy on test set is: 0.845 



EPOCH NUMBER:  3

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6948348740662031, 'tp': 2114, 'tn': 2123, 'fp': 370, 'fn': 393, 'auroc': 0.9244462876588951, 'auprc': 0.9223017759069428, 'eval_loss': 0.639208352329582}
Correct predictions are:  4237
Total predictions are:  5000
Accuracy on test set is: 0.8474 



EPOCH NUMBER:  4

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6973477741390952, 'tp': 2156, 'tn': 2087, 'fp': 406, 'fn': 351, 'auroc': 0.909909853693253, 'auprc': 0.9147543936693711, 'eval_loss': 0.7425932399753481}
Correct predictions are:  4243
Total predictions are:  5000
Accuracy on test set is: 0.8486 



EPOCH NUMBER:  5

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6923615048390817, 'tp': 2083, 'tn': 2147, 'fp': 346, 'fn': 424, 'auroc': 0.9229253157344754, 'auprc': 0.9206555774247371, 'eval_loss': 0.7396634639095515}
Correct predictions are:  4230
Total predictions are:  5000
Accuracy on test set is: 0.846 



EPOCH NUMBER:  6

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.68437728668898, 'tp': 2197, 'tn': 2010, 'fp': 483, 'fn': 310, 'auroc': 0.9152294153986167, 'auprc': 0.9169368700986416, 'eval_loss': 0.7744782258499414}
Correct predictions are:  4207
Total predictions are:  5000
Accuracy on test set is: 0.8414 



EPOCH NUMBER:  7

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6852523554521107, 'tp': 2131, 'tn': 2082, 'fp': 411, 'fn': 376, 'auroc': 0.9219297879295374, 'auprc': 0.9195178937798032, 'eval_loss': 0.7972701341655105}
Correct predictions are:  4213
Total predictions are:  5000
Accuracy on test set is: 0.8426 



EPOCH NUMBER:  8

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6806892560625509, 'tp': 2165, 'tn': 2035, 'fp': 458, 'fn': 342, 'auroc': 0.9151977351502436, 'auprc': 0.9104967448921027, 'eval_loss': 0.8586006224733778}
Correct predictions are:  4200
Total predictions are:  5000
Accuracy on test set is: 0.84 



EPOCH NUMBER:  9

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6788467844441375, 'tp': 2092, 'tn': 2105, 'fp': 388, 'fn': 415, 'auroc': 0.914594370419864, 'auprc': 0.90995745032735, 'eval_loss': 0.8962638777721673}
Correct predictions are:  4197
Total predictions are:  5000
Accuracy on test set is: 0.8394 


[0.8418, 0.8448, 0.845, 0.8474, 0.8486, 0.846, 0.8414, 0.8426, 0.84, 0.8394]

RUN NUMBER:  4


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.


EPOCH NUMBER:  0

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6731677741596577, 'tp': 2159, 'tn': 2022, 'fp': 471, 'fn': 348, 'auroc': 0.9173292718614914, 'auprc': 0.9127743418819041, 'eval_loss': 0.3822901164650917}
Correct predictions are:  4181
Total predictions are:  5000
Accuracy on test set is: 0.8362 



EPOCH NUMBER:  1

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6897630822983041, 'tp': 2148, 'tn': 2076, 'fp': 417, 'fn': 359, 'auroc': 0.9260519002468979, 'auprc': 0.925674033746619, 'eval_loss': 0.44667555146217347}
Correct predictions are:  4224
Total predictions are:  5000
Accuracy on test set is: 0.8448 



EPOCH NUMBER:  2

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6833907058823487, 'tp': 2179, 'tn': 2027, 'fp': 466, 'fn': 328, 'auroc': 0.9241525253557988, 'auprc': 0.9226092383958554, 'eval_loss': 0.4912557558760047}
Correct predictions are:  4206
Total predictions are:  5000
Accuracy on test set is: 0.8412 



EPOCH NUMBER:  3

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6892233250270177, 'tp': 2131, 'tn': 2092, 'fp': 401, 'fn': 376, 'auroc': 0.9267430256653213, 'auprc': 0.925811626204212, 'eval_loss': 0.5913916576519609}
Correct predictions are:  4223
Total predictions are:  5000
Accuracy on test set is: 0.8446 



EPOCH NUMBER:  4

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6837357655946616, 'tp': 2063, 'tn': 2145, 'fp': 348, 'fn': 444, 'auroc': 0.9234705200088769, 'auprc': 0.9232260800891959, 'eval_loss': 0.7333864676203579}
Correct predictions are:  4208
Total predictions are:  5000
Accuracy on test set is: 0.8416 



EPOCH NUMBER:  5

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6839181102286881, 'tp': 2170, 'tn': 2038, 'fp': 455, 'fn': 337, 'auroc': 0.9227573944179722, 'auprc': 0.9183184326979285, 'eval_loss': 0.7649931876167655}
Correct predictions are:  4208
Total predictions are:  5000
Accuracy on test set is: 0.8416 



EPOCH NUMBER:  6

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6800038400203736, 'tp': 2103, 'tn': 2097, 'fp': 396, 'fn': 404, 'auroc': 0.9232177180269093, 'auprc': 0.9202147930080457, 'eval_loss': 0.791695812304318}
Correct predictions are:  4200
Total predictions are:  5000
Accuracy on test set is: 0.84 



EPOCH NUMBER:  7

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6848710853397738, 'tp': 2133, 'tn': 2079, 'fp': 414, 'fn': 374, 'auroc': 0.9240955649092288, 'auprc': 0.9237218372898038, 'eval_loss': 0.8082131787404419}
Correct predictions are:  4212
Total predictions are:  5000
Accuracy on test set is: 0.8424 



EPOCH NUMBER:  8

NOW TRAIN THE MODEL.

NOW EVALUATE THE TEST DF.




{'mcc': 0.6912065672020108, 'tp': 2116, 'tn': 2112, 'fp': 381, 'fn': 391, 'auroc': 0.9240810047950775, 'auprc': 0.9224374400812051, 'eval_loss': 0.8417959922956303}
Correct predictions are:  4228
Total predictions are:  5000
Accuracy on test set is: 0.8456 



EPOCH NUMBER:  9

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6786634210657179, 'tp': 2012, 'tn': 2180, 'fp': 313, 'fn': 495, 'auroc': 0.9218940276491768, 'auprc': 0.9203543527278646, 'eval_loss': 0.9280993834545835}
Correct predictions are:  4192
Total predictions are:  5000
Accuracy on test set is: 0.8384 


[0.8362, 0.8448, 0.8412, 0.8446, 0.8416, 0.8416, 0.84, 0.8424, 0.8456, 0.8384]

RUN NUMBER:  5


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.


EPOCH NUMBER:  0

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6924362053357651, 'tp': 2208, 'tn': 2019, 'fp': 474, 'fn': 299, 'auroc': 0.926248701789822, 'auprc': 0.9235339629377268, 'eval_loss': 0.36578804659843445}
Correct predictions are:  4227
Total predictions are:  5000
Accuracy on test set is: 0.8454 



EPOCH NUMBER:  1

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7092606136140855, 'tp': 2218, 'tn': 2052, 'fp': 441, 'fn': 289, 'auroc': 0.9313484217716266, 'auprc': 0.9277314802915432, 'eval_loss': 0.43005367728471755}
Correct predictions are:  4270
Total predictions are:  5000
Accuracy on test set is: 0.854 



EPOCH NUMBER:  2

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7000581312041496, 'tp': 2117, 'tn': 2133, 'fp': 360, 'fn': 390, 'auroc': 0.9288801624204732, 'auprc': 0.9262192040265543, 'eval_loss': 0.5474200330212712}
Correct predictions are:  4250
Total predictions are:  5000
Accuracy on test set is: 0.85 



EPOCH NUMBER:  3

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7032324765665006, 'tp': 2150, 'tn': 2108, 'fp': 385, 'fn': 357, 'auroc': 0.9306747364899342, 'auprc': 0.9282618040443918, 'eval_loss': 0.5976070293866098}
Correct predictions are:  4258
Total predictions are:  5000
Accuracy on test set is: 0.8516 



EPOCH NUMBER:  4

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.693559596894576, 'tp': 2217, 'tn': 2012, 'fp': 481, 'fn': 290, 'auroc': 0.9298061696803703, 'auprc': 0.9275012206032858, 'eval_loss': 0.6527680079571903}
Correct predictions are:  4229
Total predictions are:  5000
Accuracy on test set is: 0.8458 



EPOCH NUMBER:  5

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6732082696409851, 'tp': 2056, 'tn': 2126, 'fp': 367, 'fn': 451, 'auroc': 0.9226297934175803, 'auprc': 0.9211102565275882, 'eval_loss': 0.7004831875368953}
Correct predictions are:  4182
Total predictions are:  5000
Accuracy on test set is: 0.8364 



EPOCH NUMBER:  6

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6792264057398154, 'tp': 2096, 'tn': 2102, 'fp': 391, 'fn': 411, 'auroc': 0.9203428154876733, 'auprc': 0.9181003681586497, 'eval_loss': 0.6923118867114186}
Correct predictions are:  4198
Total predictions are:  5000
Accuracy on test set is: 0.8396 



EPOCH NUMBER:  7

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6900194250372897, 'tp': 2111, 'tn': 2114, 'fp': 379, 'fn': 396, 'auroc': 0.9260883805329033, 'auprc': 0.9235421579211411, 'eval_loss': 0.6812605192236603}
Correct predictions are:  4225
Total predictions are:  5000
Accuracy on test set is: 0.845 



EPOCH NUMBER:  8

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6811477045988109, 'tp': 2036, 'tn': 2164, 'fp': 329, 'fn': 471, 'auroc': 0.9245764486793576, 'auprc': 0.9232265200828955, 'eval_loss': 0.7518694971553981}
Correct predictions are:  4200
Total predictions are:  5000
Accuracy on test set is: 0.84 



EPOCH NUMBER:  9

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6840257981754152, 'tp': 2187, 'tn': 2020, 'fp': 473, 'fn': 320, 'auroc': 0.9073927939595048, 'auprc': 0.9175333912995385, 'eval_loss': 0.7612351960033178}
Correct predictions are:  4207
Total predictions are:  5000
Accuracy on test set is: 0.8414 


[0.8454, 0.854, 0.85, 0.8516, 0.8458, 0.8364, 0.8396, 0.845, 0.84, 0.8414]


 Over all runs maximum accuracies are: [0.8456, 0.846, 0.8486, 0.8522, 0.854]
The median is: 0.8486
RoBERTa Accuracy Score on Test set ->  ['0.8486 +/- 0.00539999999999996']


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.



* * * * EVALUATION USING RSW_LOW AS PREPROCESSING FUNCTION * * * *

RUN NUMBER:  1


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.


EPOCH NUMBER:  0

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7421622363551328, 'tp': 2121, 'tn': 2232, 'fp': 261, 'fn': 386, 'auroc': 0.9482403942046906, 'auprc': 0.9453562117828221, 'eval_loss': 0.34432730353921653}
Correct predictions are:  4353
Total predictions are:  5000
Accuracy on test set is: 0.8706 



EPOCH NUMBER:  1

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7318517528146373, 'tp': 2308, 'tn': 2010, 'fp': 483, 'fn': 199, 'auroc': 0.9452002103696493, 'auprc': 0.9416850920881426, 'eval_loss': 0.3614099787116051}
Correct predictions are:  4318
Total predictions are:  5000
Accuracy on test set is: 0.8636 



EPOCH NUMBER:  2

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7429391815737424, 'tp': 2287, 'tn': 2064, 'fp': 429, 'fn': 220, 'auroc': 0.9472976668137079, 'auprc': 0.9453954596841815, 'eval_loss': 0.436902240511775}
Correct predictions are:  4351
Total predictions are:  5000
Accuracy on test set is: 0.8702 



EPOCH NUMBER:  3

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7572551029232384, 'tp': 2220, 'tn': 2173, 'fp': 320, 'fn': 287, 'auroc': 0.9482770344919504, 'auprc': 0.9457536631680514, 'eval_loss': 0.4709034607455134}
Correct predictions are:  4393
Total predictions are:  5000
Accuracy on test set is: 0.8786 



EPOCH NUMBER:  4

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7540704749850188, 'tp': 2218, 'tn': 2167, 'fp': 326, 'fn': 289, 'auroc': 0.9497773662545514, 'auprc': 0.9478760024636029, 'eval_loss': 0.5078845304101706}
Correct predictions are:  4385
Total predictions are:  5000
Accuracy on test set is: 0.877 



EPOCH NUMBER:  5

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7333918256133108, 'tp': 2146, 'tn': 2187, 'fp': 306, 'fn': 361, 'auroc': 0.9433138755807846, 'auprc': 0.942317636063001, 'eval_loss': 0.5672610372170805}
Correct predictions are:  4333
Total predictions are:  5000
Accuracy on test set is: 0.8666 



EPOCH NUMBER:  6

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.748961890139815, 'tp': 2168, 'tn': 2204, 'fp': 289, 'fn': 339, 'auroc': 0.9487995185882256, 'auprc': 0.9488363429448456, 'eval_loss': 0.6046757041949778}
Correct predictions are:  4372
Total predictions are:  5000
Accuracy on test set is: 0.8744 



EPOCH NUMBER:  7

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7528935049090826, 'tp': 2244, 'tn': 2137, 'fp': 356, 'fn': 263, 'auroc': 0.9441882824361343, 'auprc': 0.9462918068460894, 'eval_loss': 0.5851880964189767}
Correct predictions are:  4381
Total predictions are:  5000
Accuracy on test set is: 0.8762 



EPOCH NUMBER:  8

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7401601457755735, 'tp': 2134, 'tn': 2215, 'fp': 278, 'fn': 373, 'auroc': 0.9456453338594175, 'auprc': 0.94578195308337, 'eval_loss': 0.6576455173149705}
Correct predictions are:  4349
Total predictions are:  5000
Accuracy on test set is: 0.8698 



EPOCH NUMBER:  9

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7532362990689812, 'tp': 2187, 'tn': 2196, 'fp': 297, 'fn': 320, 'auroc': 0.9465523009700396, 'auprc': 0.9449518750498248, 'eval_loss': 0.6163822281371802}
Correct predictions are:  4383
Total predictions are:  5000
Accuracy on test set is: 0.8766 


[0.8706, 0.8636, 0.8702, 0.8786, 0.877, 0.8666, 0.8744, 0.8762, 0.8698, 0.8766]

RUN NUMBER:  2


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.


EPOCH NUMBER:  0

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.0, 'tp': 0, 'tn': 2493, 'fp': 0, 'fn': 2507, 'auroc': 0.7801113960733452, 'auprc': 0.7791225884902171, 'eval_loss': 0.6379854397773743}
Correct predictions are:  2493
Total predictions are:  5000
Accuracy on test set is: 0.4986 



EPOCH NUMBER:  1

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.5996380533446285, 'tp': 1994, 'tn': 2005, 'fp': 488, 'fn': 513, 'auroc': 0.8650151017183976, 'auprc': 0.8271733368315759, 'eval_loss': 0.47904301706552505}
Correct predictions are:  3999
Total predictions are:  5000
Accuracy on test set is: 0.7998 



EPOCH NUMBER:  2

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6485298879000932, 'tp': 2148, 'tn': 1970, 'fp': 523, 'fn': 359, 'auroc': 0.9069155902182273, 'auprc': 0.9068928268152734, 'eval_loss': 0.4564555660903454}
Correct predictions are:  4118
Total predictions are:  5000
Accuracy on test set is: 0.8236 



EPOCH NUMBER:  3

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6451861306144698, 'tp': 2161, 'tn': 1947, 'fp': 546, 'fn': 346, 'auroc': 0.9113990653686723, 'auprc': 0.9076273309793343, 'eval_loss': 0.4745463578104973}
Correct predictions are:  4108
Total predictions are:  5000
Accuracy on test set is: 0.8216 



EPOCH NUMBER:  4

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6608145713239894, 'tp': 2094, 'tn': 2058, 'fp': 435, 'fn': 413, 'auroc': 0.8882576839402421, 'auprc': 0.8332382827087401, 'eval_loss': 0.4623284759759903}
Correct predictions are:  4152
Total predictions are:  5000
Accuracy on test set is: 0.8304 



EPOCH NUMBER:  5

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6339543355830707, 'tp': 2313, 'tn': 1731, 'fp': 762, 'fn': 194, 'auroc': 0.9078007171576226, 'auprc': 0.891703762119164, 'eval_loss': 0.4788289360165596}
Correct predictions are:  4044
Total predictions are:  5000
Accuracy on test set is: 0.8088 



EPOCH NUMBER:  6

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6761385889393369, 'tp': 2012, 'tn': 2174, 'fp': 319, 'fn': 495, 'auroc': 0.8761371089149339, 'auprc': 0.894488688597155, 'eval_loss': 0.46866661533117293}
Correct predictions are:  4186
Total predictions are:  5000
Accuracy on test set is: 0.8372 



EPOCH NUMBER:  7

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6777142498410862, 'tp': 2218, 'tn': 1969, 'fp': 524, 'fn': 289, 'auroc': 0.9206392178114676, 'auprc': 0.9179351868558592, 'eval_loss': 0.46631428941190245}
Correct predictions are:  4187
Total predictions are:  5000
Accuracy on test set is: 0.8374 



EPOCH NUMBER:  8

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.4971616129219992, 'tp': 1447, 'tn': 2230, 'fp': 263, 'fn': 1060, 'auroc': 0.874515016197727, 'auprc': 0.8619070884418549, 'eval_loss': 0.5509096937179565}
Correct predictions are:  3677
Total predictions are:  5000
Accuracy on test set is: 0.7354 



EPOCH NUMBER:  9

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6741959862975011, 'tp': 2048, 'tn': 2136, 'fp': 357, 'fn': 459, 'auroc': 0.8870062341288755, 'auprc': 0.8103514442102835, 'eval_loss': 0.4209657679796219}
Correct predictions are:  4184
Total predictions are:  5000
Accuracy on test set is: 0.8368 


[0.4986, 0.7998, 0.8236, 0.8216, 0.8304, 0.8088, 0.8372, 0.8374, 0.7354, 0.8368]

RUN NUMBER:  3


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.


EPOCH NUMBER:  0

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7433948889934993, 'tp': 2216, 'tn': 2142, 'fp': 351, 'fn': 291, 'auroc': 0.944963088510614, 'auprc': 0.943750177409058, 'eval_loss': 0.3075600063845515}
Correct predictions are:  4358
Total predictions are:  5000
Accuracy on test set is: 0.8716 



EPOCH NUMBER:  1

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.737017429424486, 'tp': 2230, 'tn': 2111, 'fp': 382, 'fn': 277, 'auroc': 0.9200826534480031, 'auprc': 0.9324399637576394, 'eval_loss': 0.40551237858235833}
Correct predictions are:  4341
Total predictions are:  5000
Accuracy on test set is: 0.8682 



EPOCH NUMBER:  2

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7333323567608373, 'tp': 2222, 'tn': 2110, 'fp': 383, 'fn': 285, 'auroc': 0.9422767474497001, 'auprc': 0.9424178453142591, 'eval_loss': 0.3860424086481333}
Correct predictions are:  4332
Total predictions are:  5000
Accuracy on test set is: 0.8664 



EPOCH NUMBER:  3

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7529588048736185, 'tp': 2225, 'tn': 2157, 'fp': 336, 'fn': 282, 'auroc': 0.9462496585973235, 'auprc': 0.9437356788003904, 'eval_loss': 0.4448329320907593}
Correct predictions are:  4382
Total predictions are:  5000
Accuracy on test set is: 0.8764 



EPOCH NUMBER:  4

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7415100455834417, 'tp': 2221, 'tn': 2132, 'fp': 361, 'fn': 286, 'auroc': 0.9434539566790202, 'auprc': 0.9395813469554535, 'eval_loss': 0.511377659226954}
Correct predictions are:  4353
Total predictions are:  5000
Accuracy on test set is: 0.8706 



EPOCH NUMBER:  5

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7449401350953445, 'tp': 2141, 'tn': 2220, 'fp': 273, 'fn': 366, 'auroc': 0.946724222317903, 'auprc': 0.9454878704557469, 'eval_loss': 0.518312225459516}
Correct predictions are:  4361
Total predictions are:  5000
Accuracy on test set is: 0.8722 



EPOCH NUMBER:  6

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7404601241445632, 'tp': 2098, 'tn': 2249, 'fp': 244, 'fn': 409, 'auroc': 0.9473867875124141, 'auprc': 0.9452784872336815, 'eval_loss': 0.5675390464760363}
Correct predictions are:  4347
Total predictions are:  5000
Accuracy on test set is: 0.8694 



EPOCH NUMBER:  7

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7072431297265532, 'tp': 2298, 'tn': 1955, 'fp': 538, 'fn': 209, 'auroc': 0.9385398381523311, 'auprc': 0.9364444080158179, 'eval_loss': 0.6142093014970422}
Correct predictions are:  4253
Total predictions are:  5000
Accuracy on test set is: 0.8506 



EPOCH NUMBER:  8

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7161123095697768, 'tp': 2288, 'tn': 1991, 'fp': 502, 'fn': 219, 'auroc': 0.9329593144010249, 'auprc': 0.908274513923488, 'eval_loss': 0.5069685862123966}
Correct predictions are:  4279
Total predictions are:  5000
Accuracy on test set is: 0.8558 



EPOCH NUMBER:  9

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7157692088233281, 'tp': 2217, 'tn': 2070, 'fp': 423, 'fn': 290, 'auroc': 0.9351335714472002, 'auprc': 0.9331136829245225, 'eval_loss': 0.5881631569057703}
Correct predictions are:  4287
Total predictions are:  5000
Accuracy on test set is: 0.8574 


[0.8716, 0.8682, 0.8664, 0.8764, 0.8706, 0.8722, 0.8694, 0.8506, 0.8558, 0.8574]

RUN NUMBER:  4


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.


EPOCH NUMBER:  0

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7191744490883331, 'tp': 2341, 'tn': 1935, 'fp': 558, 'fn': 166, 'auroc': 0.9349132497198779, 'auprc': 0.931056188405965, 'eval_loss': 0.37073607220351695}
Correct predictions are:  4276
Total predictions are:  5000
Accuracy on test set is: 0.8552 



EPOCH NUMBER:  1

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7493461588361252, 'tp': 2278, 'tn': 2091, 'fp': 402, 'fn': 229, 'auroc': 0.9461444577725489, 'auprc': 0.9434030192471853, 'eval_loss': 0.3615339206263423}
Correct predictions are:  4369
Total predictions are:  5000
Accuracy on test set is: 0.8738 



EPOCH NUMBER:  2

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7301986368216339, 'tp': 2221, 'tn': 2103, 'fp': 390, 'fn': 286, 'auroc': 0.9388132002954904, 'auprc': 0.9292847487932858, 'eval_loss': 0.4804433898627758}
Correct predictions are:  4324
Total predictions are:  5000
Accuracy on test set is: 0.8648 



EPOCH NUMBER:  3

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.726598047092696, 'tp': 2115, 'tn': 2200, 'fp': 293, 'fn': 392, 'auroc': 0.9298963303872302, 'auprc': 0.9372118376371723, 'eval_loss': 0.5031962810859084}
Correct predictions are:  4315
Total predictions are:  5000
Accuracy on test set is: 0.863 



EPOCH NUMBER:  4

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7375006343819124, 'tp': 2258, 'tn': 2082, 'fp': 411, 'fn': 249, 'auroc': 0.9443344435820377, 'auprc': 0.9430039126224178, 'eval_loss': 0.47893249912708996}
Correct predictions are:  4340
Total predictions are:  5000
Accuracy on test set is: 0.868 



EPOCH NUMBER:  5

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7392274521608299, 'tp': 2171, 'tn': 2177, 'fp': 316, 'fn': 336, 'auroc': 0.9433349957463666, 'auprc': 0.9439761252562799, 'eval_loss': 0.4845332849636674}
Correct predictions are:  4348
Total predictions are:  5000
Accuracy on test set is: 0.8696 



EPOCH NUMBER:  6

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.747774862088599, 'tp': 2267, 'tn': 2099, 'fp': 394, 'fn': 240, 'auroc': 0.9456349337778809, 'auprc': 0.9451342625083342, 'eval_loss': 0.5199265984922647}
Correct predictions are:  4366
Total predictions are:  5000
Accuracy on test set is: 0.8732 



EPOCH NUMBER:  7

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7370988584270672, 'tp': 2098, 'tn': 2241, 'fp': 252, 'fn': 409, 'auroc': 0.9415457817189288, 'auprc': 0.9404520333042944, 'eval_loss': 0.6100581091627478}
Correct predictions are:  4339
Total predictions are:  5000
Accuracy on test set is: 0.8678 



EPOCH NUMBER:  8

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7393179137806745, 'tp': 2095, 'tn': 2249, 'fp': 244, 'fn': 412, 'auroc': 0.9408369761618931, 'auprc': 0.9393686391607817, 'eval_loss': 0.6530544541694224}
Correct predictions are:  4344
Total predictions are:  5000
Accuracy on test set is: 0.8688 



EPOCH NUMBER:  9

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7261691557633972, 'tp': 2193, 'tn': 2122, 'fp': 371, 'fn': 314, 'auroc': 0.9392464036918049, 'auprc': 0.9400604908395153, 'eval_loss': 0.6849987968795002}
Correct predictions are:  4315
Total predictions are:  5000
Accuracy on test set is: 0.863 


[0.8552, 0.8738, 0.8648, 0.863, 0.868, 0.8696, 0.8732, 0.8678, 0.8688, 0.863]

RUN NUMBER:  5


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.


EPOCH NUMBER:  0

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7333087738416996, 'tp': 2153, 'tn': 2180, 'fp': 313, 'fn': 354, 'auroc': 0.9420299455147728, 'auprc': 0.9383067990530745, 'eval_loss': 0.32826949977874753}
Correct predictions are:  4333
Total predictions are:  5000
Accuracy on test set is: 0.8666 



EPOCH NUMBER:  1

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7448798002593268, 'tp': 2261, 'tn': 2098, 'fp': 395, 'fn': 246, 'auroc': 0.9436766784251589, 'auprc': 0.9403579461190192, 'eval_loss': 0.39308913676440715}
Correct predictions are:  4359
Total predictions are:  5000
Accuracy on test set is: 0.8718 



EPOCH NUMBER:  2

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7521463544881133, 'tp': 2223, 'tn': 2157, 'fp': 336, 'fn': 284, 'auroc': 0.9387851200753414, 'auprc': 0.9263739936008878, 'eval_loss': 0.48901566276997327}
Correct predictions are:  4380
Total predictions are:  5000
Accuracy on test set is: 0.876 



EPOCH NUMBER:  3

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.75039884236253, 'tp': 2194, 'tn': 2182, 'fp': 311, 'fn': 313, 'auroc': 0.9426610704627923, 'auprc': 0.9370632952024585, 'eval_loss': 0.4687496257700026}
Correct predictions are:  4376
Total predictions are:  5000
Accuracy on test set is: 0.8752 



EPOCH NUMBER:  4

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7236125342291119, 'tp': 2120, 'tn': 2188, 'fp': 305, 'fn': 387, 'auroc': 0.9283659983894273, 'auprc': 0.919021463015004, 'eval_loss': 0.49329259857833385}
Correct predictions are:  4308
Total predictions are:  5000
Accuracy on test set is: 0.8616 



EPOCH NUMBER:  5

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7287559137589238, 'tp': 2208, 'tn': 2113, 'fp': 380, 'fn': 299, 'auroc': 0.9380915146374748, 'auprc': 0.9346345196443365, 'eval_loss': 0.47470136857330797}
Correct predictions are:  4321
Total predictions are:  5000
Accuracy on test set is: 0.8642 



EPOCH NUMBER:  6

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7384577842038818, 'tp': 2263, 'tn': 2079, 'fp': 414, 'fn': 244, 'auroc': 0.9382831161396306, 'auprc': 0.9190529423084152, 'eval_loss': 0.4649614788681269}
Correct predictions are:  4342
Total predictions are:  5000
Accuracy on test set is: 0.8684 



EPOCH NUMBER:  7

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.749207306653235, 'tp': 2201, 'tn': 2172, 'fp': 321, 'fn': 306, 'auroc': 0.9451307698252353, 'auprc': 0.9416470933750486, 'eval_loss': 0.5025771401286125}
Correct predictions are:  4373
Total predictions are:  5000
Accuracy on test set is: 0.8746 



EPOCH NUMBER:  8

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7252155159487863, 'tp': 2156, 'tn': 2157, 'fp': 336, 'fn': 351, 'auroc': 0.9346785278796585, 'auprc': 0.9177198226680534, 'eval_loss': 0.6217546732351183}
Correct predictions are:  4313
Total predictions are:  5000
Accuracy on test set is: 0.8626 



EPOCH NUMBER:  9

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7311992279595573, 'tp': 2175, 'tn': 2153, 'fp': 340, 'fn': 332, 'auroc': 0.9386688791640125, 'auprc': 0.9361211302585224, 'eval_loss': 0.6316833577297628}
Correct predictions are:  4328
Total predictions are:  5000
Accuracy on test set is: 0.8656 


[0.8666, 0.8718, 0.876, 0.8752, 0.8616, 0.8642, 0.8684, 0.8746, 0.8626, 0.8656]


 Over all runs maximum accuracies are: [0.8374, 0.8738, 0.876, 0.8764, 0.8786]
The median is: 0.876
RoBERTa Accuracy Score on Test set ->  ['0.876 +/- 0.03859999999999997']


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.



* * * * EVALUATION USING RSW_STM AS PREPROCESSING FUNCTION * * * *

RUN NUMBER:  1


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.


EPOCH NUMBER:  0

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6836507614615833, 'tp': 2198, 'tn': 2007, 'fp': 486, 'fn': 309, 'auroc': 0.9203288153779126, 'auprc': 0.9163991148355253, 'eval_loss': 0.3681879830121994}
Correct predictions are:  4205
Total predictions are:  5000
Accuracy on test set is: 0.841 



EPOCH NUMBER:  1

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6868212845260689, 'tp': 2201, 'tn': 2012, 'fp': 481, 'fn': 306, 'auroc': 0.9252856542395292, 'auprc': 0.9222926771790858, 'eval_loss': 0.39131617953777315}
Correct predictions are:  4213
Total predictions are:  5000
Accuracy on test set is: 0.8426 



EPOCH NUMBER:  2

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6910185707743357, 'tp': 2189, 'tn': 2036, 'fp': 457, 'fn': 318, 'auroc': 0.9247317298967623, 'auprc': 0.9221675205101392, 'eval_loss': 0.5752449180394411}
Correct predictions are:  4225
Total predictions are:  5000
Accuracy on test set is: 0.845 



EPOCH NUMBER:  3

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6940023047848534, 'tp': 2209, 'tn': 2022, 'fp': 471, 'fn': 298, 'auroc': 0.924278286341765, 'auprc': 0.9218407578863064, 'eval_loss': 0.5550112091556192}
Correct predictions are:  4231
Total predictions are:  5000
Accuracy on test set is: 0.8462 



EPOCH NUMBER:  4

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.684622890561553, 'tp': 2082, 'tn': 2129, 'fp': 364, 'fn': 425, 'auroc': 0.9255178160596779, 'auprc': 0.9222999364301023, 'eval_loss': 0.6698801520988346}
Correct predictions are:  4211
Total predictions are:  5000
Accuracy on test set is: 0.8422 



EPOCH NUMBER:  5

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6876527036212043, 'tp': 2134, 'tn': 2085, 'fp': 408, 'fn': 373, 'auroc': 0.9251931735144803, 'auprc': 0.9186262769076522, 'eval_loss': 0.721658469286561}
Correct predictions are:  4219
Total predictions are:  5000
Accuracy on test set is: 0.8438 



EPOCH NUMBER:  6

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6781132883645025, 'tp': 2083, 'tn': 2112, 'fp': 381, 'fn': 424, 'auroc': 0.9226299534188347, 'auprc': 0.9187624944938996, 'eval_loss': 0.7849342568300665}
Correct predictions are:  4195
Total predictions are:  5000
Accuracy on test set is: 0.839 



EPOCH NUMBER:  7

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6844611036635166, 'tp': 2097, 'tn': 2114, 'fp': 379, 'fn': 410, 'auroc': 0.9230769969236559, 'auprc': 0.9087836302740272, 'eval_loss': 0.8360076157325879}
Correct predictions are:  4211
Total predictions are:  5000
Accuracy on test set is: 0.8422 



EPOCH NUMBER:  8

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6789236548696904, 'tp': 2083, 'tn': 2114, 'fp': 379, 'fn': 424, 'auroc': 0.9236098011008407, 'auprc': 0.9213524698299678, 'eval_loss': 0.8924444716483354}
Correct predictions are:  4197
Total predictions are:  5000
Accuracy on test set is: 0.8394 



EPOCH NUMBER:  9

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6904258987636879, 'tp': 2133, 'tn': 2093, 'fp': 400, 'fn': 374, 'auroc': 0.9174750330042588, 'auprc': 0.901663338309179, 'eval_loss': 0.8785233214184642}
Correct predictions are:  4226
Total predictions are:  5000
Accuracy on test set is: 0.8452 


[0.841, 0.8426, 0.845, 0.8462, 0.8422, 0.8438, 0.839, 0.8422, 0.8394, 0.8452]

RUN NUMBER:  2


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.


EPOCH NUMBER:  0

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.5220732370101208, 'tp': 1891, 'tn': 1914, 'fp': 579, 'fn': 616, 'auroc': 0.8338873376767273, 'auprc': 0.8331861951167159, 'eval_loss': 0.5228897332191468}
Correct predictions are:  3805
Total predictions are:  5000
Accuracy on test set is: 0.761 



EPOCH NUMBER:  1

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.5177590352118983, 'tp': 2103, 'tn': 1674, 'fp': 819, 'fn': 404, 'auroc': 0.8562007926142141, 'auprc': 0.8485274652076443, 'eval_loss': 0.5068899064421654}
Correct predictions are:  3777
Total predictions are:  5000
Accuracy on test set is: 0.7554 



EPOCH NUMBER:  2

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.5731288303098123, 'tp': 1871, 'tn': 2057, 'fp': 436, 'fn': 636, 'auroc': 0.8713425913259161, 'auprc': 0.8648890301796601, 'eval_loss': 0.5033555386185646}
Correct predictions are:  3928
Total predictions are:  5000
Accuracy on test set is: 0.7856 



EPOCH NUMBER:  3

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.5999979519889818, 'tp': 2006, 'tn': 1994, 'fp': 499, 'fn': 501, 'auroc': 0.8444598205649931, 'auprc': 0.8590765790080238, 'eval_loss': 0.48512561653852465}
Correct predictions are:  4000
Total predictions are:  5000
Accuracy on test set is: 0.8 



EPOCH NUMBER:  4

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.5718692828174857, 'tp': 2234, 'tn': 1660, 'fp': 833, 'fn': 273, 'auroc': 0.8762031894330051, 'auprc': 0.8610989702167694, 'eval_loss': 0.5067596938610077}
Correct predictions are:  3894
Total predictions are:  5000
Accuracy on test set is: 0.7788 



EPOCH NUMBER:  5

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6174200574634311, 'tp': 1997, 'tn': 2046, 'fp': 447, 'fn': 510, 'auroc': 0.8668356759916998, 'auprc': 0.8617772686060576, 'eval_loss': 0.47865013432502745}
Correct predictions are:  4043
Total predictions are:  5000
Accuracy on test set is: 0.8086 



EPOCH NUMBER:  6

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.5572220435244448, 'tp': 1944, 'tn': 1949, 'fp': 544, 'fn': 563, 'auroc': 0.869891859952182, 'auprc': 0.8464499761833549, 'eval_loss': 0.5110029646873474}
Correct predictions are:  3893
Total predictions are:  5000
Accuracy on test set is: 0.7786 



EPOCH NUMBER:  7

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.5950725448530595, 'tp': 2041, 'tn': 1946, 'fp': 547, 'fn': 466, 'auroc': 0.865339344260459, 'auprc': 0.848386945124849, 'eval_loss': 0.5198481604337692}
Correct predictions are:  3987
Total predictions are:  5000
Accuracy on test set is: 0.7974 



EPOCH NUMBER:  8

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6001118762697275, 'tp': 1957, 'tn': 2042, 'fp': 451, 'fn': 550, 'auroc': 0.8499650637260996, 'auprc': 0.8545347256189122, 'eval_loss': 0.5160871288537979}
Correct predictions are:  3999
Total predictions are:  5000
Accuracy on test set is: 0.7998 



EPOCH NUMBER:  9

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.5446833395551982, 'tp': 1581, 'tn': 2231, 'fp': 262, 'fn': 926, 'auroc': 0.7381829073539936, 'auprc': 0.8062713414070884, 'eval_loss': 0.5438293119430542}
Correct predictions are:  3812
Total predictions are:  5000
Accuracy on test set is: 0.7624 


[0.761, 0.7554, 0.7856, 0.8, 0.7788, 0.8086, 0.7786, 0.7974, 0.7998, 0.7624]

RUN NUMBER:  3


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.


EPOCH NUMBER:  0

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6432487356667519, 'tp': 2047, 'tn': 2061, 'fp': 432, 'fn': 460, 'auroc': 0.9055218192910632, 'auprc': 0.9000621818393372, 'eval_loss': 0.39396306728720665}
Correct predictions are:  4108
Total predictions are:  5000
Accuracy on test set is: 0.8216 



EPOCH NUMBER:  1

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6400625128502732, 'tp': 2041, 'tn': 2059, 'fp': 434, 'fn': 466, 'auroc': 0.8984158435802136, 'auprc': 0.8961495746083038, 'eval_loss': 0.5053328499853611}
Correct predictions are:  4100
Total predictions are:  5000
Accuracy on test set is: 0.82 



EPOCH NUMBER:  2

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6386032920314668, 'tp': 1961, 'tn': 2131, 'fp': 362, 'fn': 546, 'auroc': 0.8992360100103186, 'auprc': 0.8978533513560956, 'eval_loss': 0.4441374711930752}
Correct predictions are:  4092
Total predictions are:  5000
Accuracy on test set is: 0.8184 



EPOCH NUMBER:  3

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6141802878831222, 'tp': 1876, 'tn': 2149, 'fp': 344, 'fn': 631, 'auroc': 0.8923097957087984, 'auprc': 0.8920458314476448, 'eval_loss': 0.47051418089866637}
Correct predictions are:  4025
Total predictions are:  5000
Accuracy on test set is: 0.805 



EPOCH NUMBER:  4

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6104840487361218, 'tp': 2001, 'tn': 2025, 'fp': 468, 'fn': 506, 'auroc': 0.8908041839048018, 'auprc': 0.885911107607435, 'eval_loss': 0.5083059049665928}
Correct predictions are:  4026
Total predictions are:  5000
Accuracy on test set is: 0.8052 



EPOCH NUMBER:  5

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6336148926550307, 'tp': 2127, 'tn': 1954, 'fp': 539, 'fn': 380, 'auroc': 0.8966663898644966, 'auprc': 0.8936712530363876, 'eval_loss': 0.5584840061366558}
Correct predictions are:  4081
Total predictions are:  5000
Accuracy on test set is: 0.8162 



EPOCH NUMBER:  6

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6284046669379091, 'tp': 1923, 'tn': 2141, 'fp': 352, 'fn': 584, 'auroc': 0.8994046513324664, 'auprc': 0.9003985705645201, 'eval_loss': 0.5319991841316223}
Correct predictions are:  4064
Total predictions are:  5000
Accuracy on test set is: 0.8128 



EPOCH NUMBER:  7

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6352696440980501, 'tp': 2072, 'tn': 2016, 'fp': 477, 'fn': 435, 'auroc': 0.9007704220401087, 'auprc': 0.89558327637086, 'eval_loss': 0.5033798124969006}
Correct predictions are:  4088
Total predictions are:  5000
Accuracy on test set is: 0.8176 



EPOCH NUMBER:  8

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6312767238244597, 'tp': 2068, 'tn': 2010, 'fp': 483, 'fn': 439, 'auroc': 0.8972136741552054, 'auprc': 0.8920522783750857, 'eval_loss': 0.5508456537455321}
Correct predictions are:  4078
Total predictions are:  5000
Accuracy on test set is: 0.8156 



EPOCH NUMBER:  9

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6348079933065264, 'tp': 2007, 'tn': 2079, 'fp': 414, 'fn': 500, 'auroc': 0.9024993155946344, 'auprc': 0.9004464395612481, 'eval_loss': 0.5196078811794519}
Correct predictions are:  4086
Total predictions are:  5000
Accuracy on test set is: 0.8172 


[0.8216, 0.82, 0.8184, 0.805, 0.8052, 0.8162, 0.8128, 0.8176, 0.8156, 0.8172]

RUN NUMBER:  4


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.


EPOCH NUMBER:  0

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6728890339397783, 'tp': 2147, 'tn': 2034, 'fp': 459, 'fn': 360, 'auroc': 0.9209406601747757, 'auprc': 0.9178791560070557, 'eval_loss': 0.38165126914978026}
Correct predictions are:  4181
Total predictions are:  5000
Accuracy on test set is: 0.8362 



EPOCH NUMBER:  1

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6935078927304561, 'tp': 2206, 'tn': 2024, 'fp': 469, 'fn': 301, 'auroc': 0.9265019837755527, 'auprc': 0.9221385950109158, 'eval_loss': 0.43537402956336735}
Correct predictions are:  4230
Total predictions are:  5000
Accuracy on test set is: 0.846 



EPOCH NUMBER:  2

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.691329984695202, 'tp': 2209, 'tn': 2015, 'fp': 478, 'fn': 298, 'auroc': 0.9267519057349409, 'auprc': 0.9244400141288964, 'eval_loss': 0.4994985966578126}
Correct predictions are:  4224
Total predictions are:  5000
Accuracy on test set is: 0.8448 



EPOCH NUMBER:  3

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6836905268500378, 'tp': 2176, 'tn': 2031, 'fp': 462, 'fn': 331, 'auroc': 0.9218325871674834, 'auprc': 0.9160781647717333, 'eval_loss': 0.5784385565437377}
Correct predictions are:  4207
Total predictions are:  5000
Accuracy on test set is: 0.8414 



EPOCH NUMBER:  4

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.69081943457341, 'tp': 2112, 'tn': 2115, 'fp': 378, 'fn': 395, 'auroc': 0.9248745310163233, 'auprc': 0.921796500790001, 'eval_loss': 0.6163155627295375}
Correct predictions are:  4227
Total predictions are:  5000
Accuracy on test set is: 0.8454 



EPOCH NUMBER:  5

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6872636393715835, 'tp': 2135, 'tn': 2083, 'fp': 410, 'fn': 372, 'auroc': 0.9215053846022153, 'auprc': 0.9177291847221059, 'eval_loss': 0.7138408770941198}
Correct predictions are:  4218
Total predictions are:  5000
Accuracy on test set is: 0.8436 



EPOCH NUMBER:  6

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6841733854816066, 'tp': 2142, 'tn': 2068, 'fp': 425, 'fn': 365, 'auroc': 0.9201425739177794, 'auprc': 0.9116727941696493, 'eval_loss': 0.6800490226730704}
Correct predictions are:  4210
Total predictions are:  5000
Accuracy on test set is: 0.842 



EPOCH NUMBER:  7

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6857090905054352, 'tp': 2093, 'tn': 2121, 'fp': 372, 'fn': 414, 'auroc': 0.9213048230298125, 'auprc': 0.9156018845327211, 'eval_loss': 0.7433596841655672}
Correct predictions are:  4214
Total predictions are:  5000
Accuracy on test set is: 0.8428 



EPOCH NUMBER:  8

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6964006207630575, 'tp': 2133, 'tn': 2108, 'fp': 385, 'fn': 374, 'auroc': 0.9220728290509799, 'auprc': 0.9142975815444633, 'eval_loss': 0.7416061689633876}
Correct predictions are:  4241
Total predictions are:  5000
Accuracy on test set is: 0.8482 



EPOCH NUMBER:  9

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6908292203004889, 'tp': 2110, 'tn': 2117, 'fp': 376, 'fn': 397, 'auroc': 0.9236386013266343, 'auprc': 0.9207317651472319, 'eval_loss': 0.831312226115726}
Correct predictions are:  4227
Total predictions are:  5000
Accuracy on test set is: 0.8454 


[0.8362, 0.846, 0.8448, 0.8414, 0.8454, 0.8436, 0.842, 0.8428, 0.8482, 0.8454]

RUN NUMBER:  5


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.


EPOCH NUMBER:  0

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.578804451386366, 'tp': 1990, 'tn': 1957, 'fp': 536, 'fn': 517, 'auroc': 0.8623435607735164, 'auprc': 0.8566062362541933, 'eval_loss': 0.4750466478228569}
Correct predictions are:  3947
Total predictions are:  5000
Accuracy on test set is: 0.7894 



EPOCH NUMBER:  1

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6278343933983217, 'tp': 2078, 'tn': 1991, 'fp': 502, 'fn': 429, 'auroc': 0.893015481241373, 'auprc': 0.88302351094434, 'eval_loss': 0.4433751465559006}
Correct predictions are:  4069
Total predictions are:  5000
Accuracy on test set is: 0.8138 



EPOCH NUMBER:  2

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6513985558515234, 'tp': 2104, 'tn': 2024, 'fp': 469, 'fn': 403, 'auroc': 0.9024355550947518, 'auprc': 0.8953802131147736, 'eval_loss': 0.42861209372878073}
Correct predictions are:  4128
Total predictions are:  5000
Accuracy on test set is: 0.8256 



EPOCH NUMBER:  3

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6368862780300754, 'tp': 2133, 'tn': 1956, 'fp': 537, 'fn': 374, 'auroc': 0.8916592306083679, 'auprc': 0.8954269455214934, 'eval_loss': 0.47444724855422976}
Correct predictions are:  4089
Total predictions are:  5000
Accuracy on test set is: 0.8178 



EPOCH NUMBER:  4

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6112636321154148, 'tp': 1800, 'tn': 2206, 'fp': 287, 'fn': 707, 'auroc': 0.8879628816289921, 'auprc': 0.8686330562101079, 'eval_loss': 0.4992600801587105}
Correct predictions are:  4006
Total predictions are:  5000
Accuracy on test set is: 0.8012 



EPOCH NUMBER:  5

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6109208533545325, 'tp': 2048, 'tn': 1979, 'fp': 514, 'fn': 459, 'auroc': 0.8845371747714501, 'auprc': 0.877622544858612, 'eval_loss': 0.5077391351103783}
Correct predictions are:  4027
Total predictions are:  5000
Accuracy on test set is: 0.8054 



EPOCH NUMBER:  6

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6228302471093858, 'tp': 2145, 'tn': 1906, 'fp': 587, 'fn': 362, 'auroc': 0.8976247173777843, 'auprc': 0.8963983957990788, 'eval_loss': 0.5086533996641636}
Correct predictions are:  4051
Total predictions are:  5000
Accuracy on test set is: 0.8102 



EPOCH NUMBER:  7

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6356390583983301, 'tp': 2068, 'tn': 2021, 'fp': 472, 'fn': 439, 'auroc': 0.8776415207095224, 'auprc': 0.828589495529977, 'eval_loss': 0.5138966761052608}
Correct predictions are:  4089
Total predictions are:  5000
Accuracy on test set is: 0.8178 



EPOCH NUMBER:  8

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6352745922132295, 'tp': 2101, 'tn': 1986, 'fp': 507, 'fn': 406, 'auroc': 0.9004060991838176, 'auprc': 0.8997939678094672, 'eval_loss': 0.5055201793909073}
Correct predictions are:  4087
Total predictions are:  5000
Accuracy on test set is: 0.8174 



EPOCH NUMBER:  9

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6290680246293446, 'tp': 2179, 'tn': 1884, 'fp': 609, 'fn': 328, 'auroc': 0.8944094121697914, 'auprc': 0.8731133683935042, 'eval_loss': 0.5059668414831161}
Correct predictions are:  4063
Total predictions are:  5000
Accuracy on test set is: 0.8126 


[0.7894, 0.8138, 0.8256, 0.8178, 0.8012, 0.8054, 0.8102, 0.8178, 0.8174, 0.8126]


 Over all runs maximum accuracies are: [0.8086, 0.8216, 0.8256, 0.8462, 0.8482]
The median is: 0.8256
RoBERTa Accuracy Score on Test set ->  ['0.8256 +/- 0.022599999999999953']


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.



* * * * EVALUATION USING STM_LOW AS PREPROCESSING FUNCTION * * * *

RUN NUMBER:  1


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.


EPOCH NUMBER:  0

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6920432454306552, 'tp': 2138, 'tn': 2092, 'fp': 401, 'fn': 369, 'auroc': 0.9259614995381563, 'auprc': 0.9257158808533541, 'eval_loss': 0.37885775334835053}
Correct predictions are:  4230
Total predictions are:  5000
Accuracy on test set is: 0.846 



EPOCH NUMBER:  1

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6904633902178224, 'tp': 2167, 'tn': 2058, 'fp': 435, 'fn': 340, 'auroc': 0.9264911036902529, 'auprc': 0.9247494421312834, 'eval_loss': 0.43150789572298526}
Correct predictions are:  4225
Total predictions are:  5000
Accuracy on test set is: 0.845 



EPOCH NUMBER:  2

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6914768992773017, 'tp': 2087, 'tn': 2141, 'fp': 352, 'fn': 420, 'auroc': 0.9268342263803349, 'auprc': 0.9244944866123139, 'eval_loss': 0.5332541909351944}
Correct predictions are:  4228
Total predictions are:  5000
Accuracy on test set is: 0.8456 



EPOCH NUMBER:  3

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6717940963369282, 'tp': 2205, 'tn': 1968, 'fp': 525, 'fn': 302, 'auroc': 0.9224785922321631, 'auprc': 0.9209367863928395, 'eval_loss': 0.5787797049030662}
Correct predictions are:  4173
Total predictions are:  5000
Accuracy on test set is: 0.8346 



EPOCH NUMBER:  4

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6765751082612355, 'tp': 2052, 'tn': 2138, 'fp': 355, 'fn': 455, 'auroc': 0.9246408491842576, 'auprc': 0.9225985695300178, 'eval_loss': 0.6469743655256927}
Correct predictions are:  4190
Total predictions are:  5000
Accuracy on test set is: 0.838 



EPOCH NUMBER:  5

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6782169558429714, 'tp': 2138, 'tn': 2057, 'fp': 436, 'fn': 369, 'auroc': 0.9212079422702675, 'auprc': 0.9188016480168493, 'eval_loss': 0.6751312509380281}
Correct predictions are:  4195
Total predictions are:  5000
Accuracy on test set is: 0.839 



EPOCH NUMBER:  6

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6880376446647343, 'tp': 2105, 'tn': 2115, 'fp': 378, 'fn': 402, 'auroc': 0.9243672470392168, 'auprc': 0.9236952257613796, 'eval_loss': 0.744260276376456}
Correct predictions are:  4220
Total predictions are:  5000
Accuracy on test set is: 0.844 



EPOCH NUMBER:  7

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6777877383973396, 'tp': 2076, 'tn': 2118, 'fp': 375, 'fn': 431, 'auroc': 0.9191870464264439, 'auprc': 0.9177940568038401, 'eval_loss': 0.7849791556425393}
Correct predictions are:  4194
Total predictions are:  5000
Accuracy on test set is: 0.8388 



EPOCH NUMBER:  8

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6868892929994832, 'tp': 2242, 'tn': 1966, 'fp': 527, 'fn': 265, 'auroc': 0.9236589214859445, 'auprc': 0.9214771579788016, 'eval_loss': 0.827750141350925}
Correct predictions are:  4208
Total predictions are:  5000
Accuracy on test set is: 0.8416 



EPOCH NUMBER:  9

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6821917447470435, 'tp': 2141, 'tn': 2064, 'fp': 429, 'fn': 366, 'auroc': 0.9195293691102538, 'auprc': 0.9184209773804192, 'eval_loss': 0.7507146577265114}
Correct predictions are:  4205
Total predictions are:  5000
Accuracy on test set is: 0.841 


[0.846, 0.845, 0.8456, 0.8346, 0.838, 0.839, 0.844, 0.8388, 0.8416, 0.841]

RUN NUMBER:  2


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.


EPOCH NUMBER:  0

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7018962120434753, 'tp': 2099, 'tn': 2155, 'fp': 338, 'fn': 408, 'auroc': 0.9254411754588154, 'auprc': 0.9200925224474024, 'eval_loss': 0.37767331234812734}
Correct predictions are:  4254
Total predictions are:  5000
Accuracy on test set is: 0.8508 



EPOCH NUMBER:  1

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7038003016347212, 'tp': 2168, 'tn': 2091, 'fp': 402, 'fn': 339, 'auroc': 0.9306233760872686, 'auprc': 0.9213496931577883, 'eval_loss': 0.40587987167686224}
Correct predictions are:  4259
Total predictions are:  5000
Accuracy on test set is: 0.8518 



EPOCH NUMBER:  2

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6982710674739876, 'tp': 2096, 'tn': 2149, 'fp': 344, 'fn': 411, 'auroc': 0.9320270670922061, 'auprc': 0.9303483876287622, 'eval_loss': 0.6134875922016799}
Correct predictions are:  4245
Total predictions are:  5000
Accuracy on test set is: 0.849 



EPOCH NUMBER:  3

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6976423119109082, 'tp': 2068, 'tn': 2174, 'fp': 319, 'fn': 439, 'auroc': 0.9307919374087893, 'auprc': 0.9300261783646245, 'eval_loss': 0.5768882615342736}
Correct predictions are:  4242
Total predictions are:  5000
Accuracy on test set is: 0.8484 



EPOCH NUMBER:  4

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.683714246611721, 'tp': 2064, 'tn': 2144, 'fp': 349, 'fn': 443, 'auroc': 0.9244470876651673, 'auprc': 0.9226014759339781, 'eval_loss': 0.7041057562798262}
Correct predictions are:  4208
Total predictions are:  5000
Accuracy on test set is: 0.8416 



EPOCH NUMBER:  5

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6875143227187839, 'tp': 2060, 'tn': 2157, 'fp': 336, 'fn': 447, 'auroc': 0.9263407025111076, 'auprc': 0.9257995461533914, 'eval_loss': 0.745538721036911}
Correct predictions are:  4217
Total predictions are:  5000
Accuracy on test set is: 0.8434 



EPOCH NUMBER:  6

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6808016032114322, 'tp': 2105, 'tn': 2097, 'fp': 396, 'fn': 402, 'auroc': 0.9236937217587786, 'auprc': 0.9185190166528309, 'eval_loss': 0.7974254696376621}
Correct predictions are:  4202
Total predictions are:  5000
Accuracy on test set is: 0.8404 



EPOCH NUMBER:  7

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6790809642817179, 'tp': 2071, 'tn': 2126, 'fp': 367, 'fn': 436, 'auroc': 0.9228261149567414, 'auprc': 0.9187277664237183, 'eval_loss': 0.8141050030376762}
Correct predictions are:  4197
Total predictions are:  5000
Accuracy on test set is: 0.8394 



EPOCH NUMBER:  8

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6864255345566835, 'tp': 2128, 'tn': 2088, 'fp': 405, 'fn': 379, 'auroc': 0.9243592469764963, 'auprc': 0.9202378459247419, 'eval_loss': 0.8466130537070334}
Correct predictions are:  4216
Total predictions are:  5000
Accuracy on test set is: 0.8432 



EPOCH NUMBER:  9

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6912132591119514, 'tp': 2114, 'tn': 2114, 'fp': 379, 'fn': 393, 'auroc': 0.9254742157178513, 'auprc': 0.9246087186436351, 'eval_loss': 0.8549376985309646}
Correct predictions are:  4228
Total predictions are:  5000
Accuracy on test set is: 0.8456 


[0.8508, 0.8518, 0.849, 0.8484, 0.8416, 0.8434, 0.8404, 0.8394, 0.8432, 0.8456]

RUN NUMBER:  3


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.


EPOCH NUMBER:  0

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6684423554934075, 'tp': 2169, 'tn': 1999, 'fp': 494, 'fn': 338, 'auroc': 0.9168597481804258, 'auprc': 0.9133604417030792, 'eval_loss': 0.3862387719988823}
Correct predictions are:  4168
Total predictions are:  5000
Accuracy on test set is: 0.8336 



EPOCH NUMBER:  1

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6931306092525988, 'tp': 2195, 'tn': 2035, 'fp': 458, 'fn': 312, 'auroc': 0.9270264678875083, 'auprc': 0.9269178769694675, 'eval_loss': 0.46953904560506343}
Correct predictions are:  4230
Total predictions are:  5000
Accuracy on test set is: 0.846 



EPOCH NUMBER:  2

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6866358124520922, 'tp': 2170, 'tn': 2045, 'fp': 448, 'fn': 337, 'auroc': 0.9215709851165234, 'auprc': 0.9187978240943876, 'eval_loss': 0.4943471695765853}
Correct predictions are:  4215
Total predictions are:  5000
Accuracy on test set is: 0.843 



EPOCH NUMBER:  3

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6846076038551091, 'tp': 2181, 'tn': 2028, 'fp': 465, 'fn': 326, 'auroc': 0.924616768995469, 'auprc': 0.9240961245037349, 'eval_loss': 0.5620090110257268}
Correct predictions are:  4209
Total predictions are:  5000
Accuracy on test set is: 0.8418 



EPOCH NUMBER:  4

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.671381115172722, 'tp': 2127, 'tn': 2051, 'fp': 442, 'fn': 380, 'auroc': 0.9213568234374959, 'auprc': 0.9210411965515245, 'eval_loss': 0.5997200520932674}
Correct predictions are:  4178
Total predictions are:  5000
Accuracy on test set is: 0.8356 



EPOCH NUMBER:  5

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6821449002360739, 'tp': 2176, 'tn': 2027, 'fp': 466, 'fn': 331, 'auroc': 0.9229233957194225, 'auprc': 0.9222123584837397, 'eval_loss': 0.7000398316562175}
Correct predictions are:  4203
Total predictions are:  5000
Accuracy on test set is: 0.8406 



EPOCH NUMBER:  6

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6809483575347328, 'tp': 2136, 'tn': 2066, 'fp': 427, 'fn': 371, 'auroc': 0.9215089046298124, 'auprc': 0.9184426758955864, 'eval_loss': 0.6428504972882569}
Correct predictions are:  4202
Total predictions are:  5000
Accuracy on test set is: 0.8404 



EPOCH NUMBER:  7

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6812517751702881, 'tp': 2126, 'tn': 2077, 'fp': 416, 'fn': 381, 'auroc': 0.9239301236121691, 'auprc': 0.9235646542896597, 'eval_loss': 0.7091516626268626}
Correct predictions are:  4203
Total predictions are:  5000
Accuracy on test set is: 0.8406 



EPOCH NUMBER:  8

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6840038560204671, 'tp': 2108, 'tn': 2102, 'fp': 391, 'fn': 399, 'auroc': 0.922469152158153, 'auprc': 0.9199071024680591, 'eval_loss': 0.7427723490618169}
Correct predictions are:  4210
Total predictions are:  5000
Accuracy on test set is: 0.842 



EPOCH NUMBER:  9

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6904314783544713, 'tp': 2182, 'tn': 2042, 'fp': 451, 'fn': 325, 'auroc': 0.9224661121343191, 'auprc': 0.9215711436410587, 'eval_loss': 0.7255972735192626}
Correct predictions are:  4224
Total predictions are:  5000
Accuracy on test set is: 0.8448 


[0.8336, 0.846, 0.843, 0.8418, 0.8356, 0.8406, 0.8404, 0.8406, 0.842, 0.8448]

RUN NUMBER:  4


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.


EPOCH NUMBER:  0

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6857048443182936, 'tp': 2067, 'tn': 2146, 'fp': 347, 'fn': 440, 'auroc': 0.9215784251748533, 'auprc': 0.9197121799180087, 'eval_loss': 0.38541948680877686}
Correct predictions are:  4213
Total predictions are:  5000
Accuracy on test set is: 0.8426 



EPOCH NUMBER:  1

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6983971782132095, 'tp': 2132, 'tn': 2114, 'fp': 379, 'fn': 375, 'auroc': 0.9268253463107152, 'auprc': 0.9227665221523942, 'eval_loss': 0.434113380484283}
Correct predictions are:  4246
Total predictions are:  5000
Accuracy on test set is: 0.8492 



EPOCH NUMBER:  2

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6884407032371922, 'tp': 2105, 'tn': 2116, 'fp': 377, 'fn': 402, 'auroc': 0.924150285338237, 'auprc': 0.9201812753679314, 'eval_loss': 0.5167371700763702}
Correct predictions are:  4221
Total predictions are:  5000
Accuracy on test set is: 0.8442 



EPOCH NUMBER:  3

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6871938446046302, 'tp': 2234, 'tn': 1976, 'fp': 517, 'fn': 273, 'auroc': 0.9203852958207193, 'auprc': 0.9160045638784204, 'eval_loss': 0.5674971140906214}
Correct predictions are:  4210
Total predictions are:  5000
Accuracy on test set is: 0.842 



EPOCH NUMBER:  4

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6628500492015885, 'tp': 2133, 'tn': 2023, 'fp': 470, 'fn': 374, 'auroc': 0.900424979331838, 'auprc': 0.9071833105357248, 'eval_loss': 0.6579595647335053}
Correct predictions are:  4156
Total predictions are:  5000
Accuracy on test set is: 0.8312 



EPOCH NUMBER:  5

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6748056036303232, 'tp': 2040, 'tn': 2145, 'fp': 348, 'fn': 467, 'auroc': 0.9153239761399729, 'auprc': 0.9039898248797501, 'eval_loss': 0.7020508555836975}
Correct predictions are:  4185
Total predictions are:  5000
Accuracy on test set is: 0.837 



EPOCH NUMBER:  6

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6792158815342149, 'tp': 2117, 'tn': 2081, 'fp': 412, 'fn': 390, 'auroc': 0.9164867852563964, 'auprc': 0.9130695244681049, 'eval_loss': 0.7352628377594054}
Correct predictions are:  4198
Total predictions are:  5000
Accuracy on test set is: 0.8396 



EPOCH NUMBER:  7

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6687702472466299, 'tp': 2136, 'tn': 2035, 'fp': 458, 'fn': 371, 'auroc': 0.9135764424393086, 'auprc': 0.9109688206036226, 'eval_loss': 0.6752342193685472}
Correct predictions are:  4171
Total predictions are:  5000
Accuracy on test set is: 0.8342 



EPOCH NUMBER:  8

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6376186425516758, 'tp': 2010, 'tn': 2083, 'fp': 410, 'fn': 497, 'auroc': 0.9061006238288909, 'auprc': 0.9055691520140617, 'eval_loss': 0.7795262167058885}
Correct predictions are:  4093
Total predictions are:  5000
Accuracy on test set is: 0.8186 



EPOCH NUMBER:  9

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6731555137460322, 'tp': 2172, 'tn': 2008, 'fp': 485, 'fn': 335, 'auroc': 0.9177219949404403, 'auprc': 0.9147316359491419, 'eval_loss': 0.799166458953917}
Correct predictions are:  4180
Total predictions are:  5000
Accuracy on test set is: 0.836 


[0.8426, 0.8492, 0.8442, 0.842, 0.8312, 0.837, 0.8396, 0.8342, 0.8186, 0.836]

RUN NUMBER:  5


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.


EPOCH NUMBER:  0

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.695666489888328, 'tp': 2202, 'tn': 2034, 'fp': 459, 'fn': 305, 'auroc': 0.9273671105581468, 'auprc': 0.9213285212318222, 'eval_loss': 0.3829358983755112}
Correct predictions are:  4236
Total predictions are:  5000
Accuracy on test set is: 0.8472 



EPOCH NUMBER:  1

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.7032383392786845, 'tp': 2151, 'tn': 2107, 'fp': 386, 'fn': 356, 'auroc': 0.932203948478956, 'auprc': 0.9316861038212311, 'eval_loss': 0.44169727124869823}
Correct predictions are:  4258
Total predictions are:  5000
Accuracy on test set is: 0.8516 



EPOCH NUMBER:  2

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.705225159424745, 'tp': 2183, 'tn': 2079, 'fp': 414, 'fn': 324, 'auroc': 0.9292552853614374, 'auprc': 0.9281668939705336, 'eval_loss': 0.4842326726168394}
Correct predictions are:  4262
Total predictions are:  5000
Accuracy on test set is: 0.8524 



EPOCH NUMBER:  3

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6907394364318424, 'tp': 2179, 'tn': 2046, 'fp': 447, 'fn': 328, 'auroc': 0.925162933277397, 'auprc': 0.9236000449637378, 'eval_loss': 0.6481956712402404}
Correct predictions are:  4225
Total predictions are:  5000
Accuracy on test set is: 0.845 



EPOCH NUMBER:  4

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6858567272958316, 'tp': 2150, 'tn': 2064, 'fp': 429, 'fn': 357, 'auroc': 0.922893555485475, 'auprc': 0.9231167709563171, 'eval_loss': 0.7049801646053792}
Correct predictions are:  4214
Total predictions are:  5000
Accuracy on test set is: 0.8428 



EPOCH NUMBER:  5

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6903741153391662, 'tp': 2204, 'tn': 2018, 'fp': 475, 'fn': 303, 'auroc': 0.9269320671474065, 'auprc': 0.9234274327086325, 'eval_loss': 0.6171193950220942}
Correct predictions are:  4222
Total predictions are:  5000
Accuracy on test set is: 0.8444 



EPOCH NUMBER:  6

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6928564876262332, 'tp': 2209, 'tn': 2019, 'fp': 474, 'fn': 298, 'auroc': 0.924448207673948, 'auprc': 0.9208823162307691, 'eval_loss': 0.7185946645528078}
Correct predictions are:  4228
Total predictions are:  5000
Accuracy on test set is: 0.8456 



EPOCH NUMBER:  7

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6793853126704519, 'tp': 2015, 'tn': 2179, 'fp': 314, 'fn': 492, 'auroc': 0.9241216451136978, 'auprc': 0.9197209510487647, 'eval_loss': 0.68341139209494}
Correct predictions are:  4194
Total predictions are:  5000
Accuracy on test set is: 0.8388 



EPOCH NUMBER:  8

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.674477559074557, 'tp': 2054, 'tn': 2131, 'fp': 362, 'fn': 453, 'auroc': 0.9129568375816066, 'auprc': 0.9121523327592447, 'eval_loss': 0.7140338158130646}
Correct predictions are:  4185
Total predictions are:  5000
Accuracy on test set is: 0.837 



EPOCH NUMBER:  9

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6914500875398533, 'tp': 2212, 'tn': 2012, 'fp': 481, 'fn': 295, 'auroc': 0.9221559497026457, 'auprc': 0.9188076337052792, 'eval_loss': 0.688914847587049}
Correct predictions are:  4224
Total predictions are:  5000
Accuracy on test set is: 0.8448 


[0.8472, 0.8516, 0.8524, 0.845, 0.8428, 0.8444, 0.8456, 0.8388, 0.837, 0.8448]


 Over all runs maximum accuracies are: [0.846, 0.846, 0.8492, 0.8518, 0.8524]
The median is: 0.8492
RoBERTa Accuracy Score on Test set ->  ['0.8492 +/- 0.0032000000000000917']


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.



* * * * EVALUATION USING STM_RSW AS PREPROCESSING FUNCTION * * * *

RUN NUMBER:  1


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.


EPOCH NUMBER:  0

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.56926213792018, 'tp': 1952, 'tn': 1971, 'fp': 522, 'fn': 555, 'auroc': 0.8614464337400405, 'auprc': 0.8581035092499664, 'eval_loss': 0.49904331970214844}
Correct predictions are:  3923
Total predictions are:  5000
Accuracy on test set is: 0.7846 



EPOCH NUMBER:  1

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6087599426706127, 'tp': 2096, 'tn': 1923, 'fp': 570, 'fn': 411, 'auroc': 0.8858127047716053, 'auprc': 0.8838379763167545, 'eval_loss': 0.46257683487534523}
Correct predictions are:  4019
Total predictions are:  5000
Accuracy on test set is: 0.8038 



EPOCH NUMBER:  2

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6175989774388488, 'tp': 2035, 'tn': 2009, 'fp': 484, 'fn': 472, 'auroc': 0.8875827186485141, 'auprc': 0.8861115534798317, 'eval_loss': 0.4676701416492462}
Correct predictions are:  4044
Total predictions are:  5000
Accuracy on test set is: 0.8088 



EPOCH NUMBER:  3

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6234119695464245, 'tp': 2071, 'tn': 1987, 'fp': 506, 'fn': 436, 'auroc': 0.8964867084557944, 'auprc': 0.895974867051037, 'eval_loss': 0.4597797341823578}
Correct predictions are:  4058
Total predictions are:  5000
Accuracy on test set is: 0.8116 



EPOCH NUMBER:  4

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.48858268277175887, 'tp': 2442, 'tn': 1092, 'fp': 1401, 'fn': 65, 'auroc': 0.8834701264057909, 'auprc': 0.8817824809088952, 'eval_loss': 0.5610344207763672}
Correct predictions are:  3534
Total predictions are:  5000
Accuracy on test set is: 0.7068 



EPOCH NUMBER:  5

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6343542820044259, 'tp': 2126, 'tn': 1957, 'fp': 536, 'fn': 381, 'auroc': 0.898799046584525, 'auprc': 0.8947044289251418, 'eval_loss': 0.5206090889930725}
Correct predictions are:  4083
Total predictions are:  5000
Accuracy on test set is: 0.8166 



EPOCH NUMBER:  6

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.5665471837592019, 'tp': 1639, 'tn': 2234, 'fp': 259, 'fn': 868, 'auroc': 0.8832566847324084, 'auprc': 0.8806765741264724, 'eval_loss': 0.4848192817211151}
Correct predictions are:  3873
Total predictions are:  5000
Accuracy on test set is: 0.7746 



EPOCH NUMBER:  7

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6048546619126957, 'tp': 2204, 'tn': 1789, 'fp': 704, 'fn': 303, 'auroc': 0.8824123581128875, 'auprc': 0.8424687331275246, 'eval_loss': 0.4850967926681042}
Correct predictions are:  3993
Total predictions are:  5000
Accuracy on test set is: 0.7986 



EPOCH NUMBER:  8

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.5748251055691894, 'tp': 2339, 'tn': 1523, 'fp': 970, 'fn': 168, 'auroc': 0.879844097977728, 'auprc': 0.8752595649711874, 'eval_loss': 0.531081270968914}
Correct predictions are:  3862
Total predictions are:  5000
Accuracy on test set is: 0.7724 



EPOCH NUMBER:  9

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': -0.01410400339712391, 'tp': 2506, 'tn': 0, 'fp': 2493, 'fn': 1, 'auroc': 0.7456719260679004, 'auprc': 0.7552469671176342, 'eval_loss': 0.6931444772720337}
Correct predictions are:  2506
Total predictions are:  5000
Accuracy on test set is: 0.5012 


[0.7846, 0.8038, 0.8088, 0.8116, 0.7068, 0.8166, 0.7746, 0.7986, 0.7724, 0.5012]

RUN NUMBER:  2


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.


EPOCH NUMBER:  0

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.0, 'tp': 2507, 'tn': 0, 'fp': 2493, 'fn': 0, 'auroc': 0.7186114739139555, 'auprc': 0.6960950057369979, 'eval_loss': 0.695198750782013}
Correct predictions are:  2507
Total predictions are:  5000
Accuracy on test set is: 0.5014 



EPOCH NUMBER:  1

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.0, 'tp': 2507, 'tn': 0, 'fp': 2493, 'fn': 0, 'auroc': 0.7121087829328582, 'auprc': 0.7104869239974186, 'eval_loss': 0.6931868728637696}
Correct predictions are:  2507
Total predictions are:  5000
Accuracy on test set is: 0.5014 



EPOCH NUMBER:  2

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.45195356066289144, 'tp': 2299, 'tn': 1226, 'fp': 1267, 'fn': 208, 'auroc': 0.828485535326597, 'auprc': 0.8180873027165998, 'eval_loss': 0.58785916659832}
Correct predictions are:  3525
Total predictions are:  5000
Accuracy on test set is: 0.705 



EPOCH NUMBER:  3

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.5497038014645583, 'tp': 1922, 'tn': 1952, 'fp': 541, 'fn': 585, 'auroc': 0.8420221214534321, 'auprc': 0.824593952961478, 'eval_loss': 0.5202497611999511}
Correct predictions are:  3874
Total predictions are:  5000
Accuracy on test set is: 0.7748 



EPOCH NUMBER:  4

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.5572817671010168, 'tp': 2008, 'tn': 1884, 'fp': 609, 'fn': 499, 'auroc': 0.8600016224127198, 'auprc': 0.8472137930122975, 'eval_loss': 0.504831457066536}
Correct predictions are:  3892
Total predictions are:  5000
Accuracy on test set is: 0.7784 



EPOCH NUMBER:  5

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.560595470828722, 'tp': 2101, 'tn': 1791, 'fp': 702, 'fn': 406, 'auroc': 0.8606201872622682, 'auprc': 0.8371002656586846, 'eval_loss': 0.5287472606420517}
Correct predictions are:  3892
Total predictions are:  5000
Accuracy on test set is: 0.7784 



EPOCH NUMBER:  6

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.5736060215348768, 'tp': 1969, 'tn': 1965, 'fp': 528, 'fn': 538, 'auroc': 0.8528409262728619, 'auprc': 0.8046197729457433, 'eval_loss': 0.506335257923603}
Correct predictions are:  3934
Total predictions are:  5000
Accuracy on test set is: 0.7868 



EPOCH NUMBER:  7

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.5964517936067333, 'tp': 2022, 'tn': 1969, 'fp': 524, 'fn': 485, 'auroc': 0.8764340712431185, 'auprc': 0.8655810741187231, 'eval_loss': 0.4894348244905472}
Correct predictions are:  3991
Total predictions are:  5000
Accuracy on test set is: 0.7982 



EPOCH NUMBER:  8

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.30320797482123185, 'tp': 2500, 'tn': 437, 'fp': 2056, 'fn': 7, 'auroc': 0.8602921046901006, 'auprc': 0.8295987488921004, 'eval_loss': 0.6348274010181427}
Correct predictions are:  2937
Total predictions are:  5000
Accuracy on test set is: 0.5874 



EPOCH NUMBER:  9

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.5155077911808644, 'tp': 1372, 'tn': 2317, 'fp': 176, 'fn': 1135, 'auroc': 0.8598389011369847, 'auprc': 0.8498454637096831, 'eval_loss': 0.55613126308918}
Correct predictions are:  3689
Total predictions are:  5000
Accuracy on test set is: 0.7378 


[0.5014, 0.5014, 0.705, 0.7748, 0.7784, 0.7784, 0.7868, 0.7982, 0.5874, 0.7378]

RUN NUMBER:  3


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.


EPOCH NUMBER:  0

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.5590888626468765, 'tp': 2020, 'tn': 1876, 'fp': 617, 'fn': 487, 'auroc': 0.8518171582465206, 'auprc': 0.8398871519047039, 'eval_loss': 0.511351130437851}
Correct predictions are:  3896
Total predictions are:  5000
Accuracy on test set is: 0.7792 



EPOCH NUMBER:  1

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.561581956364974, 'tp': 1879, 'tn': 2022, 'fp': 471, 'fn': 628, 'auroc': 0.8557831893402045, 'auprc': 0.8508142409042642, 'eval_loss': 0.5130696384906769}
Correct predictions are:  3901
Total predictions are:  5000
Accuracy on test set is: 0.7802 



EPOCH NUMBER:  2

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.5719013688968596, 'tp': 2015, 'tn': 1914, 'fp': 579, 'fn': 492, 'auroc': 0.8698932199628444, 'auprc': 0.8665068939039479, 'eval_loss': 0.510422643506527}
Correct predictions are:  3929
Total predictions are:  5000
Accuracy on test set is: 0.7858 



EPOCH NUMBER:  3

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.5810391235661749, 'tp': 2083, 'tn': 1865, 'fp': 628, 'fn': 424, 'auroc': 0.8692616950116887, 'auprc': 0.8655736387567566, 'eval_loss': 0.4908457112431526}
Correct predictions are:  3948
Total predictions are:  5000
Accuracy on test set is: 0.7896 



EPOCH NUMBER:  4

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.586128454922891, 'tp': 2086, 'tn': 1875, 'fp': 618, 'fn': 421, 'auroc': 0.8815639514613793, 'auprc': 0.8803552255127605, 'eval_loss': 0.5014783985733986}
Correct predictions are:  3961
Total predictions are:  5000
Accuracy on test set is: 0.7922 



EPOCH NUMBER:  5

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6025340373712303, 'tp': 2115, 'tn': 1886, 'fp': 607, 'fn': 392, 'auroc': 0.8850105384826216, 'auprc': 0.8817054530905948, 'eval_loss': 0.4865640449523926}
Correct predictions are:  4001
Total predictions are:  5000
Accuracy on test set is: 0.8002 



EPOCH NUMBER:  6

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.616595330126407, 'tp': 1944, 'tn': 2094, 'fp': 399, 'fn': 563, 'auroc': 0.8909568251015089, 'auprc': 0.890216987729316, 'eval_loss': 0.48490926439762116}
Correct predictions are:  4038
Total predictions are:  5000
Accuracy on test set is: 0.8076 



EPOCH NUMBER:  7

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.5869965029465504, 'tp': 2049, 'tn': 1917, 'fp': 576, 'fn': 458, 'auroc': 0.8826930803137496, 'auprc': 0.8791419722219078, 'eval_loss': 0.4913145833134651}
Correct predictions are:  3966
Total predictions are:  5000
Accuracy on test set is: 0.7932 



EPOCH NUMBER:  8

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.5958243790984561, 'tp': 1904, 'tn': 2081, 'fp': 412, 'fn': 603, 'auroc': 0.8849288578422454, 'auprc': 0.883529187642375, 'eval_loss': 0.5225866214394569}
Correct predictions are:  3985
Total predictions are:  5000
Accuracy on test set is: 0.797 



EPOCH NUMBER:  9

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.5512994110123702, 'tp': 1553, 'tn': 2265, 'fp': 228, 'fn': 954, 'auroc': 0.8851337394485173, 'auprc': 0.8838067453395626, 'eval_loss': 0.5165968301415443}
Correct predictions are:  3818
Total predictions are:  5000
Accuracy on test set is: 0.7636 


[0.7792, 0.7802, 0.7858, 0.7896, 0.7922, 0.8002, 0.8076, 0.7932, 0.797, 0.7636]

RUN NUMBER:  4


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.


EPOCH NUMBER:  0

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.0, 'tp': 0, 'tn': 2493, 'fp': 0, 'fn': 2507, 'auroc': 0.6407194232402782, 'auprc': 0.6349679609094777, 'eval_loss': 0.6931458656311035}
Correct predictions are:  2493
Total predictions are:  5000
Accuracy on test set is: 0.4986 



EPOCH NUMBER:  1

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.0, 'tp': 0, 'tn': 2493, 'fp': 0, 'fn': 2507, 'auroc': 0.7082266724971125, 'auprc': 0.703075449299899, 'eval_loss': 0.6932956792831421}
Correct predictions are:  2493
Total predictions are:  5000
Accuracy on test set is: 0.4986 



EPOCH NUMBER:  2

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.0, 'tp': 2507, 'tn': 0, 'fp': 2493, 'fn': 0, 'auroc': 0.7611000470243687, 'auprc': 0.7700967968781831, 'eval_loss': 0.6933502690315246}
Correct predictions are:  2507
Total predictions are:  5000
Accuracy on test set is: 0.5014 



EPOCH NUMBER:  3

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.4604128380982544, 'tp': 1926, 'tn': 1722, 'fp': 771, 'fn': 581, 'auroc': 0.8004643556405482, 'auprc': 0.7905884902296412, 'eval_loss': 0.5713729392886162}
Correct predictions are:  3648
Total predictions are:  5000
Accuracy on test set is: 0.7296 



EPOCH NUMBER:  4

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.4006511725944124, 'tp': 2300, 'tn': 1079, 'fp': 1414, 'fn': 207, 'auroc': 0.756783453182273, 'auprc': 0.6996732179797708, 'eval_loss': 0.6632152800559997}
Correct predictions are:  3379
Total predictions are:  5000
Accuracy on test set is: 0.6758 



EPOCH NUMBER:  5

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.0, 'tp': 2507, 'tn': 0, 'fp': 2493, 'fn': 0, 'auroc': 0.8048520700402291, 'auprc': 0.7668637185360925, 'eval_loss': 0.6931424863815308}
Correct predictions are:  2507
Total predictions are:  5000
Accuracy on test set is: 0.5014 



EPOCH NUMBER:  6

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.0, 'tp': 2507, 'tn': 0, 'fp': 2493, 'fn': 0, 'auroc': 0.8241585414029644, 'auprc': 0.8187762996298691, 'eval_loss': 0.6933121094703675}
Correct predictions are:  2507
Total predictions are:  5000
Accuracy on test set is: 0.5014 



EPOCH NUMBER:  7

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.4571715283341526, 'tp': 2199, 'tn': 1385, 'fp': 1108, 'fn': 308, 'auroc': 0.6982524342990849, 'auprc': 0.6058015964391902, 'eval_loss': 0.6470907608985901}
Correct predictions are:  3584
Total predictions are:  5000
Accuracy on test set is: 0.7168 



EPOCH NUMBER:  8

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.0, 'tp': 0, 'tn': 2493, 'fp': 0, 'fn': 2507, 'auroc': 0.7102119680618296, 'auprc': 0.687339422544874, 'eval_loss': 0.6939451261520386}
Correct predictions are:  2493
Total predictions are:  5000
Accuracy on test set is: 0.4986 



EPOCH NUMBER:  9

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.0, 'tp': 2507, 'tn': 0, 'fp': 2493, 'fn': 0, 'auroc': 0.8248361467153903, 'auprc': 0.8230542713138769, 'eval_loss': 0.6906485738754272}
Correct predictions are:  2507
Total predictions are:  5000
Accuracy on test set is: 0.5014 


[0.4986, 0.4986, 0.5014, 0.7296, 0.6758, 0.5014, 0.5014, 0.7168, 0.4986, 0.5014]

RUN NUMBER:  5


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.


EPOCH NUMBER:  0

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.0, 'tp': 2507, 'tn': 0, 'fp': 2493, 'fn': 0, 'auroc': 0.44809415305816, 'auprc': 0.47064175954534526, 'eval_loss': 0.6931458128929138}
Correct predictions are:  2507
Total predictions are:  5000
Accuracy on test set is: 0.5014 



EPOCH NUMBER:  1

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.0, 'tp': 2507, 'tn': 0, 'fp': 2493, 'fn': 0, 'auroc': 0.7223176629704777, 'auprc': 0.7127223330534824, 'eval_loss': 0.6931544756889343}
Correct predictions are:  2507
Total predictions are:  5000
Accuracy on test set is: 0.5014 



EPOCH NUMBER:  2

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.19182288082077276, 'tp': 241, 'tn': 2468, 'fp': 25, 'fn': 2266, 'auroc': 0.7677703393194603, 'auprc': 0.7650208828472134, 'eval_loss': 0.6911019265174866}
Correct predictions are:  2709
Total predictions are:  5000
Accuracy on test set is: 0.5418 



EPOCH NUMBER:  3

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.5124623567048652, 'tp': 1616, 'tn': 2135, 'fp': 358, 'fn': 891, 'auroc': 0.8319702026463887, 'auprc': 0.8364319133978023, 'eval_loss': 0.5852608355998993}
Correct predictions are:  3751
Total predictions are:  5000
Accuracy on test set is: 0.7502 



EPOCH NUMBER:  4

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.37853606130118694, 'tp': 783, 'tn': 2424, 'fp': 69, 'fn': 1724, 'auroc': 0.829732105099704, 'auprc': 0.8332117252814952, 'eval_loss': 0.6209906264781951}
Correct predictions are:  3207
Total predictions are:  5000
Accuracy on test set is: 0.6414 



EPOCH NUMBER:  5

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.5074252003970103, 'tp': 1458, 'tn': 2243, 'fp': 250, 'fn': 1049, 'auroc': 0.8157187952353546, 'auprc': 0.8153808480755241, 'eval_loss': 0.5511054919958115}
Correct predictions are:  3701
Total predictions are:  5000
Accuracy on test set is: 0.7402 



EPOCH NUMBER:  6

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.5615971405611541, 'tp': 1965, 'tn': 1939, 'fp': 554, 'fn': 542, 'auroc': 0.7970163286080163, 'auprc': 0.8121305616507595, 'eval_loss': 0.5093169950008393}
Correct predictions are:  3904
Total predictions are:  5000
Accuracy on test set is: 0.7808 



EPOCH NUMBER:  7

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.5515666279753024, 'tp': 2151, 'tn': 1708, 'fp': 785, 'fn': 356, 'auroc': 0.832766368888332, 'auprc': 0.8334505390651674, 'eval_loss': 0.5396636650800705}
Correct predictions are:  3859
Total predictions are:  5000
Accuracy on test set is: 0.7718 



EPOCH NUMBER:  8

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.5886830964682837, 'tp': 2034, 'tn': 1937, 'fp': 556, 'fn': 473, 'auroc': 0.8727242021577449, 'auprc': 0.8596384142069038, 'eval_loss': 0.48395110214948656}
Correct predictions are:  3971
Total predictions are:  5000
Accuracy on test set is: 0.7942 



EPOCH NUMBER:  9

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.5721929271104332, 'tp': 1917, 'tn': 2012, 'fp': 481, 'fn': 590, 'auroc': 0.8480220084925467, 'auprc': 0.7910051509283302, 'eval_loss': 0.5029161836028099}
Correct predictions are:  3929
Total predictions are:  5000
Accuracy on test set is: 0.7858 


[0.5014, 0.5014, 0.5418, 0.7502, 0.6414, 0.7402, 0.7808, 0.7718, 0.7942, 0.7858]


 Over all runs maximum accuracies are: [0.7296, 0.7942, 0.7982, 0.8076, 0.8166]
The median is: 0.7982
RoBERTa Accuracy Score on Test set ->  ['0.7982 +/- 0.0686']


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.



* * * * EVALUATION USING LOW_STM_RSW AS PREPROCESSING FUNCTION * * * *

RUN NUMBER:  1


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.


EPOCH NUMBER:  0

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.0, 'tp': 2507, 'tn': 0, 'fp': 2493, 'fn': 0, 'auroc': 0.6742202458867277, 'auprc': 0.6248000990057347, 'eval_loss': 0.6932329743385315}
Correct predictions are:  2507
Total predictions are:  5000
Accuracy on test set is: 0.5014 



EPOCH NUMBER:  1

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.48322096190016844, 'tp': 1878, 'tn': 1830, 'fp': 663, 'fn': 629, 'auroc': 0.7560886477349982, 'auprc': 0.6622589740354874, 'eval_loss': 0.5657286804676056}
Correct predictions are:  3708
Total predictions are:  5000
Accuracy on test set is: 0.7416 



EPOCH NUMBER:  2

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.5023200408413051, 'tp': 2148, 'tn': 1577, 'fp': 916, 'fn': 359, 'auroc': 0.8378383286524966, 'auprc': 0.8296966309466811, 'eval_loss': 0.5685660468935967}
Correct predictions are:  3725
Total predictions are:  5000
Accuracy on test set is: 0.745 



EPOCH NUMBER:  3

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.4859750962072007, 'tp': 1408, 'tn': 2235, 'fp': 258, 'fn': 1099, 'auroc': 0.8462135943145795, 'auprc': 0.8487913626557171, 'eval_loss': 0.5129806872844695}
Correct predictions are:  3643
Total predictions are:  5000
Accuracy on test set is: 0.7286 



EPOCH NUMBER:  4

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.5535211080587922, 'tp': 2030, 'tn': 1851, 'fp': 642, 'fn': 477, 'auroc': 0.855294625509864, 'auprc': 0.8439621769924027, 'eval_loss': 0.5008398734092713}
Correct predictions are:  3881
Total predictions are:  5000
Accuracy on test set is: 0.7762 



EPOCH NUMBER:  5

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.5874455189229418, 'tp': 1957, 'tn': 2011, 'fp': 482, 'fn': 550, 'auroc': 0.8708664275927924, 'auprc': 0.8597225030374651, 'eval_loss': 0.48554887560606}
Correct predictions are:  3968
Total predictions are:  5000
Accuracy on test set is: 0.7936 



EPOCH NUMBER:  6

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.607134471199384, 'tp': 2174, 'tn': 1831, 'fp': 662, 'fn': 333, 'auroc': 0.8626367630722226, 'auprc': 0.8133292154058174, 'eval_loss': 0.47001138731241227}
Correct predictions are:  4005
Total predictions are:  5000
Accuracy on test set is: 0.801 



EPOCH NUMBER:  7

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.29317424107778717, 'tp': 2500, 'tn': 412, 'fp': 2081, 'fn': 7, 'auroc': 0.28985603247129454, 'auprc': 0.3699026503200754, 'eval_loss': 0.6387789908409118}
Correct predictions are:  2912
Total predictions are:  5000
Accuracy on test set is: 0.5824 



EPOCH NUMBER:  8

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.5954966062166783, 'tp': 1856, 'tn': 2123, 'fp': 370, 'fn': 651, 'auroc': 0.8846191754143352, 'auprc': 0.8776795364183857, 'eval_loss': 0.4707309042930603}
Correct predictions are:  3979
Total predictions are:  5000
Accuracy on test set is: 0.7958 



EPOCH NUMBER:  9

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6103450319864049, 'tp': 2064, 'tn': 1961, 'fp': 532, 'fn': 443, 'auroc': 0.8813708299473069, 'auprc': 0.8707474353910716, 'eval_loss': 0.48367006678581237}
Correct predictions are:  4025
Total predictions are:  5000
Accuracy on test set is: 0.805 


[0.5014, 0.7416, 0.745, 0.7286, 0.7762, 0.7936, 0.801, 0.5824, 0.7958, 0.805]

RUN NUMBER:  2


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.


EPOCH NUMBER:  0

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6538507521040352, 'tp': 2041, 'tn': 2093, 'fp': 400, 'fn': 466, 'auroc': 0.9101135352901167, 'auprc': 0.9051421818675068, 'eval_loss': 0.3919079496830702}
Correct predictions are:  4134
Total predictions are:  5000
Accuracy on test set is: 0.8268 



EPOCH NUMBER:  1

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6693225080489179, 'tp': 2071, 'tn': 2102, 'fp': 391, 'fn': 436, 'auroc': 0.9148850926991269, 'auprc': 0.9108763448589942, 'eval_loss': 0.38410994802713394}
Correct predictions are:  4173
Total predictions are:  5000
Accuracy on test set is: 0.8346 



EPOCH NUMBER:  2

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6798639198590768, 'tp': 2163, 'tn': 2035, 'fp': 458, 'fn': 344, 'auroc': 0.918423040436637, 'auprc': 0.91427703978657, 'eval_loss': 0.44045182365179064}
Correct predictions are:  4198
Total predictions are:  5000
Accuracy on test set is: 0.8396 



EPOCH NUMBER:  3

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6744983686662157, 'tp': 2080, 'tn': 2106, 'fp': 387, 'fn': 427, 'auroc': 0.916775507519979, 'auprc': 0.9124995533077345, 'eval_loss': 0.5667140756770969}
Correct predictions are:  4186
Total predictions are:  5000
Accuracy on test set is: 0.8372 



EPOCH NUMBER:  4

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.6732006143749674, 'tp': 2096, 'tn': 2087, 'fp': 406, 'fn': 411, 'auroc': 0.9178192757031215, 'auprc': 0.9156744720353394, 'eval_loss': 0.6144832296952605}
Correct predictions are:  4183
Total predictions are:  5000
Accuracy on test set is: 0.8366 



EPOCH NUMBER:  5

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




{'mcc': 0.668123231219969, 'tp': 2250, 'tn': 1906, 'fp': 587, 'fn': 257, 'auroc': 0.9209861005310283, 'auprc': 0.9178322825172273, 'eval_loss': 0.7049847457826137}
Correct predictions are:  4156
Total predictions are:  5000
Accuracy on test set is: 0.8312 



EPOCH NUMBER:  6

NOW TRAIN THE MODEL.





NOW EVALUATE THE TEST DF.




## Now show compact results in a table.

In [None]:
# Compact summary: one row per preprocessing function with its test-accuracy entry.
# NOTE(review): model_results[...][0] is presumably the "median +/- spread" string
# reported at the end of each evaluation block -- confirm against the training cell.
table_header = " PREPRO FUNCTION    |  Test Accuracy   |"
# Header, followed by a line break and one blank line.
print(table_header, end="\n\n")

# Loop names are kept as-is: they stay in the kernel namespace after the cell runs.
for prepro_func in prepro_functions_dict_comb:
  result = model_results[prepro_func][0]
  # Name padded to 27 columns, result to 12; each row is followed by two blank lines.
  print(f'{prepro_func:27}{result:12}', end="\n\n\n")