<a href="https://colab.research.google.com/github/marco-siino/text_preprocessing_impact/blob/main/IMDB_DS/XLNet_IMDB_TextPreProImpact_NB.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Text preprocessing worth the time: A comparative survey on the impact of common techniques on NLP model performances. 
- - - 
XLNet ON IMDB DS EXPERIMENTS NOTEBOOK 
- - -
XLNet on the Internet Movie Database (IMDB) dataset.
Code by M. Siino. 

From the paper: "Text preprocessing worth the time: A comparative survey on the impact of common techniques on NLP model performances." by M.Siino et al.



## Importing modules.

In [None]:
!pip install simpletransformers
!pip install tensorboardx

#import matplotlib.pyplot as plt
import os
import re
import shutil
import string
import tensorflow as tf
import numpy as np
import torch
import nltk
import pandas as pd

from tensorflow.keras import layers
from tensorflow.keras import losses
from tensorflow.keras import preprocessing
from tensorflow.keras.models import Model
from tensorflow.keras.layers.experimental.preprocessing import TextVectorization
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize 
from nltk.stem import PorterStemmer
from textblob import TextBlob
nltk.download('stopwords')
nltk.download('punkt')
from io import open
from pathlib import Path
from simpletransformers.classification import ClassificationModel, ClassificationArgs


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


## Downloading the dataset and extracting it into the current working directory.

In [None]:
# Archive of the IMDB movie-review dataset used for all the experiments.
url = "https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz"

# Download the archive (Keras caches it) and unpack it next to the notebook.
dataset = tf.keras.utils.get_file("aclImdb_v1", url,
                                    untar=True, cache_dir='.',
                                    cache_subdir='')

dataset_dir = os.path.join(os.path.dirname(dataset), 'aclImdb')

train_set_dir = os.path.join(dataset_dir, 'train')
test_set_dir = os.path.join(dataset_dir, 'test')

# Drop the 'unsup' sub-folder of the training split so that it is not picked
# up when the datasets are later built from the directory tree.
# The existence check keeps this cell re-runnable: without it, rmtree raises
# FileNotFoundError whenever the folder has already been removed.
remove_dir = os.path.join(train_set_dir, 'unsup')
if os.path.isdir(remove_dir):
  shutil.rmtree(remove_dir)

Downloading data from https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz


## Building the dataset.

In [None]:
# Build the shuffled, sub-sampled training and test datasets.
batch_size = 1
seed = 1


def _load_subsampled_split(directory):
  """Load one split from `directory`, shuffle it once and keep 5000 batches.

  The files are read in a fixed order (shuffle=False) and then shuffled with
  a fixed seed and reshuffle_each_iteration=False, so every pass over the
  returned dataset sees the same samples in the same order.
  """
  split_ds = tf.keras.preprocessing.text_dataset_from_directory(
      directory,
      batch_size=batch_size,
      shuffle=False,
      seed=seed
      )
  split_ds = split_ds.shuffle(25000, seed=1, reshuffle_each_iteration=False)
  return split_ds.take(5000)


train_ds = _load_subsampled_split('aclImdb/train')
test_ds = _load_subsampled_split('aclImdb/test')

# Number of batches kept in each split.
train_ds_size = len(train_ds)
test_ds_size = len(test_ds)

Found 25000 files belonging to 2 classes.
Found 25000 files belonging to 2 classes.


## Functions to pre-process the source text. (See our paper for a detailed discussion.)

In [None]:
# Do-Nothing preprocessing function.
def DON(input_data):
  """Baseline preprocessing: only strip XML wrapper markup from the text.

  Args:
    input_data: text (or string tensor) accepted by tf.strings.regex_replace.

  Returns:
    The string tensor produced by applying, in order, the replacements
    listed in `markup_rules`. No other change is made to the text.
  """
  # (pattern, replacement) pairs, applied sequentially.
  # The first two patterns are raw strings: they contain backslashes that are
  # not valid Python escapes, which non-raw literals report as invalid escape
  # sequences. The pattern text handed to TensorFlow is unchanged.
  # The last two deliberately stay non-raw so that the newline/tab characters
  # reach the regex engine exactly as before.
  markup_rules = (
      (r'<\!\[CDATA\[', ' '),
      (r'\]{1,}>', ' '),
      ('<author lang="en">', ' '),
      ('</author>', ' '),
      ('<documents>\n(\t){0,2}', ''),
      ('</documents>\n(\t){0,2}', ' '),
  )

  output_data = input_data
  for pattern, replacement in markup_rules:
    output_data = tf.strings.regex_replace(output_data, pattern, replacement)
  return output_data

# Lowercasing preprocessing function.
def LOW(input_data):
  """Run the DON markup clean-up on `input_data`, then lowercase the result."""
  cleaned = DON(input_data)
  return tf.strings.lower(cleaned)

# Helper for RSW: recover the text carried by the value of an eager tensor.
def _rsw_value_to_text(value):
  """Convert the result of `Tensor.numpy()` into a Python string.

  Bytes are decoded as UTF-8. The previous `str(value)[2:-1]` trick sliced the
  *repr* of the bytes object, which leaks escape sequences (e.g. `\\'`, `\\n`,
  `\\xc3\\xa9`) into the text before tokenisation.
  """
  if isinstance(value, bytes):
    return value.decode('utf-8', errors='replace')
  # NOTE(review): any non-bytes value (presumably an array, when the input has
  # more than one dimension) keeps the legacy conversion - confirm what the
  # intended behaviour is for that case.
  return str(value)[2:-1]


# Removing Stop Words function.
def RSW(input_data):
  """Apply DON, then remove English stop words from the text.

  Two input layouts are supported, mirroring the original implementation:
    * the DON output can be indexed with [0] (described by the original
      comments as the "training-set adaptation" case): element 0 is processed
      and the result is returned as a tensor built from [[text]];
    * the DON output cannot be indexed (described as the "actual simulation"
      case): the value itself is processed and returned as a scalar tensor.

  If the value cannot be read with `.numpy()`, the DON output is returned
  unchanged.

  Args:
    input_data: text (or string tensor) accepted by DON.

  Returns:
    A string tensor with the stop words removed, or the DON output when the
    text cannot be extracted.
  """
  output_data = DON(input_data)

  # Decide which layout we are dealing with.
  try:
    candidate = output_data[0]
  except Exception:
    candidate = output_data
    wrap_result = False
  else:
    wrap_result = True

  try:
    value = candidate.numpy()
  except Exception:
    # Not a tensor whose content can be read: leave the data untouched.
    return output_data

  # Load the stop-word list once per call instead of once per word.
  english_stop_words = frozenset(stopwords.words('english'))
  words = TextBlob(_rsw_value_to_text(value)).words
  output_string = ' '.join(
      word for word in words if word not in english_stop_words)

  if wrap_result:
    return tf.constant([[output_string]])
  return tf.constant(output_string)

# Helper for STM: recover the text carried by the value of an eager tensor.
def _stm_value_to_text(value):
  """Convert the result of `Tensor.numpy()` into a Python string.

  Bytes are decoded as UTF-8. The previous `str(value)[2:-1]` trick sliced the
  *repr* of the bytes object, which leaks escape sequences (e.g. `\\'`, `\\n`,
  `\\xc3\\xa9`) into the text before tokenisation.
  """
  if isinstance(value, bytes):
    return value.decode('utf-8', errors='replace')
  # NOTE(review): any non-bytes value (presumably an array, when the input has
  # more than one dimension) keeps the legacy conversion - confirm what the
  # intended behaviour is for that case.
  return str(value)[2:-1]


# Porter Stemmer preprocessing function.
def STM(input_data):
  """Apply DON, then replace every word with its Porter stem.

  Two input layouts are supported, mirroring the original implementation:
    * the DON output can be indexed with [0] (described by the original
      comments as the "training-set adaptation" case): element 0 is processed
      and the result is returned as a tensor built from [[text]];
    * the DON output cannot be indexed (described as the "actual simulation"
      case): the value itself is processed and returned as a scalar tensor.

  If the value cannot be read with `.numpy()`, the DON output is returned
  unchanged.

  Args:
    input_data: text (or string tensor) accepted by DON.

  Returns:
    A string tensor with the stemmed words joined by single spaces, or the
    DON output when the text cannot be extracted.
  """
  output_data = DON(input_data)
  stemmer = PorterStemmer()

  # Decide which layout we are dealing with.
  try:
    candidate = output_data[0]
  except Exception:
    candidate = output_data
    wrap_result = False
  else:
    wrap_result = True

  try:
    value = candidate.numpy()
  except Exception:
    # Not a tensor whose content can be read: leave the data untouched.
    return output_data

  words = TextBlob(_stm_value_to_text(value)).words
  output_string = ' '.join(stemmer.stem(word) for word in words)

  if wrap_result:
    return tf.constant([[output_string]])
  return tf.constant(output_string)

## Define the combined preprocessing functions. (The base functions are: DON, LOW, RSW and STM).

In [None]:
## COMBINED PREPROCESSING FUNCTIONS (PAIRS FIRST, THEN TRIPLES).
## THE NAME LISTS THE STEPS IN APPLICATION ORDER, AND THE ORDER MATTERS.

# 5
def LOW_RSW(input_data):
  """LOW -> RSW."""
  lowered = LOW(input_data)
  return RSW(lowered)

# 6
def LOW_STM(input_data):
  """LOW -> STM."""
  lowered = LOW(input_data)
  return STM(lowered)

# 7
def RSW_LOW(input_data):
  """RSW -> LOW."""
  without_stopwords = RSW(input_data)
  return LOW(without_stopwords)

# 8
def RSW_STM(input_data):
  """RSW -> STM."""
  without_stopwords = RSW(input_data)
  return STM(without_stopwords)

# 9
def STM_LOW(input_data):
  """STM -> LOW."""
  stemmed = STM(input_data)
  return LOW(stemmed)

# 10
def STM_RSW(input_data):
  """STM -> RSW."""
  stemmed = STM(input_data)
  return RSW(stemmed)

# 11
def LOW_STM_RSW(input_data):
  """LOW -> STM -> RSW."""
  lowered = LOW(input_data)
  stemmed = STM(lowered)
  return RSW(stemmed)

# 12
def LOW_RSW_STM(input_data):
  """LOW -> RSW -> STM."""
  lowered = LOW(input_data)
  without_stopwords = RSW(lowered)
  return STM(without_stopwords)

# 13
def STM_LOW_RSW(input_data):
  """STM -> LOW -> RSW."""
  stemmed = STM(input_data)
  lowered = LOW(stemmed)
  return RSW(lowered)

# 14
def STM_RSW_LOW(input_data):
  """STM -> RSW -> LOW."""
  stemmed = STM(input_data)
  without_stopwords = RSW(stemmed)
  return LOW(without_stopwords)

# 15
def RSW_LOW_STM(input_data):
  """RSW -> LOW -> STM."""
  without_stopwords = RSW(input_data)
  lowered = LOW(without_stopwords)
  return STM(lowered)

# 16
def RSW_STM_LOW(input_data):
  """RSW -> STM -> LOW."""
  without_stopwords = RSW(input_data)
  stemmed = STM(without_stopwords)
  return LOW(stemmed)

## Define a dictionary mapping function names to preprocessing functions (function_name: prepro_function), and a dictionary to store the model results.

In [None]:
# 1-4. The four base preprocessing functions.
prepro_functions_dict_base = {
    'DON': DON,   # 1. Do nothing
    'LOW': LOW,   # 2. Lowercasing
    'RSW': RSW,   # 3. Removing Stopwords
    'STM': STM,   # 4. Porter Stemming
}

# 3 prepro functions = 15 combs...+1 for do_nothing
# The insertion order below is the order in which the experiments are run.
prepro_functions_dict_comb = dict(prepro_functions_dict_base)

# 5-10. Ordered pairs.
prepro_functions_dict_comb.update({
    'LOW_RSW': LOW_RSW,   # 5. LOW->RSW
    'LOW_STM': LOW_STM,   # 6. LOW->STM
    'RSW_LOW': RSW_LOW,   # 7. RSW->LOW
    'RSW_STM': RSW_STM,   # 8. RSW->STM
    'STM_LOW': STM_LOW,   # 9. STM->LOW
    'STM_RSW': STM_RSW,   # 10. STM->RSW
})

# 11-16. Ordered triples.
prepro_functions_dict_comb.update({
    'LOW_STM_RSW': LOW_STM_RSW,   # 11. LOW->STM->RSW
    'LOW_RSW_STM': LOW_RSW_STM,   # 12. LOW->RSW->STM
    'STM_LOW_RSW': STM_LOW_RSW,   # 13. STM->LOW->RSW
    'STM_RSW_LOW': STM_RSW_LOW,   # 14. STM->RSW->LOW
    'RSW_LOW_STM': RSW_LOW_STM,   # 15. RSW->LOW->STM
    'RSW_STM_LOW': RSW_STM_LOW,   # 16. RSW->STM->LOW
})

# One (initially empty) list of results per preprocessing configuration.
model_results = {}
for key in prepro_functions_dict_comb:
  print(key)
  model_results[key] = []

DON
LOW
RSW
STM
LOW_RSW
LOW_STM
RSW_LOW
RSW_STM
STM_LOW
STM_RSW
LOW_STM_RSW
LOW_RSW_STM
STM_LOW_RSW
STM_RSW_LOW
RSW_LOW_STM
RSW_STM_LOW


## Function to convert DSs to Pandas Dataframe

In [None]:
def _dataset_to_dataframe(dataset, preprocessing_function):
  """Preprocess every sample of `dataset` and collect it in a DataFrame.

  Each element of `dataset` is expected to be a (texts, labels) pair whose
  members expose `.numpy()`. Every sample of every batch is converted, so the
  function does not silently drop data when the batch size is larger than 1.
  """
  records = []
  for texts, labels in dataset:
    for raw_text, raw_label in zip(texts.numpy(), labels.numpy()):
      # The preprocessing function returns a string tensor; decode its bytes.
      text = preprocessing_function(raw_text).numpy().decode('UTF-8')
      records.append({'text': text, 'label': int(raw_label)})
  # Explicit columns keep the schema stable even for an empty dataset.
  return pd.DataFrame(records, columns=['text', 'label'])


def preprocess_and_convert_ds(preprocessing_function):
  """Preprocess the global train/test datasets and convert them to DataFrames.

  Args:
    preprocessing_function: callable applied to the raw text of each sample;
      it must return an object whose `.numpy()` yields UTF-8 encoded bytes.

  Returns:
    A `(train_df, test_df)` tuple of pandas DataFrames with the columns
    'text' (preprocessed string) and 'label' (int), built from the
    module-level `train_ds` and `test_ds`.
  """
  train_df = _dataset_to_dataframe(train_ds, preprocessing_function)
  test_df = _dataset_to_dataframe(test_ds, preprocessing_function)
  return train_df, test_df


## Print some RAW and preprocessed samples (No need to execute)

In [None]:
# Show two raw samples next to their preprocessed version.
# The cell previously referenced `raw_train_ds_es` and `custom_standardization`,
# which are not defined anywhere in this notebook (hence the NameError); it now
# uses the training set and one of the preprocessing functions defined above.
sample_prepro_function = LOW  # any value of prepro_functions_dict_comb works
for element in train_ds.take(2):
  authorDocument = element[0]
  temp = sample_prepro_function(authorDocument[0].numpy()).numpy().decode('UTF-8')
  print("Not-Preprocessed samples: \n",authorDocument)
  print("Preprocessed samples: \n",temp)

NameError: ignored

## Some parameters definition...

In [None]:
# Experiment schedule: each run trains a fresh model for this many epochs,
# and the whole procedure is repeated `num_runs` times per configuration.
num_epochs_per_run = 10
num_runs = 5

# Use the GPU when PyTorch can see one.
cuda_available = torch.cuda.is_available()
print('Cuda available? ',cuda_available)

Cuda available?  True


## Training and evaluation of the model

In [None]:
# Start from an empty result list for every preprocessing configuration, so
# that re-running this cell does not accumulate stale results.
for key in prepro_functions_dict_comb:
  model_results[key]=[]

for key in prepro_functions_dict_comb:

  # One training epoch per train_model() call: the epoch loop below drives the
  # schedule so that the test set can be evaluated after every epoch.
  model_args = ClassificationArgs(num_train_epochs=1,
                                      no_save=True,
                                      no_cache=True,
                                      silent=True,
                                      overwrite_output_dir=True)

  # Best accuracy reached in each run of this configuration.
  # Assumes num_runs >= 1 and num_epochs_per_run >= 1.
  runs_accuracy = []

  print("\n\n* * * * EVALUATION USING", key, "AS PREPROCESSING FUNCTION * * * *")

  # Preprocess train and test set and convert to DFs.
  train_df,test_df = preprocess_and_convert_ds(prepro_functions_dict_comb[key])

  for run in range(1,(num_runs+1)):
    print("\nRUN NUMBER: ", run)
    epochs_accuracy=[]
    # A fresh pre-trained model for every run. (The model that used to be built
    # before this loop was never trained or evaluated and has been dropped.)
    model = ClassificationModel("xlnet",
                                  'xlnet-base-cased',
                                  args = model_args,
                                  num_labels=2,
                                  use_cuda=cuda_available)
    for epoch in range (0,num_epochs_per_run):
      print("\nEPOCH NUMBER: ", epoch, "(RUN: ",run,"COMB: ",key,")")
      # train model
      print("\nNOW TRAIN THE MODEL.")
      model.train_model(train_df,show_running_loss=False)
      print("\nNOW EVALUATE THE TEST DF.")
      result, model_outputs, wrong_predictions = model.eval_model(test_df)
      # Results on test set.
      print(result)
      correct_predictions = result['tp']+result['tn']
      print("Correct predictions are: ",correct_predictions)
      total_predictions = result['tp']+result['tn']+result['fp']+result['fn']
      print("Total predictions are: ",total_predictions)
      accuracy = correct_predictions/total_predictions
      print("Accuracy on test set is:",accuracy,"\n\n")
      epochs_accuracy.append(accuracy)

    print(epochs_accuracy)
    # NOTE(review): the best epoch is selected using the test-set accuracy.
    runs_accuracy.append(max(epochs_accuracy))

  runs_accuracy.sort()
  print("\n\n Over all runs maximum accuracies are:", runs_accuracy)

  # Median and spread are derived from the actual number of runs instead of the
  # hard-coded indices 2 and 4, which were only valid for num_runs == 5.
  # For an even number of runs the upper of the two middle values is used.
  median_accuracy = runs_accuracy[len(runs_accuracy) // 2]
  print("The median is:",median_accuracy)

  # Largest distance between the median and either extreme.
  max_range_from_median = max(median_accuracy - runs_accuracy[0],
                              runs_accuracy[-1] - median_accuracy)
  final_result = str(median_accuracy)+" +/- "+ str(max_range_from_median)
  model_results[key].append(final_result)
  print("XLNet Accuracy Score on Test set -> ",model_results[key])


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a



* * * * EVALUATION USING DON AS PREPROCESSING FUNCTION * * * *

RUN NUMBER:  1


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  1 COMB:  DON )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7430678733896555, 'tp': 2153, 'tn': 2204, 'fp': 289, 'fn': 354, 'auroc': 0.9481388734087675, 'auprc': 0.9467360339150941, 'eval_loss': 0.32745938262939456}
Correct predictions are:  4357
Total predictions are:  5000
Accuracy on test set is: 0.8714 



EPOCH NUMBER:  1 (RUN:  1 COMB:  DON )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7652081807782233, 'tp': 2255, 'tn': 2157, 'fp': 336, 'fn': 252, 'auroc': 0.9527543495941009, 'auprc': 0.9503353211753124, 'eval_loss': 0.4308617321014404}
Correct predictions are:  4412
Total predictions are:  5000
Accuracy on test set is: 0.8824 



EPOCH NUMBER:  2 (RUN:  1 COMB:  DON )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7655601380661254, 'tp': 2302, 'tn': 2107, 'fp': 386, 'fn': 205, 'auroc': 0.9514089790463958, 'auprc': 0.9499217917180308, 'eval_loss': 0.5216941719055176}
Correct predictions are:  4409
Total predictions are:  5000
Accuracy on test set is: 0.8818 



EPOCH NUMBER:  3 (RUN:  1 COMB:  DON )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7621314943515953, 'tp': 2234, 'tn': 2171, 'fp': 322, 'fn': 273, 'auroc': 0.9508608147487876, 'auprc': 0.94953109084958, 'eval_loss': 0.6122892295837402}
Correct predictions are:  4405
Total predictions are:  5000
Accuracy on test set is: 0.881 



EPOCH NUMBER:  4 (RUN:  1 COMB:  DON )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7611978734849877, 'tp': 2210, 'tn': 2193, 'fp': 300, 'fn': 297, 'auroc': 0.9509606555315395, 'auprc': 0.9484905222641944, 'eval_loss': 0.6758492101669311}
Correct predictions are:  4403
Total predictions are:  5000
Accuracy on test set is: 0.8806 



EPOCH NUMBER:  5 (RUN:  1 COMB:  DON )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7520165666239198, 'tp': 2207, 'tn': 2173, 'fp': 320, 'fn': 300, 'auroc': 0.9440904416690628, 'auprc': 0.9419293912423911, 'eval_loss': 0.6839058714389801}
Correct predictions are:  4380
Total predictions are:  5000
Accuracy on test set is: 0.876 



EPOCH NUMBER:  6 (RUN:  1 COMB:  DON )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7488018752130217, 'tp': 2190, 'tn': 2182, 'fp': 311, 'fn': 317, 'auroc': 0.9400681701344539, 'auprc': 0.9386336448984557, 'eval_loss': 0.7808212065935135}
Correct predictions are:  4372
Total predictions are:  5000
Accuracy on test set is: 0.8744 



EPOCH NUMBER:  7 (RUN:  1 COMB:  DON )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7412037684937344, 'tp': 2242, 'tn': 2109, 'fp': 384, 'fn': 265, 'auroc': 0.9415624218493873, 'auprc': 0.9283200605239872, 'eval_loss': 0.860298804116249}
Correct predictions are:  4351
Total predictions are:  5000
Accuracy on test set is: 0.8702 



EPOCH NUMBER:  8 (RUN:  1 COMB:  DON )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7464092630777266, 'tp': 2303, 'tn': 2055, 'fp': 438, 'fn': 204, 'auroc': 0.9440283611823517, 'auprc': 0.9422297967136394, 'eval_loss': 0.805157901763916}
Correct predictions are:  4358
Total predictions are:  5000
Accuracy on test set is: 0.8716 



EPOCH NUMBER:  9 (RUN:  1 COMB:  DON )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7448819265942569, 'tp': 2208, 'tn': 2154, 'fp': 339, 'fn': 299, 'auroc': 0.9435343573093613, 'auprc': 0.9414505718112126, 'eval_loss': 0.942649913430214}
Correct predictions are:  4362
Total predictions are:  5000
Accuracy on test set is: 0.8724 


[0.8714, 0.8824, 0.8818, 0.881, 0.8806, 0.876, 0.8744, 0.8702, 0.8716, 0.8724]

RUN NUMBER:  2


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  2 COMB:  DON )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7712224325853425, 'tp': 2232, 'tn': 2196, 'fp': 297, 'fn': 275, 'auroc': 0.9532745136721872, 'auprc': 0.9503896348380567, 'eval_loss': 0.32588939094543456}
Correct predictions are:  4428
Total predictions are:  5000
Accuracy on test set is: 0.8856 



EPOCH NUMBER:  1 (RUN:  2 COMB:  DON )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7624094943771552, 'tp': 2218, 'tn': 2188, 'fp': 305, 'fn': 289, 'auroc': 0.9541144402572117, 'auprc': 0.9528769676496154, 'eval_loss': 0.4375708854675293}
Correct predictions are:  4406
Total predictions are:  5000
Accuracy on test set is: 0.8812 



EPOCH NUMBER:  2 (RUN:  2 COMB:  DON )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7720576275020449, 'tp': 2281, 'tn': 2147, 'fp': 346, 'fn': 226, 'auroc': 0.9525177077388287, 'auprc': 0.9503980334423305, 'eval_loss': 0.465758517408371}
Correct predictions are:  4428
Total predictions are:  5000
Accuracy on test set is: 0.8856 



EPOCH NUMBER:  3 (RUN:  2 COMB:  DON )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7607384433216434, 'tp': 2246, 'tn': 2155, 'fp': 338, 'fn': 261, 'auroc': 0.9509426553904183, 'auprc': 0.949460022697294, 'eval_loss': 0.5734054266214371}
Correct predictions are:  4401
Total predictions are:  5000
Accuracy on test set is: 0.8802 



EPOCH NUMBER:  4 (RUN:  2 COMB:  DON )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7584982921612917, 'tp': 2266, 'tn': 2128, 'fp': 365, 'fn': 241, 'auroc': 0.9327457927270151, 'auprc': 0.8923652036870832, 'eval_loss': 0.6589058979272843}
Correct predictions are:  4394
Total predictions are:  5000
Accuracy on test set is: 0.8788 



EPOCH NUMBER:  5 (RUN:  2 COMB:  DON )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7600019200132834, 'tp': 2204, 'tn': 2196, 'fp': 297, 'fn': 303, 'auroc': 0.9488656791069242, 'auprc': 0.9448563460246863, 'eval_loss': 0.6688964517593384}
Correct predictions are:  4400
Total predictions are:  5000
Accuracy on test set is: 0.88 



EPOCH NUMBER:  6 (RUN:  2 COMB:  DON )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7584093094056528, 'tp': 2213, 'tn': 2183, 'fp': 310, 'fn': 294, 'auroc': 0.9470512648819167, 'auprc': 0.9459788094200906, 'eval_loss': 0.6141171579360962}
Correct predictions are:  4396
Total predictions are:  5000
Accuracy on test set is: 0.8792 



EPOCH NUMBER:  7 (RUN:  2 COMB:  DON )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7504259164115267, 'tp': 2207, 'tn': 2169, 'fp': 324, 'fn': 300, 'auroc': 0.9437351588836456, 'auprc': 0.9367304963836462, 'eval_loss': 0.7552955173492432}
Correct predictions are:  4376
Total predictions are:  5000
Accuracy on test set is: 0.8752 



EPOCH NUMBER:  8 (RUN:  2 COMB:  DON )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7504684660564925, 'tp': 2179, 'tn': 2197, 'fp': 296, 'fn': 328, 'auroc': 0.9390177618992535, 'auprc': 0.9183520451739036, 'eval_loss': 0.747747387456894}
Correct predictions are:  4376
Total predictions are:  5000
Accuracy on test set is: 0.8752 



EPOCH NUMBER:  9 (RUN:  2 COMB:  DON )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7268543223182059, 'tp': 2122, 'tn': 2194, 'fp': 299, 'fn': 385, 'auroc': 0.939303444139002, 'auprc': 0.9288184478258126, 'eval_loss': 0.7342808674812317}
Correct predictions are:  4316
Total predictions are:  5000
Accuracy on test set is: 0.8632 


[0.8856, 0.8812, 0.8856, 0.8802, 0.8788, 0.88, 0.8792, 0.8752, 0.8752, 0.8632]

RUN NUMBER:  3


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  3 COMB:  DON )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7631594291330638, 'tp': 2290, 'tn': 2114, 'fp': 379, 'fn': 217, 'auroc': 0.953798357779125, 'auprc': 0.9515584788841998, 'eval_loss': 0.28480941314697267}
Correct predictions are:  4404
Total predictions are:  5000
Accuracy on test set is: 0.8808 



EPOCH NUMBER:  1 (RUN:  3 COMB:  DON )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7715733685644782, 'tp': 2290, 'tn': 2136, 'fp': 357, 'fn': 217, 'auroc': 0.9529794713590556, 'auprc': 0.9496258212981366, 'eval_loss': 0.4108632438659668}
Correct predictions are:  4426
Total predictions are:  5000
Accuracy on test set is: 0.8852 



EPOCH NUMBER:  2 (RUN:  3 COMB:  DON )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7704969782067448, 'tp': 2201, 'tn': 2225, 'fp': 268, 'fn': 306, 'auroc': 0.9533709944285964, 'auprc': 0.9521662402370228, 'eval_loss': 0.46457982873916626}
Correct predictions are:  4426
Total predictions are:  5000
Accuracy on test set is: 0.8852 



EPOCH NUMBER:  3 (RUN:  3 COMB:  DON )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7628112612410314, 'tp': 2219, 'tn': 2188, 'fp': 305, 'fn': 288, 'auroc': 0.9518404224289119, 'auprc': 0.9504204628087621, 'eval_loss': 0.5823481289863587}
Correct predictions are:  4407
Total predictions are:  5000
Accuracy on test set is: 0.8814 



EPOCH NUMBER:  4 (RUN:  3 COMB:  DON )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7487449157938141, 'tp': 2249, 'tn': 2121, 'fp': 372, 'fn': 258, 'auroc': 0.9475493487868945, 'auprc': 0.9444003532756503, 'eval_loss': 0.6939863550662995}
Correct predictions are:  4370
Total predictions are:  5000
Accuracy on test set is: 0.874 



EPOCH NUMBER:  5 (RUN:  3 COMB:  DON )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.756464703907524, 'tp': 2187, 'tn': 2204, 'fp': 289, 'fn': 320, 'auroc': 0.946950624092893, 'auprc': 0.944300575622387, 'eval_loss': 0.7535888725042343}
Correct predictions are:  4391
Total predictions are:  5000
Accuracy on test set is: 0.8782 



EPOCH NUMBER:  6 (RUN:  3 COMB:  DON )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.737017429424486, 'tp': 2230, 'tn': 2111, 'fp': 382, 'fn': 277, 'auroc': 0.9392177634672657, 'auprc': 0.9348856329475218, 'eval_loss': 0.8221415294647216}
Correct predictions are:  4341
Total predictions are:  5000
Accuracy on test set is: 0.8682 



EPOCH NUMBER:  7 (RUN:  3 COMB:  DON )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7400018400128159, 'tp': 2179, 'tn': 2171, 'fp': 322, 'fn': 328, 'auroc': 0.9393055241553094, 'auprc': 0.9304683234810319, 'eval_loss': 0.832569152379036}
Correct predictions are:  4350
Total predictions are:  5000
Accuracy on test set is: 0.87 



EPOCH NUMBER:  8 (RUN:  3 COMB:  DON )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.753107625562924, 'tp': 2235, 'tn': 2147, 'fp': 346, 'fn': 272, 'auroc': 0.9404939334724384, 'auprc': 0.9287305472374497, 'eval_loss': 0.7933264469385147}
Correct predictions are:  4382
Total predictions are:  5000
Accuracy on test set is: 0.8764 



EPOCH NUMBER:  9 (RUN:  3 COMB:  DON )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7444302708847869, 'tp': 2177, 'tn': 2184, 'fp': 309, 'fn': 330, 'auroc': 0.9398973687953713, 'auprc': 0.9351444008017331, 'eval_loss': 0.8074846596717834}
Correct predictions are:  4361
Total predictions are:  5000
Accuracy on test set is: 0.8722 


[0.8808, 0.8852, 0.8852, 0.8814, 0.874, 0.8782, 0.8682, 0.87, 0.8764, 0.8722]

RUN NUMBER:  4


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  4 COMB:  DON )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7677329217661575, 'tp': 2241, 'tn': 2178, 'fp': 315, 'fn': 266, 'auroc': 0.9547818054893551, 'auprc': 0.9524611249167848, 'eval_loss': 0.33439849452972414}
Correct predictions are:  4419
Total predictions are:  5000
Accuracy on test set is: 0.8838 



EPOCH NUMBER:  1 (RUN:  4 COMB:  DON )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7666177690062312, 'tp': 2246, 'tn': 2170, 'fp': 323, 'fn': 261, 'auroc': 0.9529913914525089, 'auprc': 0.9479325167965246, 'eval_loss': 0.39486953201293945}
Correct predictions are:  4416
Total predictions are:  5000
Accuracy on test set is: 0.8832 



EPOCH NUMBER:  2 (RUN:  4 COMB:  DON )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7614231200018614, 'tp': 2240, 'tn': 2163, 'fp': 330, 'fn': 267, 'auroc': 0.9514414593010408, 'auprc': 0.9452325984457647, 'eval_loss': 0.5309974311828614}
Correct predictions are:  4403
Total predictions are:  5000
Accuracy on test set is: 0.8806 



EPOCH NUMBER:  3 (RUN:  4 COMB:  DON )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7625700338941488, 'tp': 2259, 'tn': 2146, 'fp': 347, 'fn': 248, 'auroc': 0.9502100096464756, 'auprc': 0.943738842670121, 'eval_loss': 0.6472653386116027}
Correct predictions are:  4405
Total predictions are:  5000
Accuracy on test set is: 0.881 



EPOCH NUMBER:  4 (RUN:  4 COMB:  DON )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7670562311792483, 'tp': 2249, 'tn': 2168, 'fp': 325, 'fn': 258, 'auroc': 0.9527349894423172, 'auprc': 0.9497310947728886, 'eval_loss': 0.6234149250507355}
Correct predictions are:  4417
Total predictions are:  5000
Accuracy on test set is: 0.8834 



EPOCH NUMBER:  5 (RUN:  4 COMB:  DON )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7628009942811588, 'tp': 2208, 'tn': 2199, 'fp': 294, 'fn': 299, 'auroc': 0.9481786337204883, 'auprc': 0.9431045967947922, 'eval_loss': 0.6784190249204636}
Correct predictions are:  4407
Total predictions are:  5000
Accuracy on test set is: 0.8814 



EPOCH NUMBER:  6 (RUN:  4 COMB:  DON )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.75446868175237, 'tp': 2184, 'tn': 2202, 'fp': 291, 'fn': 323, 'auroc': 0.9267334255900567, 'auprc': 0.9377927093395585, 'eval_loss': 0.8411419273614883}
Correct predictions are:  4386
Total predictions are:  5000
Accuracy on test set is: 0.8772 



EPOCH NUMBER:  7 (RUN:  4 COMB:  DON )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7534348415622661, 'tp': 2231, 'tn': 2152, 'fp': 341, 'fn': 276, 'auroc': 0.944418924244366, 'auprc': 0.934869908066811, 'eval_loss': 0.8398147820711136}
Correct predictions are:  4383
Total predictions are:  5000
Accuracy on test set is: 0.8766 



EPOCH NUMBER:  8 (RUN:  4 COMB:  DON )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7625904057183598, 'tp': 2183, 'tn': 2223, 'fp': 270, 'fn': 324, 'auroc': 0.9471277454815246, 'auprc': 0.9407568531410206, 'eval_loss': 0.7983949614286423}
Correct predictions are:  4406
Total predictions are:  5000
Accuracy on test set is: 0.8812 



EPOCH NUMBER:  9 (RUN:  4 COMB:  DON )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7581193658591304, 'tp': 2228, 'tn': 2167, 'fp': 326, 'fn': 279, 'auroc': 0.9445070049349187, 'auprc': 0.9346510366857668, 'eval_loss': 0.849857437491417}
Correct predictions are:  4395
Total predictions are:  5000
Accuracy on test set is: 0.879 


[0.8838, 0.8832, 0.8806, 0.881, 0.8834, 0.8814, 0.8772, 0.8766, 0.8812, 0.879]

RUN NUMBER:  5


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  5 COMB:  DON )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7665173268385373, 'tp': 2194, 'tn': 2222, 'fp': 271, 'fn': 313, 'auroc': 0.9532476334614464, 'auprc': 0.9518762565150884, 'eval_loss': 0.30187781028747557}
Correct predictions are:  4416
Total predictions are:  5000
Accuracy on test set is: 0.8832 



EPOCH NUMBER:  1 (RUN:  5 COMB:  DON )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7765127966979094, 'tp': 2250, 'tn': 2191, 'fp': 302, 'fn': 257, 'auroc': 0.954612764164071, 'auprc': 0.9521588601088967, 'eval_loss': 0.37536391010284426}
Correct predictions are:  4441
Total predictions are:  5000
Accuracy on test set is: 0.8882 



EPOCH NUMBER:  2 (RUN:  5 COMB:  DON )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7555994491197685, 'tp': 2200, 'tn': 2189, 'fp': 304, 'fn': 307, 'auroc': 0.9503006503570988, 'auprc': 0.9478995314286358, 'eval_loss': 0.496437552690506}
Correct predictions are:  4389
Total predictions are:  5000
Accuracy on test set is: 0.8778 



EPOCH NUMBER:  3 (RUN:  5 COMB:  DON )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7671989418191497, 'tp': 2215, 'tn': 2203, 'fp': 290, 'fn': 292, 'auroc': 0.9519769834995506, 'auprc': 0.9505263824078323, 'eval_loss': 0.6047092842578888}
Correct predictions are:  4418
Total predictions are:  5000
Accuracy on test set is: 0.8836 



EPOCH NUMBER:  4 (RUN:  5 COMB:  DON )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7560687680307211, 'tp': 2186, 'tn': 2204, 'fp': 289, 'fn': 321, 'auroc': 0.9494788039138228, 'auprc': 0.9487948121373785, 'eval_loss': 0.6950506947278976}
Correct predictions are:  4390
Total predictions are:  5000
Accuracy on test set is: 0.878 



EPOCH NUMBER:  5 (RUN:  5 COMB:  DON )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7628650239292135, 'tp': 2195, 'tn': 2212, 'fp': 281, 'fn': 312, 'auroc': 0.9487156779309149, 'auprc': 0.9479650307102808, 'eval_loss': 0.7796709797620773}
Correct predictions are:  4407
Total predictions are:  5000
Accuracy on test set is: 0.8814 



EPOCH NUMBER:  6 (RUN:  5 COMB:  DON )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.759889057484688, 'tp': 2173, 'tn': 2226, 'fp': 267, 'fn': 334, 'auroc': 0.9443573237614182, 'auprc': 0.9348909080371823, 'eval_loss': 0.8178927579879761}
Correct predictions are:  4399
Total predictions are:  5000
Accuracy on test set is: 0.8798 



EPOCH NUMBER:  7 (RUN:  5 COMB:  DON )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7408812038439581, 'tp': 2203, 'tn': 2149, 'fp': 344, 'fn': 304, 'auroc': 0.9370230262605259, 'auprc': 0.934859341920618, 'eval_loss': 0.7937661262750626}
Correct predictions are:  4352
Total predictions are:  5000
Accuracy on test set is: 0.8704 



EPOCH NUMBER:  8 (RUN:  5 COMB:  DON )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7488208375468071, 'tp': 2204, 'tn': 2168, 'fp': 325, 'fn': 303, 'auroc': 0.9438757999862719, 'auprc': 0.9409230341415994, 'eval_loss': 0.8267028653144837}
Correct predictions are:  4372
Total predictions are:  5000
Accuracy on test set is: 0.8744 



EPOCH NUMBER:  9 (RUN:  5 COMB:  DON )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.734250787217699, 'tp': 2085, 'tn': 2246, 'fp': 247, 'fn': 422, 'auroc': 0.9443710838692975, 'auprc': 0.9397143947619395, 'eval_loss': 0.847114543390274}
Correct predictions are:  4331
Total predictions are:  5000
Accuracy on test set is: 0.8662 


[0.8832, 0.8882, 0.8778, 0.8836, 0.878, 0.8814, 0.8798, 0.8704, 0.8744, 0.8662]


 Over all runs maximum accuracies are: [0.8824, 0.8838, 0.8852, 0.8856, 0.8882]
The median is: 0.8852
XLNet Accuracy Score on Test set ->  ['0.8852 +/- 0.0030000000000000027']


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a



* * * * EVALUATION USING LOW AS PREPROCESSING FUNCTION * * * *

RUN NUMBER:  1


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  1 COMB:  LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.757436517161535, 'tp': 2236, 'tn': 2157, 'fp': 336, 'fn': 271, 'auroc': 0.9490482405382058, 'auprc': 0.9472925259732714, 'eval_loss': 0.3047703025817871}
Correct predictions are:  4393
Total predictions are:  5000
Accuracy on test set is: 0.8786 



EPOCH NUMBER:  1 (RUN:  1 COMB:  LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7594608100467679, 'tp': 2240, 'tn': 2158, 'fp': 335, 'fn': 267, 'auroc': 0.9500436883425166, 'auprc': 0.9479564842832594, 'eval_loss': 0.43529577732086183}
Correct predictions are:  4398
Total predictions are:  5000
Accuracy on test set is: 0.8796 



EPOCH NUMBER:  2 (RUN:  1 COMB:  LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7508343033714782, 'tp': 2209, 'tn': 2168, 'fp': 325, 'fn': 298, 'auroc': 0.9463282192132386, 'auprc': 0.9419131426914098, 'eval_loss': 0.5796194827079773}
Correct predictions are:  4377
Total predictions are:  5000
Accuracy on test set is: 0.8754 



EPOCH NUMBER:  3 (RUN:  1 COMB:  LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7567975296018438, 'tp': 2163, 'tn': 2228, 'fp': 265, 'fn': 344, 'auroc': 0.9486624775138236, 'auprc': 0.944945582995193, 'eval_loss': 0.6182709584236145}
Correct predictions are:  4391
Total predictions are:  5000
Accuracy on test set is: 0.8782 



EPOCH NUMBER:  4 (RUN:  1 COMB:  LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7524107412057858, 'tp': 2206, 'tn': 2175, 'fp': 318, 'fn': 301, 'auroc': 0.9432067547409573, 'auprc': 0.9364803490162389, 'eval_loss': 0.6945564494848251}
Correct predictions are:  4381
Total predictions are:  5000
Accuracy on test set is: 0.8762 



EPOCH NUMBER:  5 (RUN:  1 COMB:  LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7511988470985594, 'tp': 2195, 'tn': 2183, 'fp': 310, 'fn': 312, 'auroc': 0.9423175477695747, 'auprc': 0.9372506192351449, 'eval_loss': 0.7102322512149811}
Correct predictions are:  4378
Total predictions are:  5000
Accuracy on test set is: 0.8756 



EPOCH NUMBER:  6 (RUN:  1 COMB:  LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7577035289425205, 'tp': 2226, 'tn': 2168, 'fp': 325, 'fn': 281, 'auroc': 0.9239141234867282, 'auprc': 0.9205774031116645, 'eval_loss': 0.7398941785097122}
Correct predictions are:  4394
Total predictions are:  5000
Accuracy on test set is: 0.8788 



EPOCH NUMBER:  7 (RUN:  1 COMB:  LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7496100376928049, 'tp': 2188, 'tn': 2186, 'fp': 307, 'fn': 319, 'auroc': 0.9407868957692629, 'auprc': 0.9325297549891238, 'eval_loss': 0.788376808977127}
Correct predictions are:  4374
Total predictions are:  5000
Accuracy on test set is: 0.8748 



EPOCH NUMBER:  8 (RUN:  1 COMB:  LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7504068418915885, 'tp': 2190, 'tn': 2186, 'fp': 307, 'fn': 317, 'auroc': 0.9406966550617755, 'auprc': 0.9343787967709272, 'eval_loss': 0.907803080868721}
Correct predictions are:  4376
Total predictions are:  5000
Accuracy on test set is: 0.8752 



EPOCH NUMBER:  9 (RUN:  1 COMB:  LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7328177013064948, 'tp': 2165, 'tn': 2167, 'fp': 326, 'fn': 342, 'auroc': 0.9381888754007832, 'auprc': 0.9316455885107772, 'eval_loss': 0.9503380717992782}
Correct predictions are:  4332
Total predictions are:  5000
Accuracy on test set is: 0.8664 


[0.8786, 0.8796, 0.8754, 0.8782, 0.8762, 0.8756, 0.8788, 0.8748, 0.8752, 0.8664]

RUN NUMBER:  2


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  2 COMB:  LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7760972488020041, 'tp': 2248, 'tn': 2192, 'fp': 301, 'fn': 259, 'auroc': 0.9522850659149169, 'auprc': 0.9504645207332713, 'eval_loss': 0.32578331146240236}
Correct predictions are:  4440
Total predictions are:  5000
Accuracy on test set is: 0.888 



EPOCH NUMBER:  1 (RUN:  2 COMB:  LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7623987362690925, 'tp': 2213, 'tn': 2193, 'fp': 300, 'fn': 294, 'auroc': 0.9523120261262848, 'auprc': 0.9504587619180912, 'eval_loss': 0.4184016464233398}
Correct predictions are:  4406
Total predictions are:  5000
Accuracy on test set is: 0.8812 



EPOCH NUMBER:  2 (RUN:  2 COMB:  LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7504436945174147, 'tp': 2210, 'tn': 2166, 'fp': 327, 'fn': 297, 'auroc': 0.9483199148281323, 'auprc': 0.9467638548630396, 'eval_loss': 0.5348724220275879}
Correct predictions are:  4376
Total predictions are:  5000
Accuracy on test set is: 0.8752 



EPOCH NUMBER:  3 (RUN:  2 COMB:  LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7524473169938016, 'tp': 2213, 'tn': 2168, 'fp': 325, 'fn': 294, 'auroc': 0.9473006268369144, 'auprc': 0.9445880100607785, 'eval_loss': 0.5796882082939148}
Correct predictions are:  4381
Total predictions are:  5000
Accuracy on test set is: 0.8762 



EPOCH NUMBER:  4 (RUN:  2 COMB:  LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7582319476957344, 'tp': 2154, 'tn': 2240, 'fp': 253, 'fn': 353, 'auroc': 0.94731534695232, 'auprc': 0.9441404435498755, 'eval_loss': 0.6905289686203003}
Correct predictions are:  4394
Total predictions are:  5000
Accuracy on test set is: 0.8788 



EPOCH NUMBER:  5 (RUN:  2 COMB:  LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7347151126942245, 'tp': 2084, 'tn': 2248, 'fp': 245, 'fn': 423, 'auroc': 0.9441276419607129, 'auprc': 0.9398961063047483, 'eval_loss': 0.7966697211503982}
Correct predictions are:  4332
Total predictions are:  5000
Accuracy on test set is: 0.8664 



EPOCH NUMBER:  6 (RUN:  2 COMB:  LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7484791495302745, 'tp': 2238, 'tn': 2132, 'fp': 361, 'fn': 269, 'auroc': 0.9403479323277895, 'auprc': 0.9305344325481366, 'eval_loss': 0.8130127858638764}
Correct predictions are:  4370
Total predictions are:  5000
Accuracy on test set is: 0.874 



EPOCH NUMBER:  7 (RUN:  2 COMB:  LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7365090823639922, 'tp': 2157, 'tn': 2184, 'fp': 309, 'fn': 350, 'auroc': 0.9407589755503684, 'auprc': 0.9305403970833948, 'eval_loss': 0.8300205732345581}
Correct predictions are:  4341
Total predictions are:  5000
Accuracy on test set is: 0.8682 



EPOCH NUMBER:  8 (RUN:  2 COMB:  LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7288005932862346, 'tp': 2096, 'tn': 2223, 'fp': 270, 'fn': 411, 'auroc': 0.9287812816452482, 'auprc': 0.9335961332885224, 'eval_loss': 0.934154088807106}
Correct predictions are:  4319
Total predictions are:  5000
Accuracy on test set is: 0.8638 



EPOCH NUMBER:  9 (RUN:  2 COMB:  LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.724932224540146, 'tp': 2087, 'tn': 2222, 'fp': 271, 'fn': 420, 'auroc': 0.9312914613250569, 'auprc': 0.9333824667439117, 'eval_loss': 0.8113436053276062}
Correct predictions are:  4309
Total predictions are:  5000
Accuracy on test set is: 0.8618 


[0.888, 0.8812, 0.8752, 0.8762, 0.8788, 0.8664, 0.874, 0.8682, 0.8638, 0.8618]

RUN NUMBER:  3


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  3 COMB:  LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7420069996277834, 'tp': 2192, 'tn': 2163, 'fp': 330, 'fn': 315, 'auroc': 0.9375665505217562, 'auprc': 0.9408822792680656, 'eval_loss': 0.3903642738342285}
Correct predictions are:  4355
Total predictions are:  5000
Accuracy on test set is: 0.871 



EPOCH NUMBER:  1 (RUN:  3 COMB:  LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7539812080272342, 'tp': 2240, 'tn': 2144, 'fp': 349, 'fn': 267, 'auroc': 0.9484383957570226, 'auprc': 0.9460018040161364, 'eval_loss': 0.3911374557495117}
Correct predictions are:  4384
Total predictions are:  5000
Accuracy on test set is: 0.8768 



EPOCH NUMBER:  2 (RUN:  3 COMB:  LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7545270377546415, 'tp': 2178, 'tn': 2208, 'fp': 285, 'fn': 329, 'auroc': 0.9489219195478493, 'auprc': 0.9460679608447854, 'eval_loss': 0.4206196403503418}
Correct predictions are:  4386
Total predictions are:  5000
Accuracy on test set is: 0.8772 



EPOCH NUMBER:  3 (RUN:  3 COMB:  LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7473876592672241, 'tp': 2164, 'tn': 2204, 'fp': 289, 'fn': 343, 'auroc': 0.9468842235723127, 'auprc': 0.9447889694927754, 'eval_loss': 0.5126339588165283}
Correct predictions are:  4368
Total predictions are:  5000
Accuracy on test set is: 0.8736 



EPOCH NUMBER:  4 (RUN:  3 COMB:  LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7470376969957256, 'tp': 2160, 'tn': 2207, 'fp': 286, 'fn': 347, 'auroc': 0.9423681881665951, 'auprc': 0.941689949801769, 'eval_loss': 0.5817950847625732}
Correct predictions are:  4367
Total predictions are:  5000
Accuracy on test set is: 0.8734 



EPOCH NUMBER:  5 (RUN:  3 COMB:  LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7490458265836357, 'tp': 2162, 'tn': 2210, 'fp': 283, 'fn': 345, 'auroc': 0.9417989837040323, 'auprc': 0.9373057709057835, 'eval_loss': 0.6569240277290345}
Correct predictions are:  4372
Total predictions are:  5000
Accuracy on test set is: 0.8744 



EPOCH NUMBER:  6 (RUN:  3 COMB:  LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.754040818544608, 'tp': 2214, 'tn': 2171, 'fp': 322, 'fn': 293, 'auroc': 0.9443422036428766, 'auprc': 0.9415685717350654, 'eval_loss': 0.7167728967666626}
Correct predictions are:  4385
Total predictions are:  5000
Accuracy on test set is: 0.877 



EPOCH NUMBER:  7 (RUN:  3 COMB:  LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7520393752833273, 'tp': 2185, 'tn': 2195, 'fp': 298, 'fn': 322, 'auroc': 0.90900376658953, 'auprc': 0.838278491405983, 'eval_loss': 0.722356684589386}
Correct predictions are:  4380
Total predictions are:  5000
Accuracy on test set is: 0.876 



EPOCH NUMBER:  8 (RUN:  3 COMB:  LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7473616471423244, 'tp': 2166, 'tn': 2202, 'fp': 291, 'fn': 341, 'auroc': 0.9381379150012537, 'auprc': 0.9124826724512517, 'eval_loss': 0.7329119826316833}
Correct predictions are:  4368
Total predictions are:  5000
Accuracy on test set is: 0.8736 



EPOCH NUMBER:  9 (RUN:  3 COMB:  LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7251145618372513, 'tp': 2082, 'tn': 2227, 'fp': 266, 'fn': 425, 'auroc': 0.9399899295210473, 'auprc': 0.9334521092371239, 'eval_loss': 0.7785237827301026}
Correct predictions are:  4309
Total predictions are:  5000
Accuracy on test set is: 0.8618 


[0.871, 0.8768, 0.8772, 0.8736, 0.8734, 0.8744, 0.877, 0.876, 0.8736, 0.8618]

RUN NUMBER:  4


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  4 COMB:  LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7580005753179749, 'tp': 2209, 'tn': 2186, 'fp': 307, 'fn': 298, 'auroc': 0.9493909632251517, 'auprc': 0.9474801207265655, 'eval_loss': 0.3209159469604492}
Correct predictions are:  4395
Total predictions are:  5000
Accuracy on test set is: 0.879 



EPOCH NUMBER:  1 (RUN:  4 COMB:  LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7644079548180774, 'tp': 2220, 'tn': 2191, 'fp': 302, 'fn': 287, 'auroc': 0.9492818423696442, 'auprc': 0.9441992487525184, 'eval_loss': 0.4046288284301758}
Correct predictions are:  4411
Total predictions are:  5000
Accuracy on test set is: 0.8822 



EPOCH NUMBER:  2 (RUN:  4 COMB:  LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7628026022142538, 'tp': 2216, 'tn': 2191, 'fp': 302, 'fn': 291, 'auroc': 0.9511284968474153, 'auprc': 0.9484679244789473, 'eval_loss': 0.5204276683807373}
Correct predictions are:  4407
Total predictions are:  5000
Accuracy on test set is: 0.8814 



EPOCH NUMBER:  3 (RUN:  4 COMB:  LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7420053861076453, 'tp': 2180, 'tn': 2175, 'fp': 318, 'fn': 327, 'auroc': 0.9471280654840334, 'auprc': 0.9444605353347925, 'eval_loss': 0.7147129577636718}
Correct predictions are:  4355
Total predictions are:  5000
Accuracy on test set is: 0.871 



EPOCH NUMBER:  4 (RUN:  4 COMB:  LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7440648265188944, 'tp': 2205, 'tn': 2155, 'fp': 338, 'fn': 302, 'auroc': 0.9461840580830154, 'auprc': 0.9444421988491826, 'eval_loss': 0.693927110862732}
Correct predictions are:  4360
Total predictions are:  5000
Accuracy on test set is: 0.872 



EPOCH NUMBER:  5 (RUN:  4 COMB:  LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7388526751332851, 'tp': 2197, 'tn': 2150, 'fp': 343, 'fn': 310, 'auroc': 0.9461560578634937, 'auprc': 0.9456628616725689, 'eval_loss': 0.6863615676879883}
Correct predictions are:  4347
Total predictions are:  5000
Accuracy on test set is: 0.8694 



EPOCH NUMBER:  6 (RUN:  4 COMB:  LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7442091445748173, 'tp': 2218, 'tn': 2142, 'fp': 351, 'fn': 289, 'auroc': 0.9432780352997968, 'auprc': 0.9422600659763226, 'eval_loss': 0.7121925441741943}
Correct predictions are:  4360
Total predictions are:  5000
Accuracy on test set is: 0.872 



EPOCH NUMBER:  7 (RUN:  4 COMB:  LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7344086769327565, 'tp': 2103, 'tn': 2230, 'fp': 263, 'fn': 404, 'auroc': 0.9440194011121048, 'auprc': 0.9402557236408022, 'eval_loss': 0.8025189218521118}
Correct predictions are:  4333
Total predictions are:  5000
Accuracy on test set is: 0.8666 



EPOCH NUMBER:  8 (RUN:  4 COMB:  LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7352008399662285, 'tp': 2181, 'tn': 2157, 'fp': 336, 'fn': 326, 'auroc': 0.9432173948243755, 'auprc': 0.9413459841666196, 'eval_loss': 0.7805612021923065}
Correct predictions are:  4338
Total predictions are:  5000
Accuracy on test set is: 0.8676 



EPOCH NUMBER:  9 (RUN:  4 COMB:  LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7122995566062743, 'tp': 2112, 'tn': 2168, 'fp': 325, 'fn': 395, 'auroc': 0.8934652447675189, 'auprc': 0.8112005879127987, 'eval_loss': 0.7304491938591003}
Correct predictions are:  4280
Total predictions are:  5000
Accuracy on test set is: 0.856 


[0.879, 0.8822, 0.8814, 0.871, 0.872, 0.8694, 0.872, 0.8666, 0.8676, 0.856]

RUN NUMBER:  5


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  5 COMB:  LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7553635249692441, 'tp': 2250, 'tn': 2137, 'fp': 356, 'fn': 257, 'auroc': 0.9484344757262897, 'auprc': 0.9478969338503273, 'eval_loss': 0.37691426010131834}
Correct predictions are:  4387
Total predictions are:  5000
Accuracy on test set is: 0.8774 



EPOCH NUMBER:  1 (RUN:  5 COMB:  LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7612913990073962, 'tp': 2190, 'tn': 2213, 'fp': 280, 'fn': 317, 'auroc': 0.9496356051431443, 'auprc': 0.9471542402096162, 'eval_loss': 0.3829421310424805}
Correct predictions are:  4403
Total predictions are:  5000
Accuracy on test set is: 0.8806 



EPOCH NUMBER:  2 (RUN:  5 COMB:  LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7539990984344684, 'tp': 2203, 'tn': 2182, 'fp': 311, 'fn': 304, 'auroc': 0.9485314364864621, 'auprc': 0.9450987529310418, 'eval_loss': 0.5195993223190307}
Correct predictions are:  4385
Total predictions are:  5000
Accuracy on test set is: 0.877 



EPOCH NUMBER:  3 (RUN:  5 COMB:  LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7388018860270744, 'tp': 2186, 'tn': 2161, 'fp': 332, 'fn': 321, 'auroc': 0.9441664422649074, 'auprc': 0.9434248029068643, 'eval_loss': 0.6241020763397217}
Correct predictions are:  4347
Total predictions are:  5000
Accuracy on test set is: 0.8694 



EPOCH NUMBER:  4 (RUN:  5 COMB:  LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7386587519358689, 'tp': 2148, 'tn': 2198, 'fp': 295, 'fn': 359, 'auroc': 0.9452360506506372, 'auprc': 0.9446484959547817, 'eval_loss': 0.6323475096225738}
Correct predictions are:  4346
Total predictions are:  5000
Accuracy on test set is: 0.8692 



EPOCH NUMBER:  5 (RUN:  5 COMB:  LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7193665318018873, 'tp': 2298, 'tn': 1988, 'fp': 505, 'fn': 209, 'auroc': 0.9310131391430109, 'auprc': 0.9315512047050384, 'eval_loss': 0.6512817723989487}
Correct predictions are:  4286
Total predictions are:  5000
Accuracy on test set is: 0.8572 



EPOCH NUMBER:  6 (RUN:  5 COMB:  LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7390316556510906, 'tp': 2233, 'tn': 2113, 'fp': 380, 'fn': 274, 'auroc': 0.9395854463498994, 'auprc': 0.9341579796209513, 'eval_loss': 0.7618542065143585}
Correct predictions are:  4346
Total predictions are:  5000
Accuracy on test set is: 0.8692 



EPOCH NUMBER:  7 (RUN:  5 COMB:  LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7323310788480664, 'tp': 2211, 'tn': 2119, 'fp': 374, 'fn': 296, 'auroc': 0.9393645646181866, 'auprc': 0.9311233442926329, 'eval_loss': 0.8033668543338776}
Correct predictions are:  4330
Total predictions are:  5000
Accuracy on test set is: 0.866 



EPOCH NUMBER:  8 (RUN:  5 COMB:  LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7332271024451422, 'tp': 2186, 'tn': 2147, 'fp': 346, 'fn': 321, 'auroc': 0.9402907318793379, 'auprc': 0.937303985240487, 'eval_loss': 0.8918741559743881}
Correct predictions are:  4333
Total predictions are:  5000
Accuracy on test set is: 0.8666 



EPOCH NUMBER:  9 (RUN:  5 COMB:  LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7336099429722146, 'tp': 2168, 'tn': 2166, 'fp': 327, 'fn': 339, 'auroc': 0.9383063963221472, 'auprc': 0.9305858475249558, 'eval_loss': 0.845390655374527}
Correct predictions are:  4334
Total predictions are:  5000
Accuracy on test set is: 0.8668 


[0.8774, 0.8806, 0.877, 0.8694, 0.8692, 0.8572, 0.8692, 0.866, 0.8666, 0.8668]


 Over all runs maximum accuracies are: [0.8772, 0.8796, 0.8806, 0.8822, 0.888]
The median is: 0.8806
XLNet Accuracy Score on Test set ->  ['0.8806 +/- 0.007399999999999962']


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a



* * * * EVALUATION USING RSW AS PREPROCESSING FUNCTION * * * *

RUN NUMBER:  1


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  1 COMB:  RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7645608078040103, 'tp': 2320, 'tn': 2084, 'fp': 409, 'fn': 187, 'auroc': 0.9514410592979049, 'auprc': 0.9482385239793699, 'eval_loss': 0.3680738822937012}
Correct predictions are:  4404
Total predictions are:  5000
Accuracy on test set is: 0.8808 



EPOCH NUMBER:  1 (RUN:  1 COMB:  RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7650456802225111, 'tp': 2265, 'tn': 2146, 'fp': 347, 'fn': 242, 'auroc': 0.9535987562142488, 'auprc': 0.9507401117566325, 'eval_loss': 0.40814799308776856}
Correct predictions are:  4411
Total predictions are:  5000
Accuracy on test set is: 0.8822 



EPOCH NUMBER:  2 (RUN:  1 COMB:  RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7685343585302332, 'tp': 2195, 'tn': 2226, 'fp': 267, 'fn': 312, 'auroc': 0.95243466708779, 'auprc': 0.9491208231389312, 'eval_loss': 0.49380685386657713}
Correct predictions are:  4421
Total predictions are:  5000
Accuracy on test set is: 0.8842 



EPOCH NUMBER:  3 (RUN:  1 COMB:  RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.760501123730104, 'tp': 2188, 'tn': 2213, 'fp': 280, 'fn': 319, 'auroc': 0.9492950424731331, 'auprc': 0.9433049758199907, 'eval_loss': 0.5348460397720337}
Correct predictions are:  4401
Total predictions are:  5000
Accuracy on test set is: 0.8802 



EPOCH NUMBER:  4 (RUN:  1 COMB:  RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7531978489090965, 'tp': 2199, 'tn': 2184, 'fp': 309, 'fn': 308, 'auroc': 0.9487155179296605, 'auprc': 0.9454474200756695, 'eval_loss': 0.6130762008666992}
Correct predictions are:  4383
Total predictions are:  5000
Accuracy on test set is: 0.8766 



EPOCH NUMBER:  5 (RUN:  1 COMB:  RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7516009467928848, 'tp': 2194, 'tn': 2185, 'fp': 308, 'fn': 313, 'auroc': 0.9383568767179136, 'auprc': 0.9178731212537956, 'eval_loss': 0.6462575120925903}
Correct predictions are:  4379
Total predictions are:  5000
Accuracy on test set is: 0.8758 



EPOCH NUMBER:  6 (RUN:  1 COMB:  RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7544277172541536, 'tp': 2190, 'tn': 2196, 'fp': 297, 'fn': 317, 'auroc': 0.9470294247106898, 'auprc': 0.9409776589667238, 'eval_loss': 0.7098305664539337}
Correct predictions are:  4386
Total predictions are:  5000
Accuracy on test set is: 0.8772 



EPOCH NUMBER:  7 (RUN:  1 COMB:  RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7568068715877591, 'tp': 2198, 'tn': 2194, 'fp': 299, 'fn': 309, 'auroc': 0.9480427126548672, 'auprc': 0.94469365970076, 'eval_loss': 0.6888250451564789}
Correct predictions are:  4392
Total predictions are:  5000
Accuracy on test set is: 0.8784 



EPOCH NUMBER:  8 (RUN:  1 COMB:  RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7526052443714031, 'tp': 2228, 'tn': 2153, 'fp': 340, 'fn': 279, 'auroc': 0.9378080724152877, 'auprc': 0.9050343065398753, 'eval_loss': 0.8009770909070969}
Correct predictions are:  4381
Total predictions are:  5000
Accuracy on test set is: 0.8762 



EPOCH NUMBER:  9 (RUN:  1 COMB:  RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6943910962010816, 'tp': 2011, 'tn': 2218, 'fp': 275, 'fn': 496, 'auroc': 0.9142901280346039, 'auprc': 0.9032099007469354, 'eval_loss': 0.6444848457336426}
Correct predictions are:  4229
Total predictions are:  5000
Accuracy on test set is: 0.8458 


[0.8808, 0.8822, 0.8842, 0.8802, 0.8766, 0.8758, 0.8772, 0.8784, 0.8762, 0.8458]

RUN NUMBER:  2


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  2 COMB:  RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7673777329031454, 'tp': 2190, 'tn': 2228, 'fp': 265, 'fn': 317, 'auroc': 0.953082752168777, 'auprc': 0.9504038290913542, 'eval_loss': 0.34385000762939455}
Correct predictions are:  4418
Total predictions are:  5000
Accuracy on test set is: 0.8836 



EPOCH NUMBER:  1 (RUN:  2 COMB:  RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7606378576409503, 'tp': 2240, 'tn': 2161, 'fp': 332, 'fn': 267, 'auroc': 0.9531406726228734, 'auprc': 0.9508403494871069, 'eval_loss': 0.4442139841079712}
Correct predictions are:  4401
Total predictions are:  5000
Accuracy on test set is: 0.8802 



EPOCH NUMBER:  2 (RUN:  2 COMB:  RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7713643567824668, 'tp': 2248, 'tn': 2180, 'fp': 313, 'fn': 259, 'auroc': 0.9526808290176996, 'auprc': 0.9502272062623411, 'eval_loss': 0.496629861664772}
Correct predictions are:  4428
Total predictions are:  5000
Accuracy on test set is: 0.8856 



EPOCH NUMBER:  3 (RUN:  2 COMB:  RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7631894912566016, 'tp': 2268, 'tn': 2138, 'fp': 355, 'fn': 239, 'auroc': 0.9508479346478078, 'auprc': 0.9472924570948043, 'eval_loss': 0.6084845812797547}
Correct predictions are:  4406
Total predictions are:  5000
Accuracy on test set is: 0.8812 



EPOCH NUMBER:  4 (RUN:  2 COMB:  RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.77080822772959, 'tp': 2228, 'tn': 2199, 'fp': 294, 'fn': 279, 'auroc': 0.9508148143881449, 'auprc': 0.9474529269356108, 'eval_loss': 0.6677744735717773}
Correct predictions are:  4427
Total predictions are:  5000
Accuracy on test set is: 0.8854 



EPOCH NUMBER:  5 (RUN:  2 COMB:  RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7719087428423442, 'tp': 2258, 'tn': 2171, 'fp': 322, 'fn': 249, 'auroc': 0.9480741529013588, 'auprc': 0.9439997879839164, 'eval_loss': 0.682078784608841}
Correct predictions are:  4429
Total predictions are:  5000
Accuracy on test set is: 0.8858 



EPOCH NUMBER:  6 (RUN:  2 COMB:  RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7596819865209468, 'tp': 2189, 'tn': 2210, 'fp': 283, 'fn': 318, 'auroc': 0.9456866141830551, 'auprc': 0.9433113434980461, 'eval_loss': 0.7366863730430603}
Correct predictions are:  4399
Total predictions are:  5000
Accuracy on test set is: 0.8798 



EPOCH NUMBER:  7 (RUN:  2 COMB:  RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7588819332040105, 'tp': 2188, 'tn': 2209, 'fp': 284, 'fn': 319, 'auroc': 0.9484308756980655, 'auprc': 0.947098307025798, 'eval_loss': 0.6929545159339905}
Correct predictions are:  4397
Total predictions are:  5000
Accuracy on test set is: 0.8794 



EPOCH NUMBER:  8 (RUN:  2 COMB:  RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7541919020114848, 'tp': 2229, 'tn': 2156, 'fp': 337, 'fn': 278, 'auroc': 0.9439382804761189, 'auprc': 0.9418546891955699, 'eval_loss': 0.6680172333717346}
Correct predictions are:  4385
Total predictions are:  5000
Accuracy on test set is: 0.877 



EPOCH NUMBER:  9 (RUN:  2 COMB:  RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7525434012162666, 'tp': 2135, 'tn': 2244, 'fp': 249, 'fn': 372, 'auroc': 0.9459405361738036, 'auprc': 0.9438935770512153, 'eval_loss': 0.7944032460451126}
Correct predictions are:  4379
Total predictions are:  5000
Accuracy on test set is: 0.8758 


[0.8836, 0.8802, 0.8856, 0.8812, 0.8854, 0.8858, 0.8798, 0.8794, 0.877, 0.8758]

RUN NUMBER:  3


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  3 COMB:  RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7460916789599807, 'tp': 2304, 'tn': 2053, 'fp': 440, 'fn': 203, 'auroc': 0.9490692007025335, 'auprc': 0.9452190529834283, 'eval_loss': 0.31745809020996096}
Correct predictions are:  4357
Total predictions are:  5000
Accuracy on test set is: 0.8714 



EPOCH NUMBER:  1 (RUN:  3 COMB:  RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7681174899342424, 'tp': 2196, 'tn': 2224, 'fp': 269, 'fn': 311, 'auroc': 0.9529036307644652, 'auprc': 0.9513346851035396, 'eval_loss': 0.32397093620300293}
Correct predictions are:  4420
Total predictions are:  5000
Accuracy on test set is: 0.884 



EPOCH NUMBER:  2 (RUN:  3 COMB:  RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7603992238773697, 'tp': 2211, 'tn': 2190, 'fp': 303, 'fn': 296, 'auroc': 0.951440659294769, 'auprc': 0.9500923284395271, 'eval_loss': 0.4500400478363037}
Correct predictions are:  4401
Total predictions are:  5000
Accuracy on test set is: 0.8802 



EPOCH NUMBER:  3 (RUN:  3 COMB:  RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7498018544391343, 'tp': 2166, 'tn': 2208, 'fp': 285, 'fn': 341, 'auroc': 0.9505269721314615, 'auprc': 0.9480422779778037, 'eval_loss': 0.5433348230361938}
Correct predictions are:  4374
Total predictions are:  5000
Accuracy on test set is: 0.8748 



EPOCH NUMBER:  4 (RUN:  3 COMB:  RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7685110342092043, 'tp': 2240, 'tn': 2181, 'fp': 312, 'fn': 267, 'auroc': 0.9514837796328324, 'auprc': 0.949254823297094, 'eval_loss': 0.5669507570266724}
Correct predictions are:  4421
Total predictions are:  5000
Accuracy on test set is: 0.8842 



EPOCH NUMBER:  5 (RUN:  3 COMB:  RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7656600538780476, 'tp': 2231, 'tn': 2183, 'fp': 310, 'fn': 276, 'auroc': 0.9504855318065695, 'auprc': 0.9471432642894662, 'eval_loss': 0.6697180639266967}
Correct predictions are:  4414
Total predictions are:  5000
Accuracy on test set is: 0.8828 



EPOCH NUMBER:  6 (RUN:  3 COMB:  RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7534348415622661, 'tp': 2231, 'tn': 2152, 'fp': 341, 'fn': 276, 'auroc': 0.9298241698214914, 'auprc': 0.8868443224643473, 'eval_loss': 0.7618031270980835}
Correct predictions are:  4383
Total predictions are:  5000
Accuracy on test set is: 0.8766 



EPOCH NUMBER:  7 (RUN:  3 COMB:  RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7384497590968393, 'tp': 2137, 'tn': 2208, 'fp': 285, 'fn': 370, 'auroc': 0.9410027374614617, 'auprc': 0.9378071114124045, 'eval_loss': 0.811439253282547}
Correct predictions are:  4345
Total predictions are:  5000
Accuracy on test set is: 0.869 



EPOCH NUMBER:  8 (RUN:  3 COMB:  RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7301789101828283, 'tp': 2105, 'tn': 2218, 'fp': 275, 'fn': 402, 'auroc': 0.923388599366619, 'auprc': 0.8817527015868488, 'eval_loss': 0.7566179364204407}
Correct predictions are:  4323
Total predictions are:  5000
Accuracy on test set is: 0.8646 



EPOCH NUMBER:  9 (RUN:  3 COMB:  RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7265698441272405, 'tp': 2042, 'tn': 2266, 'fp': 227, 'fn': 465, 'auroc': 0.940031449846567, 'auprc': 0.9374726825790936, 'eval_loss': 0.8224995491981506}
Correct predictions are:  4308
Total predictions are:  5000
Accuracy on test set is: 0.8616 


[0.8714, 0.884, 0.8802, 0.8748, 0.8842, 0.8828, 0.8766, 0.869, 0.8646, 0.8616]

RUN NUMBER:  4


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  4 COMB:  RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7709337374316216, 'tp': 2245, 'tn': 2182, 'fp': 311, 'fn': 262, 'auroc': 0.9539157986998617, 'auprc': 0.9515779026871357, 'eval_loss': 0.3128771053314209}
Correct predictions are:  4427
Total predictions are:  5000
Accuracy on test set is: 0.8854 



EPOCH NUMBER:  1 (RUN:  4 COMB:  RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7724117412735656, 'tp': 2231, 'tn': 2200, 'fp': 293, 'fn': 276, 'auroc': 0.9539475589488622, 'auprc': 0.9518611633687857, 'eval_loss': 0.41528333930969236}
Correct predictions are:  4431
Total predictions are:  5000
Accuracy on test set is: 0.8862 



EPOCH NUMBER:  2 (RUN:  4 COMB:  RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7668007900444209, 'tp': 2220, 'tn': 2197, 'fp': 296, 'fn': 287, 'auroc': 0.9524311470601929, 'auprc': 0.9494330207511559, 'eval_loss': 0.482421169090271}
Correct predictions are:  4417
Total predictions are:  5000
Accuracy on test set is: 0.8834 



EPOCH NUMBER:  3 (RUN:  4 COMB:  RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7596084299664829, 'tp': 2201, 'tn': 2198, 'fp': 295, 'fn': 306, 'auroc': 0.9528284301748926, 'auprc': 0.9515078362685803, 'eval_loss': 0.5664635862350463}
Correct predictions are:  4399
Total predictions are:  5000
Accuracy on test set is: 0.8798 



EPOCH NUMBER:  4 (RUN:  4 COMB:  RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.758359716008647, 'tp': 2167, 'tn': 2228, 'fp': 265, 'fn': 340, 'auroc': 0.9519038629262854, 'auprc': 0.9491507510069731, 'eval_loss': 0.626471734046936}
Correct predictions are:  4395
Total predictions are:  5000
Accuracy on test set is: 0.879 



EPOCH NUMBER:  5 (RUN:  4 COMB:  RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7712228086429914, 'tp': 2212, 'tn': 2216, 'fp': 277, 'fn': 295, 'auroc': 0.9509960158087639, 'auprc': 0.9482767098276522, 'eval_loss': 0.6868802856445313}
Correct predictions are:  4428
Total predictions are:  5000
Accuracy on test set is: 0.8856 



EPOCH NUMBER:  6 (RUN:  4 COMB:  RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7636158630879522, 'tp': 2204, 'tn': 2205, 'fp': 288, 'fn': 303, 'auroc': 0.949954487643183, 'auprc': 0.9477384748185893, 'eval_loss': 0.7107810857772827}
Correct predictions are:  4409
Total predictions are:  5000
Accuracy on test set is: 0.8818 



EPOCH NUMBER:  7 (RUN:  4 COMB:  RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7656388372989954, 'tp': 2228, 'tn': 2186, 'fp': 307, 'fn': 279, 'auroc': 0.9484936761904214, 'auprc': 0.9434568760358409, 'eval_loss': 0.722411682844162}
Correct predictions are:  4414
Total predictions are:  5000
Accuracy on test set is: 0.8828 



EPOCH NUMBER:  8 (RUN:  4 COMB:  RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7544164031834758, 'tp': 2159, 'tn': 2226, 'fp': 267, 'fn': 348, 'auroc': 0.947524148589325, 'auprc': 0.942888250084205, 'eval_loss': 0.7699180327892303}
Correct predictions are:  4385
Total predictions are:  5000
Accuracy on test set is: 0.877 



EPOCH NUMBER:  9 (RUN:  4 COMB:  RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.748183060518483, 'tp': 2221, 'tn': 2149, 'fp': 344, 'fn': 286, 'auroc': 0.949131441190499, 'auprc': 0.9462985793452361, 'eval_loss': 0.8231665201425552}
Correct predictions are:  4370
Total predictions are:  5000
Accuracy on test set is: 0.874 


[0.8854, 0.8862, 0.8834, 0.8798, 0.879, 0.8856, 0.8818, 0.8828, 0.877, 0.874]

RUN NUMBER:  5


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  5 COMB:  RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7745177446213405, 'tp': 2248, 'tn': 2188, 'fp': 305, 'fn': 259, 'auroc': 0.9533229140516462, 'auprc': 0.949654459544981, 'eval_loss': 0.3111481773376465}
Correct predictions are:  4436
Total predictions are:  5000
Accuracy on test set is: 0.8872 



EPOCH NUMBER:  1 (RUN:  5 COMB:  RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7765351648461838, 'tp': 2252, 'tn': 2189, 'fp': 304, 'fn': 255, 'auroc': 0.9555109312057006, 'auprc': 0.951618899339984, 'eval_loss': 0.3553078357696533}
Correct predictions are:  4441
Total predictions are:  5000
Accuracy on test set is: 0.8882 



EPOCH NUMBER:  2 (RUN:  5 COMB:  RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7756254156788185, 'tp': 2217, 'tn': 2222, 'fp': 271, 'fn': 290, 'auroc': 0.9547996456292217, 'auprc': 0.9529186786560354, 'eval_loss': 0.48919870314598085}
Correct predictions are:  4439
Total predictions are:  5000
Accuracy on test set is: 0.8878 



EPOCH NUMBER:  3 (RUN:  5 COMB:  RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7596414211973253, 'tp': 2221, 'tn': 2178, 'fp': 315, 'fn': 286, 'auroc': 0.9487123179045723, 'auprc': 0.9470086500676429, 'eval_loss': 0.5854933232784271}
Correct predictions are:  4399
Total predictions are:  5000
Accuracy on test set is: 0.8798 



EPOCH NUMBER:  4 (RUN:  5 COMB:  RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7628429234174483, 'tp': 2198, 'tn': 2209, 'fp': 284, 'fn': 309, 'auroc': 0.9493432028507105, 'auprc': 0.94809154573466, 'eval_loss': 0.6672591962099075}
Correct predictions are:  4407
Total predictions are:  5000
Accuracy on test set is: 0.8814 



EPOCH NUMBER:  5 (RUN:  5 COMB:  RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7616384295597328, 'tp': 2223, 'tn': 2181, 'fp': 312, 'fn': 284, 'auroc': 0.9478035907801519, 'auprc': 0.9444456859789581, 'eval_loss': 0.7415993120908737}
Correct predictions are:  4404
Total predictions are:  5000
Accuracy on test set is: 0.8808 



EPOCH NUMBER:  6 (RUN:  5 COMB:  RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7619983252761491, 'tp': 2212, 'tn': 2193, 'fp': 300, 'fn': 295, 'auroc': 0.9466393416524385, 'auprc': 0.9434401782368724, 'eval_loss': 0.7184584836959839}
Correct predictions are:  4405
Total predictions are:  5000
Accuracy on test set is: 0.881 



EPOCH NUMBER:  7 (RUN:  5 COMB:  RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7564019028330015, 'tp': 2145, 'tn': 2244, 'fp': 249, 'fn': 362, 'auroc': 0.9464499001672174, 'auprc': 0.9447396107088367, 'eval_loss': 0.8109019260406494}
Correct predictions are:  4389
Total predictions are:  5000
Accuracy on test set is: 0.8778 



EPOCH NUMBER:  8 (RUN:  5 COMB:  RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7380455967626429, 'tp': 2195, 'tn': 2150, 'fp': 343, 'fn': 312, 'auroc': 0.9390274419751451, 'auprc': 0.9339329446553502, 'eval_loss': 0.7446291259765625}
Correct predictions are:  4345
Total predictions are:  5000
Accuracy on test set is: 0.869 



EPOCH NUMBER:  9 (RUN:  5 COMB:  RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7372388987366597, 'tp': 2136, 'tn': 2206, 'fp': 287, 'fn': 371, 'auroc': 0.9397923279718513, 'auprc': 0.936355604894662, 'eval_loss': 0.7860926508903503}
Correct predictions are:  4342
Total predictions are:  5000
Accuracy on test set is: 0.8684 


[0.8872, 0.8882, 0.8878, 0.8798, 0.8814, 0.8808, 0.881, 0.8778, 0.869, 0.8684]


 Over all runs maximum accuracies are: [0.8842, 0.8842, 0.8858, 0.8862, 0.8882]
The median is: 0.8858
XLNet Accuracy Score on Test set ->  ['0.8858 +/- 0.0023999999999999577']


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a



* * * * EVALUATION USING STM AS PREPROCESSING FUNCTION * * * *

RUN NUMBER:  1


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  1 COMB:  STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.27943116795664985, 'tp': 1870, 'tn': 1313, 'fp': 1180, 'fn': 637, 'auroc': 0.6985216364096295, 'auprc': 0.6830316163452801, 'eval_loss': 0.675272265625}
Correct predictions are:  3183
Total predictions are:  5000
Accuracy on test set is: 0.6366 



EPOCH NUMBER:  1 (RUN:  1 COMB:  STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.5296038214088027, 'tp': 1790, 'tn': 2027, 'fp': 466, 'fn': 717, 'auroc': 0.835887593358732, 'auprc': 0.8388096348960634, 'eval_loss': 0.519357666015625}
Correct predictions are:  3817
Total predictions are:  5000
Accuracy on test set is: 0.7634 



EPOCH NUMBER:  2 (RUN:  1 COMB:  STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6004143877464976, 'tp': 2000, 'tn': 2001, 'fp': 492, 'fn': 507, 'auroc': 0.8796951368098725, 'auprc': 0.8814297926533663, 'eval_loss': 0.4577580261230469}
Correct predictions are:  4001
Total predictions are:  5000
Accuracy on test set is: 0.8002 



EPOCH NUMBER:  3 (RUN:  1 COMB:  STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.640025722078086, 'tp': 2047, 'tn': 2053, 'fp': 440, 'fn': 460, 'auroc': 0.8963312672371352, 'auprc': 0.8944512744262552, 'eval_loss': 0.43902542114257814}
Correct predictions are:  4100
Total predictions are:  5000
Accuracy on test set is: 0.82 



EPOCH NUMBER:  4 (RUN:  1 COMB:  STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6283587880221672, 'tp': 1982, 'tn': 2087, 'fp': 406, 'fn': 525, 'auroc': 0.8920704338322012, 'auprc': 0.882237876327263, 'eval_loss': 0.5664446563720703}
Correct predictions are:  4069
Total predictions are:  5000
Accuracy on test set is: 0.8138 



EPOCH NUMBER:  5 (RUN:  1 COMB:  STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6314750345394168, 'tp': 1974, 'tn': 2102, 'fp': 391, 'fn': 533, 'auroc': 0.894793495181002, 'auprc': 0.8920224829579653, 'eval_loss': 0.6192562759399414}
Correct predictions are:  4076
Total predictions are:  5000
Accuracy on test set is: 0.8152 



EPOCH NUMBER:  6 (RUN:  1 COMB:  STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6144189788306041, 'tp': 2038, 'tn': 1998, 'fp': 495, 'fn': 469, 'auroc': 0.890872984444198, 'auprc': 0.889118507618562, 'eval_loss': 0.6406284774780273}
Correct predictions are:  4036
Total predictions are:  5000
Accuracy on test set is: 0.8072 



EPOCH NUMBER:  7 (RUN:  1 COMB:  STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6276812550156069, 'tp': 2023, 'tn': 2046, 'fp': 447, 'fn': 484, 'auroc': 0.8965891892592439, 'auprc': 0.8921355584965274, 'eval_loss': 0.6762964660644532}
Correct predictions are:  4069
Total predictions are:  5000
Accuracy on test set is: 0.8138 



EPOCH NUMBER:  8 (RUN:  1 COMB:  STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6214820288296095, 'tp': 1926, 'tn': 2122, 'fp': 371, 'fn': 581, 'auroc': 0.8723244390236019, 'auprc': 0.8650265106048476, 'eval_loss': 0.7566078918457031}
Correct predictions are:  4048
Total predictions are:  5000
Accuracy on test set is: 0.8096 



EPOCH NUMBER:  9 (RUN:  1 COMB:  STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6319970534380583, 'tp': 2051, 'tn': 2029, 'fp': 464, 'fn': 456, 'auroc': 0.8873119965260529, 'auprc': 0.8792889862726062, 'eval_loss': 0.7758638046264649}
Correct predictions are:  4080
Total predictions are:  5000
Accuracy on test set is: 0.816 


[0.6366, 0.7634, 0.8002, 0.82, 0.8138, 0.8152, 0.8072, 0.8138, 0.8096, 0.816]

RUN NUMBER:  2


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  2 COMB:  STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7033320233794484, 'tp': 2113, 'tn': 2145, 'fp': 348, 'fn': 394, 'auroc': 0.9313173015276439, 'auprc': 0.9290685712986266, 'eval_loss': 0.35505815925598144}
Correct predictions are:  4258
Total predictions are:  5000
Accuracy on test set is: 0.8516 



EPOCH NUMBER:  1 (RUN:  2 COMB:  STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6996779587663176, 'tp': 2152, 'tn': 2097, 'fp': 396, 'fn': 355, 'auroc': 0.9306281761249008, 'auprc': 0.9305027532601464, 'eval_loss': 0.4050149757385254}
Correct predictions are:  4249
Total predictions are:  5000
Accuracy on test set is: 0.8498 



EPOCH NUMBER:  2 (RUN:  2 COMB:  STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6858608464140319, 'tp': 2045, 'tn': 2167, 'fp': 326, 'fn': 462, 'auroc': 0.9264777435855096, 'auprc': 0.9275376966019451, 'eval_loss': 0.6521586489915848}
Correct predictions are:  4212
Total predictions are:  5000
Accuracy on test set is: 0.8424 



EPOCH NUMBER:  3 (RUN:  2 COMB:  STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6916004775256217, 'tp': 2127, 'tn': 2102, 'fp': 391, 'fn': 380, 'auroc': 0.9276991131610471, 'auprc': 0.9258429017692339, 'eval_loss': 0.6716223039150238}
Correct predictions are:  4229
Total predictions are:  5000
Accuracy on test set is: 0.8458 



EPOCH NUMBER:  4 (RUN:  2 COMB:  STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6772101590412128, 'tp': 2113, 'tn': 2080, 'fp': 413, 'fn': 394, 'auroc': 0.9039322868291287, 'auprc': 0.90931921429731, 'eval_loss': 0.8443586922168732}
Correct predictions are:  4193
Total predictions are:  5000
Accuracy on test set is: 0.8386 



EPOCH NUMBER:  5 (RUN:  2 COMB:  STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6810015065149067, 'tp': 2079, 'tn': 2123, 'fp': 370, 'fn': 428, 'auroc': 0.9232797185129931, 'auprc': 0.9220658147649965, 'eval_loss': 0.8487563581466675}
Correct predictions are:  4202
Total predictions are:  5000
Accuracy on test set is: 0.8404 



EPOCH NUMBER:  6 (RUN:  2 COMB:  STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.685355619855132, 'tp': 2142, 'tn': 2071, 'fp': 422, 'fn': 365, 'auroc': 0.8511431529623191, 'auprc': 0.8359134986356793, 'eval_loss': 0.8383859159469604}
Correct predictions are:  4213
Total predictions are:  5000
Accuracy on test set is: 0.8426 



EPOCH NUMBER:  7 (RUN:  2 COMB:  STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6829321891295057, 'tp': 2137, 'tn': 2070, 'fp': 423, 'fn': 370, 'auroc': 0.8859293456860702, 'auprc': 0.9049976161701726, 'eval_loss': 0.8585352788925171}
Correct predictions are:  4207
Total predictions are:  5000
Accuracy on test set is: 0.8414 



EPOCH NUMBER:  8 (RUN:  2 COMB:  STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6825188687464091, 'tp': 2088, 'tn': 2118, 'fp': 375, 'fn': 419, 'auroc': 0.9186724823922622, 'auprc': 0.9202674306009682, 'eval_loss': 0.9214113467216491}
Correct predictions are:  4206
Total predictions are:  5000
Accuracy on test set is: 0.8412 



EPOCH NUMBER:  9 (RUN:  2 COMB:  STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6793883572245528, 'tp': 2055, 'tn': 2142, 'fp': 351, 'fn': 452, 'auroc': 0.8902233793512941, 'auprc': 0.9076644210229508, 'eval_loss': 0.9927915824413299}
Correct predictions are:  4197
Total predictions are:  5000
Accuracy on test set is: 0.8394 


[0.8516, 0.8498, 0.8424, 0.8458, 0.8386, 0.8404, 0.8426, 0.8414, 0.8412, 0.8394]

RUN NUMBER:  3


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  3 COMB:  STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7048671716226572, 'tp': 2241, 'tn': 2015, 'fp': 478, 'fn': 266, 'auroc': 0.9321323479176078, 'auprc': 0.9321252563522876, 'eval_loss': 0.3654550735473633}
Correct predictions are:  4256
Total predictions are:  5000
Accuracy on test set is: 0.8512 



EPOCH NUMBER:  1 (RUN:  3 COMB:  STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6986470357582063, 'tp': 2062, 'tn': 2182, 'fp': 311, 'fn': 445, 'auroc': 0.9296454484203156, 'auprc': 0.9275135337899141, 'eval_loss': 0.5094794378280639}
Correct predictions are:  4244
Total predictions are:  5000
Accuracy on test set is: 0.8488 



EPOCH NUMBER:  2 (RUN:  3 COMB:  STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6891979665465452, 'tp': 2118, 'tn': 2105, 'fp': 388, 'fn': 389, 'auroc': 0.9254352554124025, 'auprc': 0.9249413870566351, 'eval_loss': 0.6350123054504394}
Correct predictions are:  4223
Total predictions are:  5000
Accuracy on test set is: 0.8446 



EPOCH NUMBER:  3 (RUN:  3 COMB:  STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7001136232116408, 'tp': 2181, 'tn': 2068, 'fp': 425, 'fn': 326, 'auroc': 0.9264026229965643, 'auprc': 0.9265559527070898, 'eval_loss': 0.7414806223869324}
Correct predictions are:  4249
Total predictions are:  5000
Accuracy on test set is: 0.8498 



EPOCH NUMBER:  4 (RUN:  3 COMB:  STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7032067567989186, 'tp': 2144, 'tn': 2114, 'fp': 379, 'fn': 363, 'auroc': 0.930970258806829, 'auprc': 0.9303065847110377, 'eval_loss': 0.731525293636322}
Correct predictions are:  4258
Total predictions are:  5000
Accuracy on test set is: 0.8516 



EPOCH NUMBER:  5 (RUN:  3 COMB:  STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7019991703981324, 'tp': 2133, 'tn': 2122, 'fp': 371, 'fn': 374, 'auroc': 0.9261431809625387, 'auprc': 0.9238542086325578, 'eval_loss': 0.8437421813011169}
Correct predictions are:  4255
Total predictions are:  5000
Accuracy on test set is: 0.851 



EPOCH NUMBER:  6 (RUN:  3 COMB:  STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6959997799623403, 'tp': 2132, 'tn': 2108, 'fp': 385, 'fn': 375, 'auroc': 0.9235646007464697, 'auprc': 0.909485225370708, 'eval_loss': 0.9599936383008957}
Correct predictions are:  4240
Total predictions are:  5000
Accuracy on test set is: 0.848 



EPOCH NUMBER:  7 (RUN:  3 COMB:  STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6875840502219203, 'tp': 2190, 'tn': 2026, 'fp': 467, 'fn': 317, 'auroc': 0.9188334436541984, 'auprc': 0.9150582807792844, 'eval_loss': 0.9781566869020462}
Correct predictions are:  4216
Total predictions are:  5000
Accuracy on test set is: 0.8432 



EPOCH NUMBER:  8 (RUN:  3 COMB:  STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6835977185625547, 'tp': 2115, 'tn': 2094, 'fp': 399, 'fn': 392, 'auroc': 0.9206427378390646, 'auprc': 0.9157527583507087, 'eval_loss': 1.0884596400260926}
Correct predictions are:  4209
Total predictions are:  5000
Accuracy on test set is: 0.8418 



EPOCH NUMBER:  9 (RUN:  3 COMB:  STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.691905553463861, 'tp': 2066, 'tn': 2162, 'fp': 331, 'fn': 441, 'auroc': 0.9218398672245591, 'auprc': 0.9162872904937827, 'eval_loss': 1.0102153011322021}
Correct predictions are:  4228
Total predictions are:  5000
Accuracy on test set is: 0.8456 


[0.8512, 0.8488, 0.8446, 0.8498, 0.8516, 0.851, 0.848, 0.8432, 0.8418, 0.8456]

RUN NUMBER:  4


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  4 COMB:  STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6995464896495678, 'tp': 2172, 'tn': 2076, 'fp': 417, 'fn': 335, 'auroc': 0.9295802479091437, 'auprc': 0.928753195848907, 'eval_loss': 0.34934647426605225}
Correct predictions are:  4248
Total predictions are:  5000
Accuracy on test set is: 0.8496 



EPOCH NUMBER:  1 (RUN:  4 COMB:  STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.699137572434429, 'tp': 2093, 'tn': 2154, 'fp': 339, 'fn': 414, 'auroc': 0.9292878456167095, 'auprc': 0.9293018331362768, 'eval_loss': 0.44054892692565917}
Correct predictions are:  4247
Total predictions are:  5000
Accuracy on test set is: 0.8494 



EPOCH NUMBER:  2 (RUN:  4 COMB:  STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6916051724104126, 'tp': 2117, 'tn': 2112, 'fp': 381, 'fn': 390, 'auroc': 0.9263122222878228, 'auprc': 0.9251493575990388, 'eval_loss': 0.5884819557189941}
Correct predictions are:  4229
Total predictions are:  5000
Accuracy on test set is: 0.8458 



EPOCH NUMBER:  3 (RUN:  4 COMB:  STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6819352572332639, 'tp': 2150, 'tn': 2054, 'fp': 439, 'fn': 357, 'auroc': 0.9195991296571764, 'auprc': 0.917394621436189, 'eval_loss': 0.7347508295059204}
Correct predictions are:  4204
Total predictions are:  5000
Accuracy on test set is: 0.8408 



EPOCH NUMBER:  4 (RUN:  4 COMB:  STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6925462785399528, 'tp': 2150, 'tn': 2081, 'fp': 412, 'fn': 357, 'auroc': 0.9222805106792039, 'auprc': 0.9186799626722888, 'eval_loss': 0.8137398749351501}
Correct predictions are:  4231
Total predictions are:  5000
Accuracy on test set is: 0.8462 



EPOCH NUMBER:  5 (RUN:  4 COMB:  STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6964796383891039, 'tp': 2082, 'tn': 2158, 'fp': 335, 'fn': 425, 'auroc': 0.922879875378223, 'auprc': 0.923527727464506, 'eval_loss': 0.8671663763523102}
Correct predictions are:  4240
Total predictions are:  5000
Accuracy on test set is: 0.848 



EPOCH NUMBER:  6 (RUN:  4 COMB:  STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6912038848206354, 'tp': 2117, 'tn': 2111, 'fp': 382, 'fn': 390, 'auroc': 0.9228519551593284, 'auprc': 0.9226649377743714, 'eval_loss': 0.9277350099086762}
Correct predictions are:  4228
Total predictions are:  5000
Accuracy on test set is: 0.8456 



EPOCH NUMBER:  7 (RUN:  4 COMB:  STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6911974945161553, 'tp': 2124, 'tn': 2104, 'fp': 389, 'fn': 383, 'auroc': 0.915342056281721, 'auprc': 0.902101163795165, 'eval_loss': 0.9343418436050415}
Correct predictions are:  4228
Total predictions are:  5000
Accuracy on test set is: 0.8456 



EPOCH NUMBER:  8 (RUN:  4 COMB:  STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.687712530129555, 'tp': 2141, 'tn': 2078, 'fp': 415, 'fn': 366, 'auroc': 0.9153019759674915, 'auprc': 0.8996796987527993, 'eval_loss': 0.9914434465885162}
Correct predictions are:  4219
Total predictions are:  5000
Accuracy on test set is: 0.8438 



EPOCH NUMBER:  9 (RUN:  4 COMB:  STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.673381551973853, 'tp': 1971, 'tn': 2204, 'fp': 289, 'fn': 536, 'auroc': 0.9196981704336561, 'auprc': 0.9169303845786331, 'eval_loss': 1.0549053915500641}
Correct predictions are:  4175
Total predictions are:  5000
Accuracy on test set is: 0.835 


[0.8496, 0.8494, 0.8458, 0.8408, 0.8462, 0.848, 0.8456, 0.8456, 0.8438, 0.835]

RUN NUMBER:  5


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  5 COMB:  STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7141390147169341, 'tp': 2126, 'tn': 2159, 'fp': 334, 'fn': 381, 'auroc': 0.9336558798620982, 'auprc': 0.9325225807973692, 'eval_loss': 0.34777762908935544}
Correct predictions are:  4285
Total predictions are:  5000
Accuracy on test set is: 0.857 



EPOCH NUMBER:  1 (RUN:  5 COMB:  STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6969863051934932, 'tp': 2226, 'tn': 2011, 'fp': 482, 'fn': 281, 'auroc': 0.9297736094250979, 'auprc': 0.9294764198904123, 'eval_loss': 0.4059912197113037}
Correct predictions are:  4237
Total predictions are:  5000
Accuracy on test set is: 0.8474 



EPOCH NUMBER:  2 (RUN:  5 COMB:  STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6940781713035107, 'tp': 2188, 'tn': 2045, 'fp': 448, 'fn': 319, 'auroc': 0.9291256843453651, 'auprc': 0.9274664109993781, 'eval_loss': 0.6142357471466064}
Correct predictions are:  4233
Total predictions are:  5000
Accuracy on test set is: 0.8466 



EPOCH NUMBER:  3 (RUN:  5 COMB:  STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.686807460983468, 'tp': 2124, 'tn': 2093, 'fp': 400, 'fn': 383, 'auroc': 0.9257310977318063, 'auprc': 0.9259688288632557, 'eval_loss': 0.7073089089870452}
Correct predictions are:  4217
Total predictions are:  5000
Accuracy on test set is: 0.8434 



EPOCH NUMBER:  4 (RUN:  5 COMB:  STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6871970668517471, 'tp': 2117, 'tn': 2101, 'fp': 392, 'fn': 390, 'auroc': 0.9247415699739087, 'auprc': 0.9229147189267172, 'eval_loss': 0.8388196427822113}
Correct predictions are:  4218
Total predictions are:  5000
Accuracy on test set is: 0.8436 



EPOCH NUMBER:  5 (RUN:  5 COMB:  STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.681451047364098, 'tp': 2076, 'tn': 2127, 'fp': 366, 'fn': 431, 'auroc': 0.9232025979083678, 'auprc': 0.9211694843708443, 'eval_loss': 0.8735991837978363}
Correct predictions are:  4203
Total predictions are:  5000
Accuracy on test set is: 0.8406 



EPOCH NUMBER:  6 (RUN:  5 COMB:  STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6996349642039268, 'tp': 2146, 'tn': 2103, 'fp': 390, 'fn': 361, 'auroc': 0.9264300632116955, 'auprc': 0.9215477668046089, 'eval_loss': 0.8911579501628876}
Correct predictions are:  4249
Total predictions are:  5000
Accuracy on test set is: 0.8498 



EPOCH NUMBER:  7 (RUN:  5 COMB:  STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6919996717986783, 'tp': 2127, 'tn': 2103, 'fp': 390, 'fn': 380, 'auroc': 0.9231626775953924, 'auprc': 0.9186791015329482, 'eval_loss': 0.9487187272787094}
Correct predictions are:  4230
Total predictions are:  5000
Accuracy on test set is: 0.846 



EPOCH NUMBER:  8 (RUN:  5 COMB:  STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6860335006187565, 'tp': 2129, 'tn': 2086, 'fp': 407, 'fn': 378, 'auroc': 0.9174479927922635, 'auprc': 0.9108783991874436, 'eval_loss': 1.0774547153234482}
Correct predictions are:  4215
Total predictions are:  5000
Accuracy on test set is: 0.843 



EPOCH NUMBER:  9 (RUN:  5 COMB:  STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6692373778804405, 'tp': 2109, 'tn': 2064, 'fp': 429, 'fn': 398, 'auroc': 0.9082672008148543, 'auprc': 0.8869428738172217, 'eval_loss': 1.164407248210907}
Correct predictions are:  4173
Total predictions are:  5000
Accuracy on test set is: 0.8346 


[0.857, 0.8474, 0.8466, 0.8434, 0.8436, 0.8406, 0.8498, 0.846, 0.843, 0.8346]


 Over all runs maximum accuracies are: [0.82, 0.8496, 0.8516, 0.8516, 0.857]
The median is: 0.8516
XLNet Accuracy Score on Test set ->  ['0.8516 +/- 0.03160000000000007']


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a



* * * * EVALUATION USING LOW_RSW AS PREPROCESSING FUNCTION * * * *

RUN NUMBER:  1


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  1 COMB:  LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7521585634862773, 'tp': 2224, 'tn': 2156, 'fp': 337, 'fn': 283, 'auroc': 0.9457599747582021, 'auprc': 0.9428027257488132, 'eval_loss': 0.3131959732055664}
Correct predictions are:  4380
Total predictions are:  5000
Accuracy on test set is: 0.876 



EPOCH NUMBER:  1 (RUN:  1 COMB:  LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7494759337457592, 'tp': 2141, 'tn': 2231, 'fp': 262, 'fn': 366, 'auroc': 0.9494575237469861, 'auprc': 0.9458151360344175, 'eval_loss': 0.3847262992858887}
Correct predictions are:  4372
Total predictions are:  5000
Accuracy on test set is: 0.8744 



EPOCH NUMBER:  2 (RUN:  1 COMB:  LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7564076136786867, 'tp': 2210, 'tn': 2181, 'fp': 312, 'fn': 297, 'auroc': 0.9494120033901066, 'auprc': 0.9480673231662242, 'eval_loss': 0.4470484676361084}
Correct predictions are:  4391
Total predictions are:  5000
Accuracy on test set is: 0.8782 



EPOCH NUMBER:  3 (RUN:  1 COMB:  LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7563982388106523, 'tp': 2205, 'tn': 2186, 'fp': 307, 'fn': 302, 'auroc': 0.9465462209223722, 'auprc': 0.9467242883173984, 'eval_loss': 0.5622508686065674}
Correct predictions are:  4391
Total predictions are:  5000
Accuracy on test set is: 0.8782 



EPOCH NUMBER:  4 (RUN:  1 COMB:  LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7628806213555026, 'tp': 2230, 'tn': 2177, 'fp': 316, 'fn': 277, 'auroc': 0.9505180920618418, 'auprc': 0.9494220574135911, 'eval_loss': 0.587637744140625}
Correct predictions are:  4407
Total predictions are:  5000
Accuracy on test set is: 0.8814 



EPOCH NUMBER:  5 (RUN:  1 COMB:  LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7392361462198629, 'tp': 2195, 'tn': 2153, 'fp': 340, 'fn': 312, 'auroc': 0.9464721403415802, 'auprc': 0.9457894573677148, 'eval_loss': 0.7005442596435547}
Correct predictions are:  4348
Total predictions are:  5000
Accuracy on test set is: 0.8696 



EPOCH NUMBER:  6 (RUN:  1 COMB:  LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7420302238436989, 'tp': 2174, 'tn': 2181, 'fp': 312, 'fn': 333, 'auroc': 0.9460652571516159, 'auprc': 0.9441425906537435, 'eval_loss': 0.7096290903091431}
Correct predictions are:  4355
Total predictions are:  5000
Accuracy on test set is: 0.871 



EPOCH NUMBER:  7 (RUN:  1 COMB:  LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7396359672190191, 'tp': 2170, 'tn': 2179, 'fp': 314, 'fn': 337, 'auroc': 0.9425952299466028, 'auprc': 0.932576834977382, 'eval_loss': 0.6858498613357544}
Correct predictions are:  4349
Total predictions are:  5000
Accuracy on test set is: 0.8698 



EPOCH NUMBER:  8 (RUN:  1 COMB:  LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7448367170548305, 'tp': 2202, 'tn': 2160, 'fp': 333, 'fn': 305, 'auroc': 0.9338021210086287, 'auprc': 0.9092311624956366, 'eval_loss': 0.700813290309906}
Correct predictions are:  4362
Total predictions are:  5000
Accuracy on test set is: 0.8724 



EPOCH NUMBER:  9 (RUN:  1 COMB:  LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7411980041185897, 'tp': 2186, 'tn': 2167, 'fp': 326, 'fn': 321, 'auroc': 0.9392512037294374, 'auprc': 0.9216230156156714, 'eval_loss': 0.7958662504673004}
Correct predictions are:  4353
Total predictions are:  5000
Accuracy on test set is: 0.8706 


[0.876, 0.8744, 0.8782, 0.8782, 0.8814, 0.8696, 0.871, 0.8698, 0.8724, 0.8706]

RUN NUMBER:  2


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  2 COMB:  LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7524907308402723, 'tp': 2179, 'tn': 2202, 'fp': 291, 'fn': 328, 'auroc': 0.9470483048587102, 'auprc': 0.9437178012393124, 'eval_loss': 0.33054087677001953}
Correct predictions are:  4381
Total predictions are:  5000
Accuracy on test set is: 0.8762 



EPOCH NUMBER:  1 (RUN:  2 COMB:  LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7612364942748412, 'tp': 2197, 'tn': 2206, 'fp': 287, 'fn': 310, 'auroc': 0.9499670477416542, 'auprc': 0.9452734798146847, 'eval_loss': 0.3798532150268555}
Correct predictions are:  4403
Total predictions are:  5000
Accuracy on test set is: 0.8806 



EPOCH NUMBER:  2 (RUN:  2 COMB:  LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7568319445999611, 'tp': 2216, 'tn': 2176, 'fp': 317, 'fn': 291, 'auroc': 0.947229026275566, 'auprc': 0.9417777239015725, 'eval_loss': 0.4766589834213257}
Correct predictions are:  4392
Total predictions are:  5000
Accuracy on test set is: 0.8784 



EPOCH NUMBER:  3 (RUN:  2 COMB:  LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7542549271620212, 'tp': 2168, 'tn': 2217, 'fp': 276, 'fn': 339, 'auroc': 0.9491943216834821, 'auprc': 0.9459244006805049, 'eval_loss': 0.5678299861907959}
Correct predictions are:  4385
Total predictions are:  5000
Accuracy on test set is: 0.877 



EPOCH NUMBER:  4 (RUN:  2 COMB:  LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7608130238460318, 'tp': 2217, 'tn': 2185, 'fp': 308, 'fn': 290, 'auroc': 0.948473996036129, 'auprc': 0.9463568558591269, 'eval_loss': 0.6058760407447815}
Correct predictions are:  4402
Total predictions are:  5000
Accuracy on test set is: 0.8804 



EPOCH NUMBER:  5 (RUN:  2 COMB:  LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7541654720622799, 'tp': 2227, 'tn': 2158, 'fp': 335, 'fn': 280, 'auroc': 0.9485272764538474, 'auprc': 0.9459422806229233, 'eval_loss': 0.6737088316440583}
Correct predictions are:  4385
Total predictions are:  5000
Accuracy on test set is: 0.877 



EPOCH NUMBER:  6 (RUN:  2 COMB:  LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7395595797802781, 'tp': 2125, 'tn': 2222, 'fp': 271, 'fn': 382, 'auroc': 0.9452932510990886, 'auprc': 0.9429762743978111, 'eval_loss': 0.852134567284584}
Correct predictions are:  4347
Total predictions are:  5000
Accuracy on test set is: 0.8694 



EPOCH NUMBER:  7 (RUN:  2 COMB:  LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7424040896218321, 'tp': 2181, 'tn': 2175, 'fp': 318, 'fn': 326, 'auroc': 0.9436215579930146, 'auprc': 0.9407345964695105, 'eval_loss': 0.7790055483818055}
Correct predictions are:  4356
Total predictions are:  5000
Accuracy on test set is: 0.8712 



EPOCH NUMBER:  8 (RUN:  2 COMB:  LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7492493133307319, 'tp': 2180, 'tn': 2193, 'fp': 300, 'fn': 327, 'auroc': 0.9394205650572302, 'auprc': 0.9173801677344998, 'eval_loss': 0.8180459562778473}
Correct predictions are:  4373
Total predictions are:  5000
Accuracy on test set is: 0.8746 



EPOCH NUMBER:  9 (RUN:  2 COMB:  LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.748487147402363, 'tp': 2213, 'tn': 2158, 'fp': 335, 'fn': 294, 'auroc': 0.943400676261302, 'auprc': 0.9400267480053118, 'eval_loss': 0.9380664487838745}
Correct predictions are:  4371
Total predictions are:  5000
Accuracy on test set is: 0.8742 


[0.8762, 0.8806, 0.8784, 0.877, 0.8804, 0.877, 0.8694, 0.8712, 0.8746, 0.8742]

RUN NUMBER:  3


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  3 COMB:  LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7323863011554502, 'tp': 2214, 'tn': 2116, 'fp': 377, 'fn': 293, 'auroc': 0.9392169634609935, 'auprc': 0.9350992731829694, 'eval_loss': 0.33213308563232424}
Correct predictions are:  4330
Total predictions are:  5000
Accuracy on test set is: 0.866 



EPOCH NUMBER:  1 (RUN:  3 COMB:  LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7436200639573518, 'tp': 2178, 'tn': 2181, 'fp': 312, 'fn': 329, 'auroc': 0.9397958479994483, 'auprc': 0.9378276552978129, 'eval_loss': 0.4061992324829102}
Correct predictions are:  4359
Total predictions are:  5000
Accuracy on test set is: 0.8718 



EPOCH NUMBER:  2 (RUN:  3 COMB:  LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7356596670030765, 'tp': 2194, 'tn': 2145, 'fp': 348, 'fn': 313, 'auroc': 0.9429267525457399, 'auprc': 0.9400136420386465, 'eval_loss': 0.44978760452270505}
Correct predictions are:  4339
Total predictions are:  5000
Accuracy on test set is: 0.8678 



EPOCH NUMBER:  3 (RUN:  3 COMB:  LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7327975410968286, 'tp': 2174, 'tn': 2158, 'fp': 335, 'fn': 333, 'auroc': 0.9414222607505243, 'auprc': 0.9376492233055266, 'eval_loss': 0.575267700958252}
Correct predictions are:  4332
Total predictions are:  5000
Accuracy on test set is: 0.8664 



EPOCH NUMBER:  4 (RUN:  3 COMB:  LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7356083051657503, 'tp': 2171, 'tn': 2168, 'fp': 325, 'fn': 336, 'auroc': 0.9386805592555846, 'auprc': 0.9333649358314369, 'eval_loss': 0.5803660942077636}
Correct predictions are:  4339
Total predictions are:  5000
Accuracy on test set is: 0.8678 



EPOCH NUMBER:  5 (RUN:  3 COMB:  LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7224221449531556, 'tp': 2151, 'tn': 2155, 'fp': 338, 'fn': 356, 'auroc': 0.9382931162180311, 'auprc': 0.9338005610868514, 'eval_loss': 0.6634656036376954}
Correct predictions are:  4306
Total predictions are:  5000
Accuracy on test set is: 0.8612 



EPOCH NUMBER:  6 (RUN:  3 COMB:  LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7325532964258469, 'tp': 2148, 'tn': 2183, 'fp': 310, 'fn': 359, 'auroc': 0.9415343416292384, 'auprc': 0.9384538011327752, 'eval_loss': 0.5956844505310058}
Correct predictions are:  4331
Total predictions are:  5000
Accuracy on test set is: 0.8662 



EPOCH NUMBER:  7 (RUN:  3 COMB:  LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7128220143912207, 'tp': 2139, 'tn': 2143, 'fp': 350, 'fn': 368, 'auroc': 0.9131226788818023, 'auprc': 0.9096459217844418, 'eval_loss': 0.7294161884307861}
Correct predictions are:  4282
Total predictions are:  5000
Accuracy on test set is: 0.8564 



EPOCH NUMBER:  8 (RUN:  3 COMB:  LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7088102197996241, 'tp': 2152, 'tn': 2120, 'fp': 373, 'fn': 355, 'auroc': 0.9172310310912837, 'auprc': 0.8844794917161181, 'eval_loss': 0.7646480224609375}
Correct predictions are:  4272
Total predictions are:  5000
Accuracy on test set is: 0.8544 



EPOCH NUMBER:  9 (RUN:  3 COMB:  LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7056229448614616, 'tp': 2079, 'tn': 2183, 'fp': 310, 'fn': 428, 'auroc': 0.9307828973379152, 'auprc': 0.9163953585530735, 'eval_loss': 0.7108708572387695}
Correct predictions are:  4262
Total predictions are:  5000
Accuracy on test set is: 0.8524 


[0.866, 0.8718, 0.8678, 0.8664, 0.8678, 0.8612, 0.8662, 0.8564, 0.8544, 0.8524]

RUN NUMBER:  4


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  4 COMB:  LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7376200400275393, 'tp': 2190, 'tn': 2154, 'fp': 339, 'fn': 317, 'auroc': 0.9402568916140303, 'auprc': 0.937520116106024, 'eval_loss': 0.3197349411010742}
Correct predictions are:  4344
Total predictions are:  5000
Accuracy on test set is: 0.8688 



EPOCH NUMBER:  1 (RUN:  4 COMB:  LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7469276199406408, 'tp': 2215, 'tn': 2152, 'fp': 341, 'fn': 292, 'auroc': 0.9460759772356615, 'auprc': 0.9438730500872242, 'eval_loss': 0.3263892204284668}
Correct predictions are:  4367
Total predictions are:  5000
Accuracy on test set is: 0.8734 



EPOCH NUMBER:  2 (RUN:  4 COMB:  LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7381003188571555, 'tp': 2105, 'tn': 2237, 'fp': 256, 'fn': 402, 'auroc': 0.9430127532199853, 'auprc': 0.9416812462545786, 'eval_loss': 0.48801693267822266}
Correct predictions are:  4342
Total predictions are:  5000
Accuracy on test set is: 0.8684 



EPOCH NUMBER:  3 (RUN:  4 COMB:  LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7376099666523622, 'tp': 2173, 'tn': 2171, 'fp': 322, 'fn': 334, 'auroc': 0.9417803435578935, 'auprc': 0.940235421627367, 'eval_loss': 0.5764190521240234}
Correct predictions are:  4344
Total predictions are:  5000
Accuracy on test set is: 0.8688 



EPOCH NUMBER:  4 (RUN:  4 COMB:  LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7400330215971894, 'tp': 2171, 'tn': 2179, 'fp': 314, 'fn': 336, 'auroc': 0.9407458554475068, 'auprc': 0.9395396094341587, 'eval_loss': 0.6336710319519043}
Correct predictions are:  4350
Total predictions are:  5000
Accuracy on test set is: 0.87 



EPOCH NUMBER:  5 (RUN:  4 COMB:  LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7278698469350641, 'tp': 2202, 'tn': 2117, 'fp': 376, 'fn': 305, 'auroc': 0.9368641450148971, 'auprc': 0.9348032914899148, 'eval_loss': 0.6976642513275146}
Correct predictions are:  4319
Total predictions are:  5000
Accuracy on test set is: 0.8638 



EPOCH NUMBER:  6 (RUN:  4 COMB:  LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.719742507544539, 'tp': 2206, 'tn': 2092, 'fp': 401, 'fn': 301, 'auroc': 0.93681454462603, 'auprc': 0.9318979371589541, 'eval_loss': 0.680161481666565}
Correct predictions are:  4298
Total predictions are:  5000
Accuracy on test set is: 0.8596 



EPOCH NUMBER:  7 (RUN:  4 COMB:  LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.71799562012011, 'tp': 2197, 'tn': 2097, 'fp': 396, 'fn': 310, 'auroc': 0.9235479606160113, 'auprc': 0.9133174661778862, 'eval_loss': 0.7662146492004395}
Correct predictions are:  4294
Total predictions are:  5000
Accuracy on test set is: 0.8588 



EPOCH NUMBER:  8 (RUN:  4 COMB:  LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7212013608231429, 'tp': 2164, 'tn': 2139, 'fp': 354, 'fn': 343, 'auroc': 0.8971609537418773, 'auprc': 0.8947719003284262, 'eval_loss': 0.7673628170013428}
Correct predictions are:  4303
Total predictions are:  5000
Accuracy on test set is: 0.8606 



EPOCH NUMBER:  9 (RUN:  4 COMB:  LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7188370304418142, 'tp': 2170, 'tn': 2127, 'fp': 366, 'fn': 337, 'auroc': 0.9260174999771998, 'auprc': 0.9019458519696714, 'eval_loss': 0.7791253698348999}
Correct predictions are:  4297
Total predictions are:  5000
Accuracy on test set is: 0.8594 


[0.8688, 0.8734, 0.8684, 0.8688, 0.87, 0.8638, 0.8596, 0.8588, 0.8606, 0.8594]

RUN NUMBER:  5


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  5 COMB:  LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7452088742129321, 'tp': 2247, 'tn': 2114, 'fp': 379, 'fn': 260, 'auroc': 0.9486730375966148, 'auprc': 0.9448791268600903, 'eval_loss': 0.3594425151824951}
Correct predictions are:  4361
Total predictions are:  5000
Accuracy on test set is: 0.8722 



EPOCH NUMBER:  1 (RUN:  5 COMB:  LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7573536469709748, 'tp': 2252, 'tn': 2140, 'fp': 353, 'fn': 255, 'auroc': 0.948373275246478, 'auprc': 0.944843799273777, 'eval_loss': 0.41407834396362303}
Correct predictions are:  4392
Total predictions are:  5000
Accuracy on test set is: 0.8784 



EPOCH NUMBER:  2 (RUN:  5 COMB:  LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7466008788113944, 'tp': 2276, 'tn': 2086, 'fp': 407, 'fn': 231, 'auroc': 0.9484843961176657, 'auprc': 0.9463283811485917, 'eval_loss': 0.48204460124969484}
Correct predictions are:  4362
Total predictions are:  5000
Accuracy on test set is: 0.8724 



EPOCH NUMBER:  3 (RUN:  5 COMB:  LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7351927662645196, 'tp': 2247, 'tn': 2088, 'fp': 405, 'fn': 260, 'auroc': 0.9428351518275903, 'auprc': 0.9418094832505434, 'eval_loss': 0.5979270052909851}
Correct predictions are:  4335
Total predictions are:  5000
Accuracy on test set is: 0.867 



EPOCH NUMBER:  4 (RUN:  5 COMB:  LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7476467435834154, 'tp': 2207, 'tn': 2162, 'fp': 331, 'fn': 300, 'auroc': 0.9453288513781949, 'auprc': 0.9442928647530301, 'eval_loss': 0.5832035587310791}
Correct predictions are:  4369
Total predictions are:  5000
Accuracy on test set is: 0.8738 



EPOCH NUMBER:  5 (RUN:  5 COMB:  LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7385041959782862, 'tp': 2160, 'tn': 2186, 'fp': 307, 'fn': 347, 'auroc': 0.9452822110125343, 'auprc': 0.9442478732332973, 'eval_loss': 0.6292579971313477}
Correct predictions are:  4346
Total predictions are:  5000
Accuracy on test set is: 0.8692 



EPOCH NUMBER:  6 (RUN:  5 COMB:  LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7392762536479873, 'tp': 2164, 'tn': 2184, 'fp': 309, 'fn': 343, 'auroc': 0.9445453252353501, 'auprc': 0.9428893381671571, 'eval_loss': 0.6643146584510803}
Correct predictions are:  4348
Total predictions are:  5000
Accuracy on test set is: 0.8696 



EPOCH NUMBER:  7 (RUN:  5 COMB:  LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7384562886450508, 'tp': 2197, 'tn': 2149, 'fp': 344, 'fn': 310, 'auroc': 0.9436845184866249, 'auprc': 0.94003954406656, 'eval_loss': 0.6944044451713562}
Correct predictions are:  4346
Total predictions are:  5000
Accuracy on test set is: 0.8692 



EPOCH NUMBER:  8 (RUN:  5 COMB:  LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7340941748600218, 'tp': 2113, 'tn': 2220, 'fp': 273, 'fn': 394, 'auroc': 0.9383212764388073, 'auprc': 0.9176243058311537, 'eval_loss': 0.7928774231433868}
Correct predictions are:  4333
Total predictions are:  5000
Accuracy on test set is: 0.8666 



EPOCH NUMBER:  9 (RUN:  5 COMB:  LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7437313959099044, 'tp': 2164, 'tn': 2195, 'fp': 298, 'fn': 343, 'auroc': 0.9415736219371961, 'auprc': 0.9365019522419146, 'eval_loss': 0.7374156728744506}
Correct predictions are:  4359
Total predictions are:  5000
Accuracy on test set is: 0.8718 


[0.8722, 0.8784, 0.8724, 0.867, 0.8738, 0.8692, 0.8696, 0.8692, 0.8666, 0.8718]


 Over all runs maximum accuracies are: [0.8718, 0.8734, 0.8784, 0.8806, 0.8814]
The median is: 0.8784
XLNet Accuracy Score on Test set ->  ['0.8784 +/- 0.006599999999999939']


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a



* * * * EVALUATION USING LOW_STM AS PREPROCESSING FUNCTION * * * *

RUN NUMBER:  1


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  1 COMB:  LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6807431313537333, 'tp': 2149, 'tn': 2052, 'fp': 441, 'fn': 358, 'auroc': 0.9198562516730131, 'auprc': 0.9157955678551222, 'eval_loss': 0.3783664962768555}
Correct predictions are:  4201
Total predictions are:  5000
Accuracy on test set is: 0.8402 



EPOCH NUMBER:  1 (RUN:  1 COMB:  LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.689208063884334, 'tp': 2113, 'tn': 2110, 'fp': 383, 'fn': 394, 'auroc': 0.9272091893200443, 'auprc': 0.9256749756443183, 'eval_loss': 0.46151941680908204}
Correct predictions are:  4223
Total predictions are:  5000
Accuracy on test set is: 0.8446 



EPOCH NUMBER:  2 (RUN:  1 COMB:  LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6780297477989672, 'tp': 2061, 'tn': 2133, 'fp': 360, 'fn': 446, 'auroc': 0.9248723709993885, 'auprc': 0.923549388943081, 'eval_loss': 0.634919951057434}
Correct predictions are:  4194
Total predictions are:  5000
Accuracy on test set is: 0.8388 



EPOCH NUMBER:  3 (RUN:  1 COMB:  LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6891157152437529, 'tp': 2062, 'tn': 2159, 'fp': 334, 'fn': 445, 'auroc': 0.9255780565319632, 'auprc': 0.9227690004064087, 'eval_loss': 0.6705461179733276}
Correct predictions are:  4221
Total predictions are:  5000
Accuracy on test set is: 0.8442 



EPOCH NUMBER:  4 (RUN:  1 COMB:  LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6811271827057678, 'tp': 2167, 'tn': 2034, 'fp': 459, 'fn': 340, 'auroc': 0.9227348342411005, 'auprc': 0.9214901934173396, 'eval_loss': 0.7966950692176819}
Correct predictions are:  4201
Total predictions are:  5000
Accuracy on test set is: 0.8402 



EPOCH NUMBER:  5 (RUN:  1 COMB:  LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6919348792970241, 'tp': 2084, 'tn': 2145, 'fp': 348, 'fn': 423, 'auroc': 0.9255050159593251, 'auprc': 0.9236874455730427, 'eval_loss': 0.8077489504814148}
Correct predictions are:  4229
Total predictions are:  5000
Accuracy on test set is: 0.8458 



EPOCH NUMBER:  6 (RUN:  1 COMB:  LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6770281139461848, 'tp': 2072, 'tn': 2120, 'fp': 373, 'fn': 435, 'auroc': 0.9217893068281655, 'auprc': 0.9189550967789142, 'eval_loss': 0.8659236677169799}
Correct predictions are:  4192
Total predictions are:  5000
Accuracy on test set is: 0.8384 



EPOCH NUMBER:  7 (RUN:  1 COMB:  LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.685269172649953, 'tp': 2161, 'tn': 2051, 'fp': 442, 'fn': 346, 'auroc': 0.9148475724049677, 'auprc': 0.914061320912634, 'eval_loss': 0.9265733728408814}
Correct predictions are:  4212
Total predictions are:  5000
Accuracy on test set is: 0.8424 



EPOCH NUMBER:  8 (RUN:  1 COMB:  LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6884151832737525, 'tp': 2110, 'tn': 2111, 'fp': 382, 'fn': 397, 'auroc': 0.9171990308404017, 'auprc': 0.910715654468454, 'eval_loss': 0.9859776487350463}
Correct predictions are:  4221
Total predictions are:  5000
Accuracy on test set is: 0.8442 



EPOCH NUMBER:  9 (RUN:  1 COMB:  LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6796113325505526, 'tp': 2064, 'tn': 2134, 'fp': 359, 'fn': 443, 'auroc': 0.9094184098403331, 'auprc': 0.9004442919478857, 'eval_loss': 1.1360107407093047}
Correct predictions are:  4198
Total predictions are:  5000
Accuracy on test set is: 0.8396 


[0.8402, 0.8446, 0.8388, 0.8442, 0.8402, 0.8458, 0.8384, 0.8424, 0.8442, 0.8396]

RUN NUMBER:  2


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  2 COMB:  LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6990116012279769, 'tp': 2220, 'tn': 2023, 'fp': 470, 'fn': 287, 'auroc': 0.9282041571205918, 'auprc': 0.925545618820628, 'eval_loss': 0.37378603973388674}
Correct predictions are:  4243
Total predictions are:  5000
Accuracy on test set is: 0.8486 



EPOCH NUMBER:  1 (RUN:  2 COMB:  LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6969900946263021, 'tp': 2207, 'tn': 2032, 'fp': 461, 'fn': 300, 'auroc': 0.9275271918131838, 'auprc': 0.9281122787818432, 'eval_loss': 0.41175448608398435}
Correct predictions are:  4239
Total predictions are:  5000
Accuracy on test set is: 0.8478 



EPOCH NUMBER:  2 (RUN:  2 COMB:  LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.70428015620696, 'tp': 2193, 'tn': 2066, 'fp': 427, 'fn': 314, 'auroc': 0.9275850322666529, 'auprc': 0.928571445357967, 'eval_loss': 0.5618191638946534}
Correct predictions are:  4259
Total predictions are:  5000
Accuracy on test set is: 0.8518 



EPOCH NUMBER:  3 (RUN:  2 COMB:  LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.706432285065714, 'tp': 2129, 'tn': 2137, 'fp': 356, 'fn': 378, 'auroc': 0.9333385973746033, 'auprc': 0.9330537854637636, 'eval_loss': 0.6430398743629455}
Correct predictions are:  4266
Total predictions are:  5000
Accuracy on test set is: 0.8532 



EPOCH NUMBER:  4 (RUN:  2 COMB:  LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7059142055919979, 'tp': 2103, 'tn': 2161, 'fp': 332, 'fn': 404, 'auroc': 0.9017405096455956, 'auprc': 0.9193330022890033, 'eval_loss': 0.6319419213294983}
Correct predictions are:  4264
Total predictions are:  5000
Accuracy on test set is: 0.8528 



EPOCH NUMBER:  5 (RUN:  2 COMB:  LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.693643446426997, 'tp': 2140, 'tn': 2094, 'fp': 399, 'fn': 367, 'auroc': 0.9252747741542293, 'auprc': 0.9252634051095445, 'eval_loss': 0.7401983023643494}
Correct predictions are:  4234
Total predictions are:  5000
Accuracy on test set is: 0.8468 



EPOCH NUMBER:  6 (RUN:  2 COMB:  LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6932374021574186, 'tp': 2080, 'tn': 2152, 'fp': 341, 'fn': 427, 'auroc': 0.9247278098660294, 'auprc': 0.9212749931717934, 'eval_loss': 0.7768870044708251}
Correct predictions are:  4232
Total predictions are:  5000
Accuracy on test set is: 0.8464 



EPOCH NUMBER:  7 (RUN:  2 COMB:  LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6848503664711368, 'tp': 2099, 'tn': 2113, 'fp': 380, 'fn': 408, 'auroc': 0.9216119454376522, 'auprc': 0.9142770733231029, 'eval_loss': 0.7358792285919189}
Correct predictions are:  4212
Total predictions are:  5000
Accuracy on test set is: 0.8424 



EPOCH NUMBER:  8 (RUN:  2 COMB:  LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6709125135252946, 'tp': 2074, 'tn': 2103, 'fp': 390, 'fn': 433, 'auroc': 0.9159435009970478, 'auprc': 0.909735807097532, 'eval_loss': 0.9146089747428894}
Correct predictions are:  4177
Total predictions are:  5000
Accuracy on test set is: 0.8354 



EPOCH NUMBER:  9 (RUN:  2 COMB:  LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6636614180480124, 'tp': 2041, 'tn': 2117, 'fp': 376, 'fn': 466, 'auroc': 0.9100559348385291, 'auprc': 0.8966672364938564, 'eval_loss': 0.8543476058959961}
Correct predictions are:  4158
Total predictions are:  5000
Accuracy on test set is: 0.8316 


[0.8486, 0.8478, 0.8518, 0.8532, 0.8528, 0.8468, 0.8464, 0.8424, 0.8354, 0.8316]

RUN NUMBER:  3


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  3 COMB:  LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6957204010507804, 'tp': 2176, 'tn': 2062, 'fp': 431, 'fn': 331, 'auroc': 0.922590193107114, 'auprc': 0.9224352456251455, 'eval_loss': 0.37765573654174805}
Correct predictions are:  4238
Total predictions are:  5000
Accuracy on test set is: 0.8476 



EPOCH NUMBER:  1 (RUN:  3 COMB:  LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6967696931655588, 'tp': 2221, 'tn': 2016, 'fp': 477, 'fn': 286, 'auroc': 0.9254942958752796, 'auprc': 0.9242556083595059, 'eval_loss': 0.4610802925109863}
Correct predictions are:  4237
Total predictions are:  5000
Accuracy on test set is: 0.8474 



EPOCH NUMBER:  2 (RUN:  3 COMB:  LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6784895437611366, 'tp': 2086, 'tn': 2110, 'fp': 383, 'fn': 421, 'auroc': 0.9187020026237005, 'auprc': 0.9141608155547415, 'eval_loss': 0.6080621950149536}
Correct predictions are:  4196
Total predictions are:  5000
Accuracy on test set is: 0.8392 



EPOCH NUMBER:  3 (RUN:  3 COMB:  LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6892283643173462, 'tp': 2132, 'tn': 2091, 'fp': 402, 'fn': 375, 'auroc': 0.9086705639772217, 'auprc': 0.8582461220837386, 'eval_loss': 0.6441385845184326}
Correct predictions are:  4223
Total predictions are:  5000
Accuracy on test set is: 0.8446 



EPOCH NUMBER:  4 (RUN:  3 COMB:  LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6924189700529699, 'tp': 2134, 'tn': 2097, 'fp': 396, 'fn': 373, 'auroc': 0.9238004425954701, 'auprc': 0.9211706535241566, 'eval_loss': 0.6716250784873963}
Correct predictions are:  4231
Total predictions are:  5000
Accuracy on test set is: 0.8462 



EPOCH NUMBER:  5 (RUN:  3 COMB:  LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6848942611487765, 'tp': 2190, 'tn': 2019, 'fp': 474, 'fn': 317, 'auroc': 0.9193826479599599, 'auprc': 0.8938263859752639, 'eval_loss': 0.785390127658844}
Correct predictions are:  4209
Total predictions are:  5000
Accuracy on test set is: 0.8418 



EPOCH NUMBER:  6 (RUN:  3 COMB:  LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6752475400154404, 'tp': 2118, 'tn': 2070, 'fp': 423, 'fn': 389, 'auroc': 0.9223066708842995, 'auprc': 0.9162917471870731, 'eval_loss': 0.8998255895853042}
Correct predictions are:  4188
Total predictions are:  5000
Accuracy on test set is: 0.8376 



EPOCH NUMBER:  7 (RUN:  3 COMB:  LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6782033402960095, 'tp': 2137, 'tn': 2058, 'fp': 435, 'fn': 370, 'auroc': 0.9200233729832442, 'auprc': 0.9210650854340883, 'eval_loss': 0.9331605910301208}
Correct predictions are:  4195
Total predictions are:  5000
Accuracy on test set is: 0.839 



EPOCH NUMBER:  8 (RUN:  3 COMB:  LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6600188116326208, 'tp': 2128, 'tn': 2021, 'fp': 472, 'fn': 379, 'auroc': 0.9161119823179413, 'auprc': 0.9168701210087221, 'eval_loss': 0.892267865562439}
Correct predictions are:  4149
Total predictions are:  5000
Accuracy on test set is: 0.8298 



EPOCH NUMBER:  9 (RUN:  3 COMB:  LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6672064558413713, 'tp': 2086, 'tn': 2082, 'fp': 411, 'fn': 421, 'auroc': 0.904951334818465, 'auprc': 0.8680994466321239, 'eval_loss': 1.0092751180171966}
Correct predictions are:  4168
Total predictions are:  5000
Accuracy on test set is: 0.8336 


[0.8476, 0.8474, 0.8392, 0.8446, 0.8462, 0.8418, 0.8376, 0.839, 0.8298, 0.8336]

RUN NUMBER:  4


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  4 COMB:  LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6920266289762985, 'tp': 2112, 'tn': 2118, 'fp': 375, 'fn': 395, 'auroc': 0.9239585238348268, 'auprc': 0.9221546929302756, 'eval_loss': 0.3886036071777344}
Correct predictions are:  4230
Total predictions are:  5000
Accuracy on test set is: 0.846 



EPOCH NUMBER:  1 (RUN:  4 COMB:  LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6886682638359564, 'tp': 2084, 'tn': 2137, 'fp': 356, 'fn': 423, 'auroc': 0.9240455645172259, 'auprc': 0.9239525970299614, 'eval_loss': 0.5118086305618286}
Correct predictions are:  4221
Total predictions are:  5000
Accuracy on test set is: 0.8442 



EPOCH NUMBER:  2 (RUN:  4 COMB:  LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6933641283118405, 'tp': 2074, 'tn': 2158, 'fp': 335, 'fn': 433, 'auroc': 0.9256007767100894, 'auprc': 0.9238010484793628, 'eval_loss': 0.6529792434215546}
Correct predictions are:  4232
Total predictions are:  5000
Accuracy on test set is: 0.8464 



EPOCH NUMBER:  3 (RUN:  4 COMB:  LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6755969912370562, 'tp': 2104, 'tn': 2085, 'fp': 408, 'fn': 403, 'auroc': 0.920341135474502, 'auprc': 0.9185447953672593, 'eval_loss': 0.7574563470840454}
Correct predictions are:  4189
Total predictions are:  5000
Accuracy on test set is: 0.8378 



EPOCH NUMBER:  4 (RUN:  4 COMB:  LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6844765354811069, 'tp': 2189, 'tn': 2019, 'fp': 474, 'fn': 318, 'auroc': 0.9206561779444351, 'auprc': 0.9157255936645987, 'eval_loss': 0.8202566205501557}
Correct predictions are:  4208
Total predictions are:  5000
Accuracy on test set is: 0.8416 



EPOCH NUMBER:  5 (RUN:  4 COMB:  LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6803172957589018, 'tp': 2147, 'tn': 2053, 'fp': 440, 'fn': 360, 'auroc': 0.9218385072138966, 'auprc': 0.9175311305635311, 'eval_loss': 0.9233465183973313}
Correct predictions are:  4200
Total predictions are:  5000
Accuracy on test set is: 0.84 



EPOCH NUMBER:  6 (RUN:  4 COMB:  LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6791132798467359, 'tp': 2069, 'tn': 2128, 'fp': 365, 'fn': 438, 'auroc': 0.9239466037413734, 'auprc': 0.921982890058614, 'eval_loss': 1.0246323578596115}
Correct predictions are:  4197
Total predictions are:  5000
Accuracy on test set is: 0.8394 



EPOCH NUMBER:  7 (RUN:  4 COMB:  LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6669914023762024, 'tp': 2023, 'tn': 2142, 'fp': 351, 'fn': 484, 'auroc': 0.9154713372952844, 'auprc': 0.910765280805905, 'eval_loss': 1.079008257317543}
Correct predictions are:  4165
Total predictions are:  5000
Accuracy on test set is: 0.833 



EPOCH NUMBER:  8 (RUN:  4 COMB:  LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.688970560501952, 'tp': 2185, 'tn': 2035, 'fp': 458, 'fn': 322, 'auroc': 0.9171407103831695, 'auprc': 0.9127920298027534, 'eval_loss': 0.8901873984336853}
Correct predictions are:  4220
Total predictions are:  5000
Accuracy on test set is: 0.844 



EPOCH NUMBER:  9 (RUN:  4 COMB:  LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.693741939798019, 'tp': 2100, 'tn': 2134, 'fp': 359, 'fn': 407, 'auroc': 0.9136336428877603, 'auprc': 0.9138022640186978, 'eval_loss': 0.9810416939735412}
Correct predictions are:  4234
Total predictions are:  5000
Accuracy on test set is: 0.8468 


[0.846, 0.8442, 0.8464, 0.8378, 0.8416, 0.84, 0.8394, 0.833, 0.844, 0.8468]

RUN NUMBER:  5


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  5 COMB:  LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6525816017111168, 'tp': 2270, 'tn': 1839, 'fp': 654, 'fn': 237, 'auroc': 0.9118596289794911, 'auprc': 0.9082244464333633, 'eval_loss': 0.39650676574707033}
Correct predictions are:  4109
Total predictions are:  5000
Accuracy on test set is: 0.8218 



EPOCH NUMBER:  1 (RUN:  5 COMB:  LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.685269172649953, 'tp': 2161, 'tn': 2051, 'fp': 442, 'fn': 346, 'auroc': 0.9231525175157375, 'auprc': 0.921791315309438, 'eval_loss': 0.41226900634765623}
Correct predictions are:  4212
Total predictions are:  5000
Accuracy on test set is: 0.8424 



EPOCH NUMBER:  2 (RUN:  5 COMB:  LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6875216877080423, 'tp': 2156, 'tn': 2062, 'fp': 431, 'fn': 351, 'auroc': 0.9235122003356506, 'auprc': 0.9210668704466222, 'eval_loss': 0.5154963096618652}
Correct predictions are:  4218
Total predictions are:  5000
Accuracy on test set is: 0.8436 



EPOCH NUMBER:  3 (RUN:  5 COMB:  LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6833685963050992, 'tp': 2061, 'tn': 2146, 'fp': 347, 'fn': 446, 'auroc': 0.922001468491513, 'auprc': 0.9200057676498734, 'eval_loss': 0.6796161127090454}
Correct predictions are:  4207
Total predictions are:  5000
Accuracy on test set is: 0.8414 



EPOCH NUMBER:  4 (RUN:  5 COMB:  LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6796179981735171, 'tp': 2118, 'tn': 2081, 'fp': 412, 'fn': 389, 'auroc': 0.9196309699068042, 'auprc': 0.9171121400616361, 'eval_loss': 0.788857686328888}
Correct predictions are:  4199
Total predictions are:  5000
Accuracy on test set is: 0.8398 



EPOCH NUMBER:  5 (RUN:  5 COMB:  LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6708179288354884, 'tp': 2141, 'tn': 2035, 'fp': 458, 'fn': 366, 'auroc': 0.9013881068827578, 'auprc': 0.8720649816954599, 'eval_loss': 0.8474425314903259}
Correct predictions are:  4176
Total predictions are:  5000
Accuracy on test set is: 0.8352 



EPOCH NUMBER:  6 (RUN:  5 COMB:  LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6634110584796294, 'tp': 2056, 'tn': 2102, 'fp': 391, 'fn': 451, 'auroc': 0.909433129955739, 'auprc': 0.8894485751905258, 'eval_loss': 0.9425806339263916}
Correct predictions are:  4158
Total predictions are:  5000
Accuracy on test set is: 0.8316 



EPOCH NUMBER:  7 (RUN:  5 COMB:  LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6630675311011537, 'tp': 2123, 'tn': 2034, 'fp': 459, 'fn': 384, 'auroc': 0.9135331620999907, 'auprc': 0.9123618328913047, 'eval_loss': 0.920663045501709}
Correct predictions are:  4157
Total predictions are:  5000
Accuracy on test set is: 0.8314 



EPOCH NUMBER:  8 (RUN:  5 COMB:  LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6600635913296607, 'tp': 2066, 'tn': 2084, 'fp': 409, 'fn': 441, 'auroc': 0.9104249777318254, 'auprc': 0.9020749102223657, 'eval_loss': 0.9961594789505005}
Correct predictions are:  4150
Total predictions are:  5000
Accuracy on test set is: 0.83 



EPOCH NUMBER:  9 (RUN:  5 COMB:  LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6576037504198501, 'tp': 2075, 'tn': 2069, 'fp': 424, 'fn': 432, 'auroc': 0.9085997634221452, 'auprc': 0.9042904981149349, 'eval_loss': 1.0613601306915283}
Correct predictions are:  4144
Total predictions are:  5000
Accuracy on test set is: 0.8288 


[0.8218, 0.8424, 0.8436, 0.8414, 0.8398, 0.8352, 0.8316, 0.8314, 0.83, 0.8288]


 Over all runs maximum accuracies are: [0.8436, 0.8458, 0.8468, 0.8476, 0.8532]
The median is: 0.8468
XLNet Accuracy Score on Test set ->  ['0.8468 +/- 0.006399999999999961']


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a



* * * * EVALUATION USING RSW_LOW AS PREPROCESSING FUNCTION * * * *

RUN NUMBER:  1


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  1 COMB:  RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.746069204292048, 'tp': 2208, 'tn': 2157, 'fp': 336, 'fn': 299, 'auroc': 0.9460745372243718, 'auprc': 0.9435904515193142, 'eval_loss': 0.339992813873291}
Correct predictions are:  4365
Total predictions are:  5000
Accuracy on test set is: 0.873 



EPOCH NUMBER:  1 (RUN:  1 COMB:  RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.737880063809295, 'tp': 2089, 'tn': 2251, 'fp': 242, 'fn': 418, 'auroc': 0.948778558423898, 'auprc': 0.9462364958579865, 'eval_loss': 0.42765918464660646}
Correct predictions are:  4340
Total predictions are:  5000
Accuracy on test set is: 0.868 



EPOCH NUMBER:  2 (RUN:  1 COMB:  RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7588648239156576, 'tp': 2190, 'tn': 2207, 'fp': 286, 'fn': 317, 'auroc': 0.9503525707641547, 'auprc': 0.9480504695374596, 'eval_loss': 0.46647271547317504}
Correct predictions are:  4397
Total predictions are:  5000
Accuracy on test set is: 0.8794 



EPOCH NUMBER:  3 (RUN:  1 COMB:  RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7552019008131712, 'tp': 2198, 'tn': 2190, 'fp': 303, 'fn': 309, 'auroc': 0.9477588704295442, 'auprc': 0.945989963020855, 'eval_loss': 0.5912763192176819}
Correct predictions are:  4388
Total predictions are:  5000
Accuracy on test set is: 0.8776 



EPOCH NUMBER:  4 (RUN:  1 COMB:  RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7541529827782374, 'tp': 2226, 'tn': 2159, 'fp': 334, 'fn': 281, 'auroc': 0.9481895138057882, 'auprc': 0.9462078145019961, 'eval_loss': 0.6112389868736267}
Correct predictions are:  4385
Total predictions are:  5000
Accuracy on test set is: 0.877 



EPOCH NUMBER:  5 (RUN:  1 COMB:  RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7369479198739552, 'tp': 2154, 'tn': 2188, 'fp': 305, 'fn': 353, 'auroc': 0.9426595504508755, 'auprc': 0.9428221850976676, 'eval_loss': 0.6953788382530213}
Correct predictions are:  4342
Total predictions are:  5000
Accuracy on test set is: 0.8684 



EPOCH NUMBER:  6 (RUN:  1 COMB:  RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7461942357473555, 'tp': 2162, 'tn': 2203, 'fp': 290, 'fn': 345, 'auroc': 0.9450086088674935, 'auprc': 0.9427017794727784, 'eval_loss': 0.7565275912284851}
Correct predictions are:  4365
Total predictions are:  5000
Accuracy on test set is: 0.873 



EPOCH NUMBER:  7 (RUN:  1 COMB:  RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7392096803781671, 'tp': 2140, 'tn': 2207, 'fp': 286, 'fn': 367, 'auroc': 0.9308945782134932, 'auprc': 0.9077479910216814, 'eval_loss': 0.7913833836317062}
Correct predictions are:  4347
Total predictions are:  5000
Accuracy on test set is: 0.8694 



EPOCH NUMBER:  8 (RUN:  1 COMB:  RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7471935081635983, 'tp': 2151, 'tn': 2216, 'fp': 277, 'fn': 356, 'auroc': 0.9454056519803116, 'auprc': 0.9424783139667756, 'eval_loss': 0.7974108634710312}
Correct predictions are:  4367
Total predictions are:  5000
Accuracy on test set is: 0.8734 



EPOCH NUMBER:  9 (RUN:  1 COMB:  RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7436002239474269, 'tp': 2191, 'tn': 2168, 'fp': 325, 'fn': 316, 'auroc': 0.9412051390482901, 'auprc': 0.9324646645930341, 'eval_loss': 0.878301694726944}
Correct predictions are:  4359
Total predictions are:  5000
Accuracy on test set is: 0.8718 


[0.873, 0.868, 0.8794, 0.8776, 0.877, 0.8684, 0.873, 0.8694, 0.8734, 0.8718]

RUN NUMBER:  2


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  2 COMB:  RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7349594896657105, 'tp': 2203, 'tn': 2134, 'fp': 359, 'fn': 304, 'auroc': 0.9407696956344138, 'auprc': 0.9391484243877412, 'eval_loss': 0.31997462005615235}
Correct predictions are:  4337
Total predictions are:  5000
Accuracy on test set is: 0.8674 



EPOCH NUMBER:  1 (RUN:  2 COMB:  RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.736530535794325, 'tp': 2155, 'tn': 2186, 'fp': 307, 'fn': 352, 'auroc': 0.941350740189803, 'auprc': 0.9398767761550346, 'eval_loss': 0.3809477195739746}
Correct predictions are:  4341
Total predictions are:  5000
Accuracy on test set is: 0.8682 



EPOCH NUMBER:  2 (RUN:  2 COMB:  RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7348176215576938, 'tp': 2134, 'tn': 2202, 'fp': 291, 'fn': 373, 'auroc': 0.9412232191900385, 'auprc': 0.939196791601802, 'eval_loss': 0.45924263229370116}
Correct predictions are:  4336
Total predictions are:  5000
Accuracy on test set is: 0.8672 



EPOCH NUMBER:  3 (RUN:  2 COMB:  RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7408274800653902, 'tp': 2173, 'tn': 2179, 'fp': 314, 'fn': 334, 'auroc': 0.9394303251337491, 'auprc': 0.9389541405143433, 'eval_loss': 0.48477059020996094}
Correct predictions are:  4352
Total predictions are:  5000
Accuracy on test set is: 0.8704 



EPOCH NUMBER:  4 (RUN:  2 COMB:  RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7355162832807745, 'tp': 2232, 'tn': 2105, 'fp': 388, 'fn': 275, 'auroc': 0.9414016205887055, 'auprc': 0.9389505435924268, 'eval_loss': 0.6112964548110962}
Correct predictions are:  4337
Total predictions are:  5000
Accuracy on test set is: 0.8674 



EPOCH NUMBER:  5 (RUN:  2 COMB:  RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7274814965827919, 'tp': 2221, 'tn': 2096, 'fp': 397, 'fn': 286, 'auroc': 0.9076183957282223, 'auprc': 0.8611708434990103, 'eval_loss': 0.6968697444915771}
Correct predictions are:  4317
Total predictions are:  5000
Accuracy on test set is: 0.8634 



EPOCH NUMBER:  6 (RUN:  2 COMB:  RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7302337106480639, 'tp': 2139, 'tn': 2186, 'fp': 307, 'fn': 368, 'auroc': 0.9358489370556666, 'auprc': 0.9326528921244284, 'eval_loss': 0.7117902132034302}
Correct predictions are:  4325
Total predictions are:  5000
Accuracy on test set is: 0.865 



EPOCH NUMBER:  7 (RUN:  2 COMB:  RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7245849406359417, 'tp': 2113, 'tn': 2197, 'fp': 296, 'fn': 394, 'auroc': 0.9132292397172392, 'auprc': 0.8766669562367203, 'eval_loss': 0.7624769472122193}
Correct predictions are:  4310
Total predictions are:  5000
Accuracy on test set is: 0.862 



EPOCH NUMBER:  8 (RUN:  2 COMB:  RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7163356637752235, 'tp': 2070, 'tn': 2217, 'fp': 276, 'fn': 437, 'auroc': 0.9310188991881696, 'auprc': 0.9256781213545183, 'eval_loss': 0.8027086275100708}
Correct predictions are:  4287
Total predictions are:  5000
Accuracy on test set is: 0.8574 



EPOCH NUMBER:  9 (RUN:  2 COMB:  RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7245219102823922, 'tp': 2187, 'tn': 2124, 'fp': 369, 'fn': 320, 'auroc': 0.9324925907419113, 'auprc': 0.9196810678902079, 'eval_loss': 0.7846207954406739}
Correct predictions are:  4311
Total predictions are:  5000
Accuracy on test set is: 0.8622 


[0.8674, 0.8682, 0.8672, 0.8704, 0.8674, 0.8634, 0.865, 0.862, 0.8574, 0.8622]

RUN NUMBER:  3


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  3 COMB:  RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7480972957587829, 'tp': 2147, 'tn': 2222, 'fp': 271, 'fn': 360, 'auroc': 0.9462222983828193, 'auprc': 0.9449792790527226, 'eval_loss': 0.33999703369140627}
Correct predictions are:  4369
Total predictions are:  5000
Accuracy on test set is: 0.8738 



EPOCH NUMBER:  1 (RUN:  3 COMB:  RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7520165666239198, 'tp': 2207, 'tn': 2173, 'fp': 320, 'fn': 300, 'auroc': 0.9497681661824229, 'auprc': 0.9470347609702826, 'eval_loss': 0.40456755027770996}
Correct predictions are:  4380
Total predictions are:  5000
Accuracy on test set is: 0.876 



EPOCH NUMBER:  2 (RUN:  3 COMB:  RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7532118418409318, 'tp': 2192, 'tn': 2191, 'fp': 302, 'fn': 315, 'auroc': 0.9496344051337362, 'auprc': 0.948581080637873, 'eval_loss': 0.5250691833496094}
Correct predictions are:  4383
Total predictions are:  5000
Accuracy on test set is: 0.8766 



EPOCH NUMBER:  3 (RUN:  3 COMB:  RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7473228126521242, 'tp': 2129, 'tn': 2237, 'fp': 256, 'fn': 378, 'auroc': 0.9456262137095154, 'auprc': 0.944698888406525, 'eval_loss': 0.6372101324081421}
Correct predictions are:  4366
Total predictions are:  5000
Accuracy on test set is: 0.8732 



EPOCH NUMBER:  4 (RUN:  3 COMB:  RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7488371247940929, 'tp': 2207, 'tn': 2165, 'fp': 328, 'fn': 300, 'auroc': 0.9490398404723491, 'auprc': 0.9475934045808128, 'eval_loss': 0.6214052081108093}
Correct predictions are:  4372
Total predictions are:  5000
Accuracy on test set is: 0.8744 



EPOCH NUMBER:  5 (RUN:  3 COMB:  RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7411492891635341, 'tp': 2240, 'tn': 2111, 'fp': 382, 'fn': 267, 'auroc': 0.946495420524097, 'auprc': 0.9449778455324357, 'eval_loss': 0.677433264541626}
Correct predictions are:  4351
Total predictions are:  5000
Accuracy on test set is: 0.8702 



EPOCH NUMBER:  6 (RUN:  3 COMB:  RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7419189458289807, 'tp': 2222, 'tn': 2132, 'fp': 361, 'fn': 285, 'auroc': 0.9429585127947404, 'auprc': 0.9385238223346465, 'eval_loss': 0.7321016736984253}
Correct predictions are:  4354
Total predictions are:  5000
Accuracy on test set is: 0.8708 



EPOCH NUMBER:  7 (RUN:  3 COMB:  RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.734903631756643, 'tp': 2198, 'tn': 2139, 'fp': 354, 'fn': 309, 'auroc': 0.9403268121622074, 'auprc': 0.9367761364752454, 'eval_loss': 0.7789405588388443}
Correct predictions are:  4337
Total predictions are:  5000
Accuracy on test set is: 0.8674 



EPOCH NUMBER:  8 (RUN:  3 COMB:  RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7389890998703145, 'tp': 2116, 'tn': 2229, 'fp': 264, 'fn': 391, 'auroc': 0.9448449275842322, 'auprc': 0.9425989331080934, 'eval_loss': 0.8037175976276397}
Correct predictions are:  4345
Total predictions are:  5000
Accuracy on test set is: 0.869 



EPOCH NUMBER:  9 (RUN:  3 COMB:  RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7282717714057255, 'tp': 2134, 'tn': 2186, 'fp': 307, 'fn': 373, 'auroc': 0.9399451291698129, 'auprc': 0.9329421130995352, 'eval_loss': 0.8947631598949433}
Correct predictions are:  4320
Total predictions are:  5000
Accuracy on test set is: 0.864 


[0.8738, 0.876, 0.8766, 0.8732, 0.8744, 0.8702, 0.8708, 0.8674, 0.869, 0.864]

RUN NUMBER:  4


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  4 COMB:  RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7488371247940929, 'tp': 2207, 'tn': 2165, 'fp': 328, 'fn': 300, 'auroc': 0.9470433448198234, 'auprc': 0.943964290387912, 'eval_loss': 0.3188382080078125}
Correct predictions are:  4372
Total predictions are:  5000
Accuracy on test set is: 0.8744 



EPOCH NUMBER:  1 (RUN:  4 COMB:  RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7413741985527509, 'tp': 2212, 'tn': 2141, 'fp': 352, 'fn': 295, 'auroc': 0.9454900526420127, 'auprc': 0.9436702836711631, 'eval_loss': 0.44358078804016116}
Correct predictions are:  4353
Total predictions are:  5000
Accuracy on test set is: 0.8706 



EPOCH NUMBER:  2 (RUN:  4 COMB:  RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7385252224194787, 'tp': 2158, 'tn': 2188, 'fp': 305, 'fn': 349, 'auroc': 0.9462054182504791, 'auprc': 0.9441873642845674, 'eval_loss': 0.5110960702896118}
Correct predictions are:  4346
Total predictions are:  5000
Accuracy on test set is: 0.8692 



EPOCH NUMBER:  3 (RUN:  4 COMB:  RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7440136730671969, 'tp': 2180, 'tn': 2180, 'fp': 313, 'fn': 327, 'auroc': 0.9488252787901857, 'auprc': 0.9487822410553619, 'eval_loss': 0.5805129176616669}
Correct predictions are:  4360
Total predictions are:  5000
Accuracy on test set is: 0.872 



EPOCH NUMBER:  4 (RUN:  4 COMB:  RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7425945984055008, 'tp': 2215, 'tn': 2141, 'fp': 352, 'fn': 292, 'auroc': 0.9475067884532214, 'auprc': 0.9466412811527061, 'eval_loss': 0.6773825848817825}
Correct predictions are:  4356
Total predictions are:  5000
Accuracy on test set is: 0.8712 



EPOCH NUMBER:  5 (RUN:  4 COMB:  RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7437338783350926, 'tp': 2124, 'tn': 2233, 'fp': 260, 'fn': 383, 'auroc': 0.944677326270238, 'auprc': 0.942879489031335, 'eval_loss': 0.7255117858886718}
Correct predictions are:  4357
Total predictions are:  5000
Accuracy on test set is: 0.8714 



EPOCH NUMBER:  6 (RUN:  4 COMB:  RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7348717434856539, 'tp': 2159, 'tn': 2178, 'fp': 315, 'fn': 348, 'auroc': 0.9437749191953665, 'auprc': 0.9413251640037394, 'eval_loss': 0.8023413382530212}
Correct predictions are:  4337
Total predictions are:  5000
Accuracy on test set is: 0.8674 



EPOCH NUMBER:  7 (RUN:  4 COMB:  RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7372018382812625, 'tp': 2184, 'tn': 2159, 'fp': 334, 'fn': 323, 'auroc': 0.9427164308968182, 'auprc': 0.9360431727591161, 'eval_loss': 0.7291898706436157}
Correct predictions are:  4343
Total predictions are:  5000
Accuracy on test set is: 0.8686 



EPOCH NUMBER:  8 (RUN:  4 COMB:  RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7346276349611987, 'tp': 2124, 'tn': 2211, 'fp': 282, 'fn': 383, 'auroc': 0.9441335620071261, 'auprc': 0.9410077775259146, 'eval_loss': 0.7937760672092438}
Correct predictions are:  4335
Total predictions are:  5000
Accuracy on test set is: 0.867 



EPOCH NUMBER:  9 (RUN:  4 COMB:  RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7380156316618416, 'tp': 2172, 'tn': 2173, 'fp': 320, 'fn': 335, 'auroc': 0.9448685277692578, 'auprc': 0.9407982460920343, 'eval_loss': 0.8002376328468322}
Correct predictions are:  4345
Total predictions are:  5000
Accuracy on test set is: 0.869 


[0.8744, 0.8706, 0.8692, 0.872, 0.8712, 0.8714, 0.8674, 0.8686, 0.867, 0.869]

RUN NUMBER:  5


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  5 COMB:  RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7240067193948849, 'tp': 2157, 'tn': 2153, 'fp': 340, 'fn': 350, 'auroc': 0.9385498382307317, 'auprc': 0.9376645239716279, 'eval_loss': 0.32405532150268557}
Correct predictions are:  4310
Total predictions are:  5000
Accuracy on test set is: 0.862 



EPOCH NUMBER:  1 (RUN:  5 COMB:  RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.740004080021776, 'tp': 2178, 'tn': 2172, 'fp': 321, 'fn': 329, 'auroc': 0.9425908299121064, 'auprc': 0.9406465887342551, 'eval_loss': 0.34452029571533205}
Correct predictions are:  4350
Total predictions are:  5000
Accuracy on test set is: 0.87 



EPOCH NUMBER:  2 (RUN:  5 COMB:  RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7349337761081798, 'tp': 2224, 'tn': 2112, 'fp': 381, 'fn': 283, 'auroc': 0.9439192403268442, 'auprc': 0.9420062231216265, 'eval_loss': 0.5004435771942138}
Correct predictions are:  4336
Total predictions are:  5000
Accuracy on test set is: 0.8672 



EPOCH NUMBER:  3 (RUN:  5 COMB:  RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7193995960971805, 'tp': 2187, 'tn': 2111, 'fp': 382, 'fn': 320, 'auroc': 0.9393983248828671, 'auprc': 0.9359001558425145, 'eval_loss': 0.5639253684997558}
Correct predictions are:  4298
Total predictions are:  5000
Accuracy on test set is: 0.8596 



EPOCH NUMBER:  4 (RUN:  5 COMB:  RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.735816344713715, 'tp': 2126, 'tn': 2212, 'fp': 281, 'fn': 381, 'auroc': 0.9404512931381382, 'auprc': 0.9387986414547349, 'eval_loss': 0.5880575378417969}
Correct predictions are:  4338
Total predictions are:  5000
Accuracy on test set is: 0.8676 



EPOCH NUMBER:  5 (RUN:  5 COMB:  RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7211368870438484, 'tp': 2121, 'tn': 2181, 'fp': 312, 'fn': 386, 'auroc': 0.9367536641487272, 'auprc': 0.9326423210898028, 'eval_loss': 0.7415385482788086}
Correct predictions are:  4302
Total predictions are:  5000
Accuracy on test set is: 0.8604 



EPOCH NUMBER:  6 (RUN:  5 COMB:  RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.716004547730362, 'tp': 2159, 'tn': 2131, 'fp': 362, 'fn': 348, 'auroc': 0.898092721046933, 'auprc': 0.8652585027365238, 'eval_loss': 0.7876802278518676}
Correct predictions are:  4290
Total predictions are:  5000
Accuracy on test set is: 0.858 



EPOCH NUMBER:  7 (RUN:  5 COMB:  RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7265170856770269, 'tp': 2189, 'tn': 2127, 'fp': 366, 'fn': 318, 'auroc': 0.9357926166141142, 'auprc': 0.9300631937901842, 'eval_loss': 0.7532929393768311}
Correct predictions are:  4316
Total predictions are:  5000
Accuracy on test set is: 0.8632 



EPOCH NUMBER:  8 (RUN:  5 COMB:  RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7201489079926281, 'tp': 2184, 'tn': 2116, 'fp': 377, 'fn': 323, 'auroc': 0.923903563403937, 'auprc': 0.8984657118075209, 'eval_loss': 0.756108141708374}
Correct predictions are:  4300
Total predictions are:  5000
Accuracy on test set is: 0.86 



EPOCH NUMBER:  9 (RUN:  5 COMB:  RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7221568584522006, 'tp': 2236, 'tn': 2066, 'fp': 427, 'fn': 271, 'auroc': 0.8882521638969649, 'auprc': 0.8103150127114452, 'eval_loss': 0.8105242092132569}
Correct predictions are:  4302
Total predictions are:  5000
Accuracy on test set is: 0.8604 


[0.862, 0.87, 0.8672, 0.8596, 0.8676, 0.8604, 0.858, 0.8632, 0.86, 0.8604]


 Over all runs maximum accuracies are: [0.87, 0.8704, 0.8744, 0.8766, 0.8794]
The median is: 0.8744
XLNet Accuracy Score on Test set ->  ['0.8744 +/- 0.0050000000000000044']


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a



* * * * EVALUATION USING RSW_STM AS PREPROCESSING FUNCTION * * * *

RUN NUMBER:  1


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  1 COMB:  RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6261815575055129, 'tp': 2072, 'tn': 1993, 'fp': 500, 'fn': 435, 'auroc': 0.8939066082278084, 'auprc': 0.8899466093557553, 'eval_loss': 0.4163298034667969}
Correct predictions are:  4065
Total predictions are:  5000
Accuracy on test set is: 0.813 



EPOCH NUMBER:  1 (RUN:  1 COMB:  RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6419933379568439, 'tp': 2017, 'tn': 2087, 'fp': 406, 'fn': 490, 'auroc': 0.9051195761374771, 'auprc': 0.9028021410259579, 'eval_loss': 0.4126333770751953}
Correct predictions are:  4104
Total predictions are:  5000
Accuracy on test set is: 0.8208 



EPOCH NUMBER:  2 (RUN:  1 COMB:  RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6397623355848466, 'tp': 2030, 'tn': 2069, 'fp': 424, 'fn': 477, 'auroc': 0.9049920551377123, 'auprc': 0.9014452457591926, 'eval_loss': 0.4673016357421875}
Correct predictions are:  4099
Total predictions are:  5000
Accuracy on test set is: 0.8198 



EPOCH NUMBER:  3 (RUN:  1 COMB:  RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6469428801767648, 'tp': 2094, 'tn': 2023, 'fp': 470, 'fn': 413, 'auroc': 0.9058208616355552, 'auprc': 0.9026320241789525, 'eval_loss': 0.6311463500976563}
Correct predictions are:  4117
Total predictions are:  5000
Accuracy on test set is: 0.8234 



EPOCH NUMBER:  4 (RUN:  1 COMB:  RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.641462525705041, 'tp': 2003, 'tn': 2099, 'fp': 394, 'fn': 504, 'auroc': 0.9002036975969891, 'auprc': 0.8776978271468228, 'eval_loss': 0.6630307662963867}
Correct predictions are:  4102
Total predictions are:  5000
Accuracy on test set is: 0.8204 



EPOCH NUMBER:  5 (RUN:  1 COMB:  RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6457429632121908, 'tp': 2014, 'tn': 2099, 'fp': 394, 'fn': 493, 'auroc': 0.8658931886025989, 'auprc': 0.8704458928400756, 'eval_loss': 0.6566553619384765}
Correct predictions are:  4113
Total predictions are:  5000
Accuracy on test set is: 0.8226 



EPOCH NUMBER:  6 (RUN:  1 COMB:  RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6312857295673028, 'tp': 2027, 'tn': 2051, 'fp': 442, 'fn': 480, 'auroc': 0.8915658698764201, 'auprc': 0.8606323914618721, 'eval_loss': 0.7628189208984375}
Correct predictions are:  4078
Total predictions are:  5000
Accuracy on test set is: 0.8156 



EPOCH NUMBER:  7 (RUN:  1 COMB:  RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6465576849309319, 'tp': 2039, 'tn': 2077, 'fp': 416, 'fn': 468, 'auroc': 0.8482687304268466, 'auprc': 0.8617287991586904, 'eval_loss': 0.8265394199371338}
Correct predictions are:  4116
Total predictions are:  5000
Accuracy on test set is: 0.8232 



EPOCH NUMBER:  8 (RUN:  1 COMB:  RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6393349792835368, 'tp': 2084, 'tn': 2014, 'fp': 479, 'fn': 423, 'auroc': 0.8721879579535903, 'auprc': 0.876170367517753, 'eval_loss': 0.7757211013793945}
Correct predictions are:  4098
Total predictions are:  5000
Accuracy on test set is: 0.8196 



EPOCH NUMBER:  9 (RUN:  1 COMB:  RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6404396917348397, 'tp': 2074, 'tn': 2027, 'fp': 466, 'fn': 433, 'auroc': 0.8072668089717823, 'auprc': 0.7440922154602977, 'eval_loss': 0.8295438491821289}
Correct predictions are:  4101
Total predictions are:  5000
Accuracy on test set is: 0.8202 


[0.813, 0.8208, 0.8198, 0.8234, 0.8204, 0.8226, 0.8156, 0.8232, 0.8196, 0.8202]

RUN NUMBER:  2


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  2 COMB:  RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.627502888248018, 'tp': 2004, 'tn': 2064, 'fp': 429, 'fn': 503, 'auroc': 0.8949621365031502, 'auprc': 0.8906086982762323, 'eval_loss': 0.42679334716796874}
Correct predictions are:  4068
Total predictions are:  5000
Accuracy on test set is: 0.8136 



EPOCH NUMBER:  1 (RUN:  2 COMB:  RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6361084685954904, 'tp': 1940, 'tn': 2144, 'fp': 349, 'fn': 567, 'auroc': 0.9040786079762866, 'auprc': 0.9001148734293727, 'eval_loss': 0.4429325439453125}
Correct predictions are:  4084
Total predictions are:  5000
Accuracy on test set is: 0.8168 



EPOCH NUMBER:  2 (RUN:  2 COMB:  RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6537396102166431, 'tp': 2102, 'tn': 2032, 'fp': 461, 'fn': 405, 'auroc': 0.9063946261338689, 'auprc': 0.9019343984977044, 'eval_loss': 0.4862514038085938}
Correct predictions are:  4134
Total predictions are:  5000
Accuracy on test set is: 0.8268 



EPOCH NUMBER:  3 (RUN:  2 COMB:  RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6437418185369009, 'tp': 2090, 'tn': 2019, 'fp': 474, 'fn': 417, 'auroc': 0.9049223745914168, 'auprc': 0.8948223936599404, 'eval_loss': 0.6140489486694336}
Correct predictions are:  4109
Total predictions are:  5000
Accuracy on test set is: 0.8218 



EPOCH NUMBER:  4 (RUN:  2 COMB:  RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6487974217037961, 'tp': 2072, 'tn': 2050, 'fp': 443, 'fn': 435, 'auroc': 0.9062789452269306, 'auprc': 0.9042261131831648, 'eval_loss': 0.6283536666870118}
Correct predictions are:  4122
Total predictions are:  5000
Accuracy on test set is: 0.8244 



EPOCH NUMBER:  5 (RUN:  2 COMB:  RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6434747988458749, 'tp': 1914, 'tn': 2184, 'fp': 309, 'fn': 593, 'auroc': 0.9000904967094941, 'auprc': 0.896706139519508, 'eval_loss': 0.7400112014770508}
Correct predictions are:  4098
Total predictions are:  5000
Accuracy on test set is: 0.8196 



EPOCH NUMBER:  6 (RUN:  2 COMB:  RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6378146467347753, 'tp': 1931, 'tn': 2156, 'fp': 337, 'fn': 576, 'auroc': 0.8964763883748849, 'auprc': 0.8732883626677705, 'eval_loss': 0.794262297821045}
Correct predictions are:  4087
Total predictions are:  5000
Accuracy on test set is: 0.8174 



EPOCH NUMBER:  7 (RUN:  2 COMB:  RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6392422473448439, 'tp': 2043, 'tn': 2055, 'fp': 438, 'fn': 464, 'auroc': 0.8868668730362846, 'auprc': 0.8534132989565996, 'eval_loss': 0.8242309875488282}
Correct predictions are:  4098
Total predictions are:  5000
Accuracy on test set is: 0.8196 



EPOCH NUMBER:  8 (RUN:  2 COMB:  RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6387620719708423, 'tp': 1968, 'tn': 2125, 'fp': 368, 'fn': 539, 'auroc': 0.8906315425512936, 'auprc': 0.8621457013380969, 'eval_loss': 0.9015211891174316}
Correct predictions are:  4093
Total predictions are:  5000
Accuracy on test set is: 0.8186 



EPOCH NUMBER:  9 (RUN:  2 COMB:  RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6348389528422452, 'tp': 2067, 'tn': 2020, 'fp': 473, 'fn': 440, 'auroc': 0.8942810111631274, 'auprc': 0.8730406506803373, 'eval_loss': 0.90748264503479}
Correct predictions are:  4087
Total predictions are:  5000
Accuracy on test set is: 0.8174 


[0.8136, 0.8168, 0.8268, 0.8218, 0.8244, 0.8196, 0.8174, 0.8196, 0.8186, 0.8174]

RUN NUMBER:  3


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  3 COMB:  RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 2507, 'tn': 0, 'fp': 2493, 'fn': 0, 'auroc': 0.4916209743084386, 'auprc': 0.49546097264388017, 'eval_loss': 0.6939625}
Correct predictions are:  2507
Total predictions are:  5000
Accuracy on test set is: 0.5014 



EPOCH NUMBER:  1 (RUN:  3 COMB:  RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 2507, 'tn': 0, 'fp': 2493, 'fn': 0, 'auroc': 0.49953015631642556, 'auprc': 0.5011838736057096, 'eval_loss': 0.69329970703125}
Correct predictions are:  2507
Total predictions are:  5000
Accuracy on test set is: 0.5014 



EPOCH NUMBER:  2 (RUN:  3 COMB:  RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 0, 'tn': 2493, 'fp': 0, 'fn': 2507, 'auroc': 0.5017641738311228, 'auprc': 0.5022142978894069, 'eval_loss': 0.69312822265625}
Correct predictions are:  2493
Total predictions are:  5000
Accuracy on test set is: 0.4986 



EPOCH NUMBER:  3 (RUN:  3 COMB:  RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 0, 'tn': 2493, 'fp': 0, 'fn': 2507, 'auroc': 0.49820206590419663, 'auprc': 0.5002989282059337, 'eval_loss': 0.6933302734375}
Correct predictions are:  2493
Total predictions are:  5000
Accuracy on test set is: 0.4986 



EPOCH NUMBER:  4 (RUN:  3 COMB:  RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 0, 'tn': 2493, 'fp': 0, 'fn': 2507, 'auroc': 0.5041526725569528, 'auprc': 0.5035135091639602, 'eval_loss': 0.69347890625}
Correct predictions are:  2493
Total predictions are:  5000
Accuracy on test set is: 0.4986 



EPOCH NUMBER:  5 (RUN:  3 COMB:  RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 0, 'tn': 2493, 'fp': 0, 'fn': 2507, 'auroc': 0.49948231594135695, 'auprc': 0.5012386934026108, 'eval_loss': 0.694212890625}
Correct predictions are:  2493
Total predictions are:  5000
Accuracy on test set is: 0.4986 



EPOCH NUMBER:  6 (RUN:  3 COMB:  RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 2507, 'tn': 0, 'fp': 2493, 'fn': 0, 'auroc': 0.5, 'auprc': 0.5014, 'eval_loss': 0.6942087890625}
Correct predictions are:  2507
Total predictions are:  5000
Accuracy on test set is: 0.5014 



EPOCH NUMBER:  7 (RUN:  3 COMB:  RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 2507, 'tn': 0, 'fp': 2493, 'fn': 0, 'auroc': 0.5157326833442374, 'auprc': 0.5095634056630498, 'eval_loss': 0.69324697265625}
Correct predictions are:  2507
Total predictions are:  5000
Accuracy on test set is: 0.5014 



EPOCH NUMBER:  8 (RUN:  3 COMB:  RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 2507, 'tn': 0, 'fp': 2493, 'fn': 0, 'auroc': 0.5001867214638963, 'auprc': 0.501493386842967, 'eval_loss': 0.693170703125}
Correct predictions are:  2507
Total predictions are:  5000
Accuracy on test set is: 0.5014 



EPOCH NUMBER:  9 (RUN:  3 COMB:  RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 0, 'tn': 2493, 'fp': 0, 'fn': 2507, 'auroc': 0.5034697072025045, 'auprc': 0.5040051720363774, 'eval_loss': 0.69496123046875}
Correct predictions are:  2493
Total predictions are:  5000
Accuracy on test set is: 0.4986 


[0.5014, 0.5014, 0.4986, 0.4986, 0.4986, 0.4986, 0.5014, 0.5014, 0.5014, 0.4986]

RUN NUMBER:  4


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  4 COMB:  RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 2507, 'tn': 0, 'fp': 2493, 'fn': 0, 'auroc': 0.5694086241636135, 'auprc': 0.5451751815589667, 'eval_loss': 0.69420078125}
Correct predictions are:  2507
Total predictions are:  5000
Accuracy on test set is: 0.5014 



EPOCH NUMBER:  1 (RUN:  4 COMB:  RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 2507, 'tn': 0, 'fp': 2493, 'fn': 0, 'auroc': 0.49887871120909594, 'auprc': 0.5011643148422398, 'eval_loss': 0.69397080078125}
Correct predictions are:  2507
Total predictions are:  5000
Accuracy on test set is: 0.5014 



EPOCH NUMBER:  2 (RUN:  4 COMB:  RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 2507, 'tn': 0, 'fp': 2493, 'fn': 0, 'auroc': 0.48640829344102054, 'auprc': 0.49482846434278166, 'eval_loss': 0.693712890625}
Correct predictions are:  2507
Total predictions are:  5000
Accuracy on test set is: 0.5014 



EPOCH NUMBER:  3 (RUN:  4 COMB:  RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 0, 'tn': 2493, 'fp': 0, 'fn': 2507, 'auroc': 0.5138281884129972, 'auprc': 0.5093160318640015, 'eval_loss': 0.69473603515625}
Correct predictions are:  2493
Total predictions are:  5000
Accuracy on test set is: 0.4986 



EPOCH NUMBER:  4 (RUN:  4 COMB:  RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 2507, 'tn': 0, 'fp': 2493, 'fn': 0, 'auroc': 0.5131938634398894, 'auprc': 0.5081547593536155, 'eval_loss': 0.693524609375}
Correct predictions are:  2507
Total predictions are:  5000
Accuracy on test set is: 0.5014 



EPOCH NUMBER:  5 (RUN:  4 COMB:  RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 2507, 'tn': 0, 'fp': 2493, 'fn': 0, 'auroc': 0.5391194266963053, 'auprc': 0.5219317297690237, 'eval_loss': 0.69352060546875}
Correct predictions are:  2507
Total predictions are:  5000
Accuracy on test set is: 0.5014 



EPOCH NUMBER:  6 (RUN:  4 COMB:  RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 0, 'tn': 2493, 'fp': 0, 'fn': 2507, 'auroc': 0.49537964377640725, 'auprc': 0.49909004503149634, 'eval_loss': 0.69315595703125}
Correct predictions are:  2493
Total predictions are:  5000
Accuracy on test set is: 0.4986 



EPOCH NUMBER:  7 (RUN:  4 COMB:  RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 0, 'tn': 2493, 'fp': 0, 'fn': 2507, 'auroc': 0.5019043349299858, 'auprc': 0.5024054466222119, 'eval_loss': 0.693169140625}
Correct predictions are:  2493
Total predictions are:  5000
Accuracy on test set is: 0.4986 



EPOCH NUMBER:  8 (RUN:  4 COMB:  RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 0, 'tn': 2493, 'fp': 0, 'fn': 2507, 'auroc': 0.4940327532167852, 'auprc': 0.4984311634652613, 'eval_loss': 0.69364208984375}
Correct predictions are:  2493
Total predictions are:  5000
Accuracy on test set is: 0.4986 



EPOCH NUMBER:  9 (RUN:  4 COMB:  RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 2507, 'tn': 0, 'fp': 2493, 'fn': 0, 'auroc': 0.500182721432536, 'auprc': 0.5022584103111426, 'eval_loss': 0.69330859375}
Correct predictions are:  2507
Total predictions are:  5000
Accuracy on test set is: 0.5014 


[0.5014, 0.5014, 0.5014, 0.4986, 0.5014, 0.5014, 0.4986, 0.4986, 0.4986, 0.5014]

RUN NUMBER:  5


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  5 COMB:  RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6592664597378609, 'tp': 2101, 'tn': 2047, 'fp': 446, 'fn': 406, 'auroc': 0.91287259692116, 'auprc': 0.9101704334873268, 'eval_loss': 0.39764465026855467}
Correct predictions are:  4148
Total predictions are:  5000
Accuracy on test set is: 0.8296 



EPOCH NUMBER:  1 (RUN:  5 COMB:  RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.676706843553879, 'tp': 2142, 'tn': 2049, 'fp': 444, 'fn': 365, 'auroc': 0.9173496720214287, 'auprc': 0.9121310629990356, 'eval_loss': 0.4209701602935791}
Correct predictions are:  4191
Total predictions are:  5000
Accuracy on test set is: 0.8382 



EPOCH NUMBER:  2 (RUN:  5 COMB:  RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.674535148914178, 'tp': 2127, 'tn': 2059, 'fp': 434, 'fn': 380, 'auroc': 0.9142262875340943, 'auprc': 0.9116171057006318, 'eval_loss': 0.5727217468261718}
Correct predictions are:  4186
Total predictions are:  5000
Accuracy on test set is: 0.8372 



EPOCH NUMBER:  3 (RUN:  5 COMB:  RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6659989831970334, 'tp': 2088, 'tn': 2077, 'fp': 416, 'fn': 419, 'auroc': 0.9159391809631787, 'auprc': 0.9127407252178057, 'eval_loss': 0.697053418636322}
Correct predictions are:  4165
Total predictions are:  5000
Accuracy on test set is: 0.833 



EPOCH NUMBER:  4 (RUN:  5 COMB:  RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6654039439906511, 'tp': 2170, 'tn': 1990, 'fp': 503, 'fn': 337, 'auroc': 0.9134738016346048, 'auprc': 0.910966537830741, 'eval_loss': 0.7369520895957947}
Correct predictions are:  4160
Total predictions are:  5000
Accuracy on test set is: 0.832 



EPOCH NUMBER:  5 (RUN:  5 COMB:  RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.672154406488437, 'tp': 2031, 'tn': 2147, 'fp': 346, 'fn': 476, 'auroc': 0.9146319707146506, 'auprc': 0.902525328428719, 'eval_loss': 0.8697445841789245}
Correct predictions are:  4178
Total predictions are:  5000
Accuracy on test set is: 0.8356 



EPOCH NUMBER:  6 (RUN:  5 COMB:  RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6679598804337741, 'tp': 2033, 'tn': 2135, 'fp': 358, 'fn': 474, 'auroc': 0.9104735381125387, 'auprc': 0.9056483122118867, 'eval_loss': 0.9595547863960266}
Correct predictions are:  4168
Total predictions are:  5000
Accuracy on test set is: 0.8336 



EPOCH NUMBER:  7 (RUN:  5 COMB:  RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.659291336587775, 'tp': 2003, 'tn': 2142, 'fp': 351, 'fn': 504, 'auroc': 0.908634003690589, 'auprc': 0.901181271810251, 'eval_loss': 1.0349119250774383}
Correct predictions are:  4145
Total predictions are:  5000
Accuracy on test set is: 0.829 



EPOCH NUMBER:  8 (RUN:  5 COMB:  RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.647494227371819, 'tp': 2030, 'tn': 2088, 'fp': 405, 'fn': 477, 'auroc': 0.9061602242961584, 'auprc': 0.8992977248750672, 'eval_loss': 1.1039241421222687}
Correct predictions are:  4118
Total predictions are:  5000
Accuracy on test set is: 0.8236 



EPOCH NUMBER:  9 (RUN:  5 COMB:  RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6576680018797325, 'tp': 2033, 'tn': 2110, 'fp': 383, 'fn': 474, 'auroc': 0.8964273479904081, 'auprc': 0.8674042274574504, 'eval_loss': 1.0275977524757385}
Correct predictions are:  4143
Total predictions are:  5000
Accuracy on test set is: 0.8286 


[0.8296, 0.8382, 0.8372, 0.833, 0.832, 0.8356, 0.8336, 0.829, 0.8236, 0.8286]


 Over all runs maximum accuracies are: [0.5014, 0.5014, 0.8234, 0.8268, 0.8382]
The median is: 0.8234
XLNet Accuracy Score on Test set ->  ['0.8234 +/- 0.32200000000000006']


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a



* * * * EVALUATION USING STM_LOW AS PREPROCESSING FUNCTION * * * *

RUN NUMBER:  1


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  1 COMB:  STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6874172186318842, 'tp': 2064, 'tn': 2153, 'fp': 340, 'fn': 443, 'auroc': 0.9272859099215338, 'auprc': 0.9255350962106819, 'eval_loss': 0.37205086135864257}
Correct predictions are:  4217
Total predictions are:  5000
Accuracy on test set is: 0.8434 



EPOCH NUMBER:  1 (RUN:  1 COMB:  STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6894532325622224, 'tp': 2086, 'tn': 2137, 'fp': 356, 'fn': 421, 'auroc': 0.9297107289321149, 'auprc': 0.9290687569164174, 'eval_loss': 0.3816122207641602}
Correct predictions are:  4223
Total predictions are:  5000
Accuracy on test set is: 0.8446 



EPOCH NUMBER:  2 (RUN:  1 COMB:  STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.684062590058956, 'tp': 2067, 'tn': 2142, 'fp': 351, 'fn': 440, 'auroc': 0.926578944378924, 'auprc': 0.9243450746897243, 'eval_loss': 0.6117204151153565}
Correct predictions are:  4209
Total predictions are:  5000
Accuracy on test set is: 0.8418 



EPOCH NUMBER:  3 (RUN:  1 COMB:  STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6746510872422359, 'tp': 2136, 'tn': 2050, 'fp': 443, 'fn': 371, 'auroc': 0.9171709506202528, 'auprc': 0.9125392935310007, 'eval_loss': 0.8057797685623169}
Correct predictions are:  4186
Total predictions are:  5000
Accuracy on test set is: 0.8372 



EPOCH NUMBER:  4 (RUN:  1 COMB:  STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6879789956542318, 'tp': 2058, 'tn': 2160, 'fp': 333, 'fn': 449, 'auroc': 0.9246510092639126, 'auprc': 0.9218071362561556, 'eval_loss': 0.7311303119659424}
Correct predictions are:  4218
Total predictions are:  5000
Accuracy on test set is: 0.8436 



EPOCH NUMBER:  5 (RUN:  1 COMB:  STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6812275930116148, 'tp': 2122, 'tn': 2081, 'fp': 412, 'fn': 385, 'auroc': 0.9185411213623914, 'auprc': 0.91058585789061, 'eval_loss': 0.7980161355018616}
Correct predictions are:  4203
Total predictions are:  5000
Accuracy on test set is: 0.8406 



EPOCH NUMBER:  6 (RUN:  1 COMB:  STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6824171771408784, 'tp': 2102, 'tn': 2104, 'fp': 389, 'fn': 405, 'auroc': 0.9124820338591455, 'auprc': 0.8932714983000467, 'eval_loss': 0.8673365501403809}
Correct predictions are:  4206
Total predictions are:  5000
Accuracy on test set is: 0.8412 



EPOCH NUMBER:  7 (RUN:  1 COMB:  STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.681084023595305, 'tp': 2053, 'tn': 2148, 'fp': 345, 'fn': 454, 'auroc': 0.9219610681747744, 'auprc': 0.9116611256959781, 'eval_loss': 0.8816281190872193}
Correct predictions are:  4201
Total predictions are:  5000
Accuracy on test set is: 0.8402 



EPOCH NUMBER:  8 (RUN:  1 COMB:  STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6666366184513374, 'tp': 2146, 'tn': 2019, 'fp': 474, 'fn': 361, 'auroc': 0.9147078913098678, 'auprc': 0.9106838359057308, 'eval_loss': 0.8969599601268768}
Correct predictions are:  4165
Total predictions are:  5000
Accuracy on test set is: 0.833 



EPOCH NUMBER:  9 (RUN:  1 COMB:  STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6780850817909525, 'tp': 2086, 'tn': 2109, 'fp': 384, 'fn': 421, 'auroc': 0.89855848469852, 'auprc': 0.9080214785472769, 'eval_loss': 0.8394375566482544}
Correct predictions are:  4195
Total predictions are:  5000
Accuracy on test set is: 0.839 


[0.8434, 0.8446, 0.8418, 0.8372, 0.8436, 0.8406, 0.8412, 0.8402, 0.833, 0.839]

RUN NUMBER:  2


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  2 COMB:  STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6436393663544574, 'tp': 2049, 'tn': 2060, 'fp': 433, 'fn': 458, 'auroc': 0.8984621639433653, 'auprc': 0.8980301744077556, 'eval_loss': 0.41098277282714846}
Correct predictions are:  4109
Total predictions are:  5000
Accuracy on test set is: 0.8218 



EPOCH NUMBER:  1 (RUN:  2 COMB:  STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6915611788825048, 'tp': 2082, 'tn': 2146, 'fp': 347, 'fn': 425, 'auroc': 0.9236904417330631, 'auprc': 0.921661377182376, 'eval_loss': 0.38809669799804686}
Correct predictions are:  4228
Total predictions are:  5000
Accuracy on test set is: 0.8456 



EPOCH NUMBER:  2 (RUN:  2 COMB:  STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6889899342260632, 'tp': 2090, 'tn': 2132, 'fp': 361, 'fn': 417, 'auroc': 0.9229885162299674, 'auprc': 0.9214096322854208, 'eval_loss': 0.49336202697753906}
Correct predictions are:  4222
Total predictions are:  5000
Accuracy on test set is: 0.8444 



EPOCH NUMBER:  3 (RUN:  2 COMB:  STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6808703625894749, 'tp': 2128, 'tn': 2074, 'fp': 419, 'fn': 379, 'auroc': 0.9220473888515286, 'auprc': 0.9201749552647781, 'eval_loss': 0.6620577241897583}
Correct predictions are:  4202
Total predictions are:  5000
Accuracy on test set is: 0.8404 



EPOCH NUMBER:  4 (RUN:  2 COMB:  STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6848065375058403, 'tp': 2108, 'tn': 2104, 'fp': 389, 'fn': 399, 'auroc': 0.920215374488536, 'auprc': 0.9161725362204032, 'eval_loss': 0.653645580291748}
Correct predictions are:  4212
Total predictions are:  5000
Accuracy on test set is: 0.8424 



EPOCH NUMBER:  5 (RUN:  2 COMB:  STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6712962600752271, 'tp': 2049, 'tn': 2128, 'fp': 365, 'fn': 458, 'auroc': 0.9198242514221312, 'auprc': 0.9186434855289882, 'eval_loss': 0.8466745985984803}
Correct predictions are:  4177
Total predictions are:  5000
Accuracy on test set is: 0.8354 



EPOCH NUMBER:  6 (RUN:  2 COMB:  STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6712419785817421, 'tp': 2034, 'tn': 2142, 'fp': 351, 'fn': 473, 'auroc': 0.917951436739264, 'auprc': 0.9150834304762814, 'eval_loss': 0.8244541509628296}
Correct predictions are:  4176
Total predictions are:  5000
Accuracy on test set is: 0.8352 



EPOCH NUMBER:  7 (RUN:  2 COMB:  STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6704552150977428, 'tp': 2050, 'tn': 2125, 'fp': 368, 'fn': 457, 'auroc': 0.9150047736374256, 'auprc': 0.9135372004468926, 'eval_loss': 0.8898915777206421}
Correct predictions are:  4175
Total predictions are:  5000
Accuracy on test set is: 0.835 



EPOCH NUMBER:  8 (RUN:  2 COMB:  STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6837138910172196, 'tp': 2090, 'tn': 2119, 'fp': 374, 'fn': 417, 'auroc': 0.9205849773862228, 'auprc': 0.9178070673770173, 'eval_loss': 0.9114781624317169}
Correct predictions are:  4209
Total predictions are:  5000
Accuracy on test set is: 0.8418 



EPOCH NUMBER:  9 (RUN:  2 COMB:  STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6700435973099819, 'tp': 2111, 'tn': 2064, 'fp': 429, 'fn': 396, 'auroc': 0.9133793208938759, 'auprc': 0.9072932416521958, 'eval_loss': 0.9749947853088379}
Correct predictions are:  4175
Total predictions are:  5000
Accuracy on test set is: 0.835 


[0.8218, 0.8456, 0.8444, 0.8404, 0.8424, 0.8354, 0.8352, 0.835, 0.8418, 0.835]

RUN NUMBER:  3


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  3 COMB:  STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.36664244427752046, 'tp': 1391, 'tn': 1996, 'fp': 497, 'fn': 1116, 'auroc': 0.7459249680517495, 'auprc': 0.7494469591171897, 'eval_loss': 0.640272216796875}
Correct predictions are:  3387
Total predictions are:  5000
Accuracy on test set is: 0.6774 



EPOCH NUMBER:  1 (RUN:  3 COMB:  STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.550222379640239, 'tp': 1870, 'tn': 2003, 'fp': 490, 'fn': 637, 'auroc': 0.8529766873372289, 'auprc': 0.8534157248160482, 'eval_loss': 0.5163562805175781}
Correct predictions are:  3873
Total predictions are:  5000
Accuracy on test set is: 0.7746 



EPOCH NUMBER:  2 (RUN:  3 COMB:  STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.5842860387680839, 'tp': 1799, 'tn': 2146, 'fp': 347, 'fn': 708, 'auroc': 0.8717633146243867, 'auprc': 0.8746722126659351, 'eval_loss': 0.49392645568847654}
Correct predictions are:  3945
Total predictions are:  5000
Accuracy on test set is: 0.789 



EPOCH NUMBER:  3 (RUN:  3 COMB:  STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.5973549900527083, 'tp': 1977, 'tn': 2016, 'fp': 477, 'fn': 530, 'auroc': 0.8788133698968201, 'auprc': 0.8804372962631335, 'eval_loss': 0.5322738952636719}
Correct predictions are:  3993
Total predictions are:  5000
Accuracy on test set is: 0.7986 



EPOCH NUMBER:  4 (RUN:  3 COMB:  STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6101686280731307, 'tp': 1992, 'tn': 2033, 'fp': 460, 'fn': 515, 'auroc': 0.8858528650864622, 'auprc': 0.8889245335074184, 'eval_loss': 0.5167064453125}
Correct predictions are:  4025
Total predictions are:  5000
Accuracy on test set is: 0.805 



EPOCH NUMBER:  5 (RUN:  3 COMB:  STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.5962828034809249, 'tp': 2167, 'tn': 1810, 'fp': 683, 'fn': 340, 'auroc': 0.8880603223929275, 'auprc': 0.8851027932953587, 'eval_loss': 0.6543003288269043}
Correct predictions are:  3977
Total predictions are:  5000
Accuracy on test set is: 0.7954 



EPOCH NUMBER:  6 (RUN:  3 COMB:  STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6152929784025505, 'tp': 2006, 'tn': 2032, 'fp': 461, 'fn': 501, 'auroc': 0.8835863673171198, 'auprc': 0.8799818610148966, 'eval_loss': 0.683696501159668}
Correct predictions are:  4038
Total predictions are:  5000
Accuracy on test set is: 0.8076 



EPOCH NUMBER:  7 (RUN:  3 COMB:  STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6114710011210277, 'tp': 1964, 'tn': 2063, 'fp': 430, 'fn': 543, 'auroc': 0.8874538376380872, 'auprc': 0.8828669206720837, 'eval_loss': 0.8136249244689941}
Correct predictions are:  4027
Total predictions are:  5000
Accuracy on test set is: 0.8054 



EPOCH NUMBER:  8 (RUN:  3 COMB:  STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6162297676006386, 'tp': 1972, 'tn': 2067, 'fp': 426, 'fn': 535, 'auroc': 0.8851630196780742, 'auprc': 0.8736173683525843, 'eval_loss': 0.8101837718963623}
Correct predictions are:  4039
Total predictions are:  5000
Accuracy on test set is: 0.8078 



EPOCH NUMBER:  9 (RUN:  3 COMB:  STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6268031142794381, 'tp': 2149, 'tn': 1912, 'fp': 581, 'fn': 358, 'auroc': 0.8425937259348113, 'auprc': 0.7572007347714814, 'eval_loss': 0.8839665733337402}
Correct predictions are:  4061
Total predictions are:  5000
Accuracy on test set is: 0.8122 


[0.6774, 0.7746, 0.789, 0.7986, 0.805, 0.7954, 0.8076, 0.8054, 0.8078, 0.8122]

RUN NUMBER:  4


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  4 COMB:  STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6828044751962917, 'tp': 2118, 'tn': 2089, 'fp': 404, 'fn': 389, 'auroc': 0.9200257730020602, 'auprc': 0.9179286447097519, 'eval_loss': 0.389539599609375}
Correct predictions are:  4207
Total predictions are:  5000
Accuracy on test set is: 0.8414 



EPOCH NUMBER:  1 (RUN:  4 COMB:  STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6874452380837669, 'tp': 2084, 'tn': 2134, 'fp': 359, 'fn': 423, 'auroc': 0.9247898903527405, 'auprc': 0.9240874269153566, 'eval_loss': 0.46805684576034545}
Correct predictions are:  4218
Total predictions are:  5000
Accuracy on test set is: 0.8436 



EPOCH NUMBER:  2 (RUN:  4 COMB:  STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6753013934669385, 'tp': 2037, 'tn': 2149, 'fp': 344, 'fn': 470, 'auroc': 0.9224348318890819, 'auprc': 0.9206324062882802, 'eval_loss': 0.6355754209518433}
Correct predictions are:  4186
Total predictions are:  5000
Accuracy on test set is: 0.8372 



EPOCH NUMBER:  3 (RUN:  4 COMB:  STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6673147172101026, 'tp': 2043, 'tn': 2124, 'fp': 369, 'fn': 464, 'auroc': 0.9158427002067695, 'auprc': 0.9016160647697767, 'eval_loss': 0.7345135720252991}
Correct predictions are:  4167
Total predictions are:  5000
Accuracy on test set is: 0.8334 



EPOCH NUMBER:  4 (RUN:  4 COMB:  STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.68215761440322, 'tp': 2084, 'tn': 2121, 'fp': 372, 'fn': 423, 'auroc': 0.9211165815539993, 'auprc': 0.9201342378947426, 'eval_loss': 0.8460731486320495}
Correct predictions are:  4205
Total predictions are:  5000
Accuracy on test set is: 0.841 



EPOCH NUMBER:  5 (RUN:  4 COMB:  STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6749223685545275, 'tp': 2151, 'tn': 2035, 'fp': 458, 'fn': 356, 'auroc': 0.9192181666704267, 'auprc': 0.9176460986513669, 'eval_loss': 0.8443938572883606}
Correct predictions are:  4186
Total predictions are:  5000
Accuracy on test set is: 0.8372 



EPOCH NUMBER:  6 (RUN:  4 COMB:  STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6748816348393334, 'tp': 2122, 'tn': 2065, 'fp': 428, 'fn': 385, 'auroc': 0.9152950159129247, 'auprc': 0.9091065615461549, 'eval_loss': 0.9602524389982223}
Correct predictions are:  4187
Total predictions are:  5000
Accuracy on test set is: 0.8374 



EPOCH NUMBER:  7 (RUN:  4 COMB:  STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.665607890284147, 'tp': 2098, 'tn': 2066, 'fp': 427, 'fn': 409, 'auroc': 0.9109809820908995, 'auprc': 0.9053800401653378, 'eval_loss': 1.0378976443052292}
Correct predictions are:  4164
Total predictions are:  5000
Accuracy on test set is: 0.8328 



EPOCH NUMBER:  8 (RUN:  4 COMB:  STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6676979155121879, 'tp': 2115, 'tn': 2054, 'fp': 439, 'fn': 392, 'auroc': 0.9162611034870514, 'auprc': 0.9089144531646223, 'eval_loss': 0.9348009357452393}
Correct predictions are:  4169
Total predictions are:  5000
Accuracy on test set is: 0.8338 



EPOCH NUMBER:  9 (RUN:  4 COMB:  STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6730929707630272, 'tp': 2170, 'tn': 2010, 'fp': 483, 'fn': 337, 'auroc': 0.9114692259187313, 'auprc': 0.896174476433508, 'eval_loss': 0.9725619953393936}
Correct predictions are:  4180
Total predictions are:  5000
Accuracy on test set is: 0.836 


[0.8414, 0.8436, 0.8372, 0.8334, 0.841, 0.8372, 0.8374, 0.8328, 0.8338, 0.836]

RUN NUMBER:  5


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  5 COMB:  STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6964856218057592, 'tp': 2214, 'tn': 2023, 'fp': 470, 'fn': 293, 'auroc': 0.9287608014846837, 'auprc': 0.9280034668809973, 'eval_loss': 0.3705154876708984}
Correct predictions are:  4237
Total predictions are:  5000
Accuracy on test set is: 0.8474 



EPOCH NUMBER:  1 (RUN:  5 COMB:  STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.7024380340539177, 'tp': 2123, 'tn': 2133, 'fp': 360, 'fn': 384, 'auroc': 0.9291307243848792, 'auprc': 0.9295104607141318, 'eval_loss': 0.40808994064331056}
Correct predictions are:  4256
Total predictions are:  5000
Accuracy on test set is: 0.8512 



EPOCH NUMBER:  2 (RUN:  5 COMB:  STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6780102054449109, 'tp': 2114, 'tn': 2081, 'fp': 412, 'fn': 393, 'auroc': 0.9273819906748069, 'auprc': 0.9284324254812741, 'eval_loss': 0.5847496318817139}
Correct predictions are:  4195
Total predictions are:  5000
Accuracy on test set is: 0.839 



EPOCH NUMBER:  3 (RUN:  5 COMB:  STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6885813053722484, 'tp': 2148, 'tn': 2073, 'fp': 420, 'fn': 359, 'auroc': 0.9298658501482653, 'auprc': 0.930228177199254, 'eval_loss': 0.6553716018676757}
Correct predictions are:  4221
Total predictions are:  5000
Accuracy on test set is: 0.8442 



EPOCH NUMBER:  4 (RUN:  5 COMB:  STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6740994341208211, 'tp': 2123, 'tn': 2062, 'fp': 431, 'fn': 384, 'auroc': 0.9229971562977053, 'auprc': 0.9195171432442396, 'eval_loss': 0.8345484318971634}
Correct predictions are:  4185
Total predictions are:  5000
Accuracy on test set is: 0.837 



EPOCH NUMBER:  5 (RUN:  5 COMB:  STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6900940343825896, 'tp': 2168, 'tn': 2056, 'fp': 437, 'fn': 339, 'auroc': 0.9243054065543872, 'auprc': 0.9194939609173648, 'eval_loss': 0.8488428249359131}
Correct predictions are:  4224
Total predictions are:  5000
Accuracy on test set is: 0.8448 



EPOCH NUMBER:  6 (RUN:  5 COMB:  STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6792646266698729, 'tp': 2090, 'tn': 2108, 'fp': 385, 'fn': 417, 'auroc': 0.9241390852504283, 'auprc': 0.9205267308584666, 'eval_loss': 0.9632840903282166}
Correct predictions are:  4198
Total predictions are:  5000
Accuracy on test set is: 0.8396 



EPOCH NUMBER:  7 (RUN:  5 COMB:  STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6806770751686061, 'tp': 2145, 'tn': 2056, 'fp': 437, 'fn': 362, 'auroc': 0.9209665803779901, 'auprc': 0.9164409165949279, 'eval_loss': 0.9715277597427369}
Correct predictions are:  4201
Total predictions are:  5000
Accuracy on test set is: 0.8402 



EPOCH NUMBER:  8 (RUN:  5 COMB:  STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6875564167926381, 'tp': 2158, 'tn': 2060, 'fp': 433, 'fn': 349, 'auroc': 0.9182143188002594, 'auprc': 0.9106432989609181, 'eval_loss': 0.9962672409057617}
Correct predictions are:  4218
Total predictions are:  5000
Accuracy on test set is: 0.8436 



EPOCH NUMBER:  9 (RUN:  5 COMB:  STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6877962686397755, 'tp': 2088, 'tn': 2131, 'fp': 362, 'fn': 419, 'auroc': 0.9179404766533369, 'auprc': 0.9086165221123268, 'eval_loss': 1.1492286601543427}
Correct predictions are:  4219
Total predictions are:  5000
Accuracy on test set is: 0.8438 


[0.8474, 0.8512, 0.839, 0.8442, 0.837, 0.8448, 0.8396, 0.8402, 0.8436, 0.8438]


 Over all runs maximum accuracies are: [0.8122, 0.8436, 0.8446, 0.8456, 0.8512]
The median is: 0.8446
XLNet Accuracy Score on Test set ->  ['0.8446 +/- 0.032399999999999984']


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a



* * * * EVALUATION USING STM_RSW AS PREPROCESSING FUNCTION * * * *

RUN NUMBER:  1


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  1 COMB:  STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.02006009103947226, 'tp': 2507, 'tn': 2, 'fp': 2491, 'fn': 0, 'auroc': 0.6296612565442513, 'auprc': 0.6304097166748194, 'eval_loss': 0.69399677734375}
Correct predictions are:  2509
Total predictions are:  5000
Accuracy on test set is: 0.5018 



EPOCH NUMBER:  1 (RUN:  1 COMB:  STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.58469478147344, 'tp': 1950, 'tn': 2011, 'fp': 482, 'fn': 557, 'auroc': 0.8703493835391669, 'auprc': 0.8683673559606997, 'eval_loss': 0.478510302734375}
Correct predictions are:  3961
Total predictions are:  5000
Accuracy on test set is: 0.7922 



EPOCH NUMBER:  2 (RUN:  1 COMB:  STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.5992250105117961, 'tp': 1996, 'tn': 2002, 'fp': 491, 'fn': 511, 'auroc': 0.8828649216609856, 'auprc': 0.8792383452373865, 'eval_loss': 0.469376123046875}
Correct predictions are:  3998
Total predictions are:  5000
Accuracy on test set is: 0.7996 



EPOCH NUMBER:  3 (RUN:  1 COMB:  STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6035634487697293, 'tp': 2090, 'tn': 1916, 'fp': 577, 'fn': 417, 'auroc': 0.8886744072073525, 'auprc': 0.8779442789411785, 'eval_loss': 0.5297774047851562}
Correct predictions are:  4006
Total predictions are:  5000
Accuracy on test set is: 0.8012 



EPOCH NUMBER:  4 (RUN:  1 COMB:  STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6003364847525005, 'tp': 1887, 'tn': 2107, 'fp': 386, 'fn': 620, 'auroc': 0.8823270774442872, 'auprc': 0.8739844617672969, 'eval_loss': 0.5344234985351562}
Correct predictions are:  3994
Total predictions are:  5000
Accuracy on test set is: 0.7988 



EPOCH NUMBER:  5 (RUN:  1 COMB:  STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.553234662825109, 'tp': 1616, 'tn': 2223, 'fp': 270, 'fn': 891, 'auroc': 0.8226745297683132, 'auprc': 0.8056842496374554, 'eval_loss': 0.61242294921875}
Correct predictions are:  3839
Total predictions are:  5000
Accuracy on test set is: 0.7678 



EPOCH NUMBER:  6 (RUN:  1 COMB:  STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 2507, 'tn': 0, 'fp': 2493, 'fn': 0, 'auroc': 0.5548262698379556, 'auprc': 0.5319556686647512, 'eval_loss': 0.70050546875}
Correct predictions are:  2507
Total predictions are:  5000
Accuracy on test set is: 0.5014 



EPOCH NUMBER:  7 (RUN:  1 COMB:  STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.5443911550620583, 'tp': 2007, 'tn': 1852, 'fp': 641, 'fn': 500, 'auroc': 0.7710107647243953, 'auprc': 0.7066976799558908, 'eval_loss': 0.5777714233398438}
Correct predictions are:  3859
Total predictions are:  5000
Accuracy on test set is: 0.7718 



EPOCH NUMBER:  8 (RUN:  1 COMB:  STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.5886420547040998, 'tp': 1894, 'tn': 2073, 'fp': 420, 'fn': 613, 'auroc': 0.8575939235363605, 'auprc': 0.8630303569121431, 'eval_loss': 0.5466584350585938}
Correct predictions are:  3967
Total predictions are:  5000
Accuracy on test set is: 0.7934 



EPOCH NUMBER:  9 (RUN:  1 COMB:  STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.5757532078492922, 'tp': 1906, 'tn': 2031, 'fp': 462, 'fn': 601, 'auroc': 0.8714343520453199, 'auprc': 0.8667777270530299, 'eval_loss': 0.5515296752929687}
Correct predictions are:  3937
Total predictions are:  5000
Accuracy on test set is: 0.7874 


[0.5018, 0.7922, 0.7996, 0.8012, 0.7988, 0.7678, 0.5014, 0.7718, 0.7934, 0.7874]

RUN NUMBER:  2


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  2 COMB:  STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.5773737491564419, 'tp': 1950, 'tn': 1993, 'fp': 500, 'fn': 557, 'auroc': 0.8660250296362324, 'auprc': 0.8568118689716351, 'eval_loss': 0.48021669921875}
Correct predictions are:  3943
Total predictions are:  5000
Accuracy on test set is: 0.7886 



EPOCH NUMBER:  1 (RUN:  2 COMB:  STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6001771028537302, 'tp': 1857, 'tn': 2133, 'fp': 360, 'fn': 650, 'auroc': 0.8916723507112295, 'auprc': 0.8904694815177447, 'eval_loss': 0.4604613037109375}
Correct predictions are:  3990
Total predictions are:  5000
Accuracy on test set is: 0.798 



EPOCH NUMBER:  2 (RUN:  2 COMB:  STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6172544945082935, 'tp': 1982, 'tn': 2060, 'fp': 433, 'fn': 525, 'auroc': 0.889082570407352, 'auprc': 0.889064294189054, 'eval_loss': 0.46658922729492186}
Correct predictions are:  4042
Total predictions are:  5000
Accuracy on test set is: 0.8084 



EPOCH NUMBER:  3 (RUN:  2 COMB:  STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6417764917189999, 'tp': 2129, 'tn': 1973, 'fp': 520, 'fn': 378, 'auroc': 0.8989482477542625, 'auprc': 0.8970393218965091, 'eval_loss': 0.5017362884521485}
Correct predictions are:  4102
Total predictions are:  5000
Accuracy on test set is: 0.8204 



EPOCH NUMBER:  4 (RUN:  2 COMB:  STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6316047007963957, 'tp': 2055, 'tn': 2024, 'fp': 469, 'fn': 452, 'auroc': 0.8949170961500337, 'auprc': 0.8893389155785524, 'eval_loss': 0.622302847290039}
Correct predictions are:  4079
Total predictions are:  5000
Accuracy on test set is: 0.8158 



EPOCH NUMBER:  5 (RUN:  2 COMB:  STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6192523540892118, 'tp': 2050, 'tn': 1998, 'fp': 495, 'fn': 457, 'auroc': 0.8935772456456058, 'auprc': 0.8859752552331435, 'eval_loss': 0.6807337799072266}
Correct predictions are:  4048
Total predictions are:  5000
Accuracy on test set is: 0.8096 



EPOCH NUMBER:  6 (RUN:  2 COMB:  STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6030841195805053, 'tp': 1838, 'tn': 2156, 'fp': 337, 'fn': 669, 'auroc': 0.8754317433848682, 'auprc': 0.8621614304255909, 'eval_loss': 0.6902243041992188}
Correct predictions are:  3994
Total predictions are:  5000
Accuracy on test set is: 0.7988 



EPOCH NUMBER:  7 (RUN:  2 COMB:  STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6196510759216386, 'tp': 1967, 'tn': 2080, 'fp': 413, 'fn': 540, 'auroc': 0.8943049313506618, 'auprc': 0.8909446998522568, 'eval_loss': 0.6497733795166015}
Correct predictions are:  4047
Total predictions are:  5000
Accuracy on test set is: 0.8094 



EPOCH NUMBER:  8 (RUN:  2 COMB:  STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6396147421177293, 'tp': 2049, 'tn': 2050, 'fp': 443, 'fn': 458, 'auroc': 0.8963791076122037, 'auprc': 0.8883532729062609, 'eval_loss': 0.6918403831481934}
Correct predictions are:  4099
Total predictions are:  5000
Accuracy on test set is: 0.8198 



EPOCH NUMBER:  9 (RUN:  2 COMB:  STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6348077810026734, 'tp': 2045, 'tn': 2042, 'fp': 451, 'fn': 462, 'auroc': 0.8950411771228287, 'auprc': 0.8763815861434854, 'eval_loss': 0.6827293106079102}
Correct predictions are:  4087
Total predictions are:  5000
Accuracy on test set is: 0.8174 


[0.7886, 0.798, 0.8084, 0.8204, 0.8158, 0.8096, 0.7988, 0.8094, 0.8198, 0.8174]

RUN NUMBER:  3


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  3 COMB:  STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.49640677306759917, 'tp': 1890, 'tn': 1851, 'fp': 642, 'fn': 617, 'auroc': 0.826866962636987, 'auprc': 0.8244844128413592, 'eval_loss': 0.5246639404296874}
Correct predictions are:  3741
Total predictions are:  5000
Accuracy on test set is: 0.7482 



EPOCH NUMBER:  1 (RUN:  3 COMB:  STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.5610309305152718, 'tp': 2109, 'tn': 1783, 'fp': 710, 'fn': 398, 'auroc': 0.8557068687418509, 'auprc': 0.8516037759732543, 'eval_loss': 0.5221061065673828}
Correct predictions are:  3892
Total predictions are:  5000
Accuracy on test set is: 0.7784 



EPOCH NUMBER:  2 (RUN:  3 COMB:  STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.5980773576101341, 'tp': 1938, 'tn': 2055, 'fp': 438, 'fn': 569, 'auroc': 0.8793382540119116, 'auprc': 0.8751250333865707, 'eval_loss': 0.4961970886230469}
Correct predictions are:  3993
Total predictions are:  5000
Accuracy on test set is: 0.7986 



EPOCH NUMBER:  3 (RUN:  3 COMB:  STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6151245397033145, 'tp': 2088, 'tn': 1948, 'fp': 545, 'fn': 419, 'auroc': 0.8597359403297722, 'auprc': 0.8717734517567781, 'eval_loss': 0.5022786743164063}
Correct predictions are:  4036
Total predictions are:  5000
Accuracy on test set is: 0.8072 



EPOCH NUMBER:  4 (RUN:  3 COMB:  STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6333332025625598, 'tp': 1956, 'tn': 2123, 'fp': 370, 'fn': 551, 'auroc': 0.8499429835529912, 'auprc': 0.8698355649015661, 'eval_loss': 0.5048018249511719}
Correct predictions are:  4079
Total predictions are:  5000
Accuracy on test set is: 0.8158 



EPOCH NUMBER:  5 (RUN:  3 COMB:  STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.622780324705339, 'tp': 2167, 'tn': 1881, 'fp': 612, 'fn': 340, 'auroc': 0.8909894653574084, 'auprc': 0.8828260010370939, 'eval_loss': 0.5102761352539062}
Correct predictions are:  4048
Total predictions are:  5000
Accuracy on test set is: 0.8096 



EPOCH NUMBER:  6 (RUN:  3 COMB:  STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6223885479775026, 'tp': 2081, 'tn': 1974, 'fp': 519, 'fn': 426, 'auroc': 0.8731793257259135, 'auprc': 0.8617625287887757, 'eval_loss': 0.5440385681152343}
Correct predictions are:  4055
Total predictions are:  5000
Accuracy on test set is: 0.811 



EPOCH NUMBER:  7 (RUN:  3 COMB:  STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6231545298333466, 'tp': 2080, 'tn': 1977, 'fp': 516, 'fn': 427, 'auroc': 0.8925939579366302, 'auprc': 0.8806679468352652, 'eval_loss': 0.615846760559082}
Correct predictions are:  4057
Total predictions are:  5000
Accuracy on test set is: 0.8114 



EPOCH NUMBER:  8 (RUN:  3 COMB:  STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6243238754458328, 'tp': 1963, 'tn': 2095, 'fp': 398, 'fn': 544, 'auroc': 0.8808275456879581, 'auprc': 0.870790113240881, 'eval_loss': 0.7108871047973633}
Correct predictions are:  4058
Total predictions are:  5000
Accuracy on test set is: 0.8116 



EPOCH NUMBER:  9 (RUN:  3 COMB:  STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6210505969178012, 'tp': 2092, 'tn': 1959, 'fp': 534, 'fn': 415, 'auroc': 0.8351536676047541, 'auprc': 0.7835051764866059, 'eval_loss': 0.7703323028564453}
Correct predictions are:  4051
Total predictions are:  5000
Accuracy on test set is: 0.8102 


[0.7482, 0.7784, 0.7986, 0.8072, 0.8158, 0.8096, 0.811, 0.8114, 0.8116, 0.8102]

RUN NUMBER:  4


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  4 COMB:  STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.5628563992425561, 'tp': 2144, 'tn': 1747, 'fp': 746, 'fn': 363, 'auroc': 0.8617777963379233, 'auprc': 0.8574565366515784, 'eval_loss': 0.4943938110351562}
Correct predictions are:  3891
Total predictions are:  5000
Accuracy on test set is: 0.7782 



EPOCH NUMBER:  1 (RUN:  4 COMB:  STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6007821978318352, 'tp': 1965, 'tn': 2036, 'fp': 457, 'fn': 542, 'auroc': 0.8811149079408782, 'auprc': 0.8704133782500821, 'eval_loss': 0.4516842529296875}
Correct predictions are:  4001
Total predictions are:  5000
Accuracy on test set is: 0.8002 



EPOCH NUMBER:  2 (RUN:  4 COMB:  STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6316893733386268, 'tp': 2070, 'tn': 2009, 'fp': 484, 'fn': 437, 'auroc': 0.8946930143932328, 'auprc': 0.8899011318950105, 'eval_loss': 0.48257288208007815}
Correct predictions are:  4079
Total predictions are:  5000
Accuracy on test set is: 0.8158 



EPOCH NUMBER:  3 (RUN:  4 COMB:  STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6016621055112076, 'tp': 1869, 'tn': 2126, 'fp': 367, 'fn': 638, 'auroc': 0.8874205573771698, 'auprc': 0.8823384497128728, 'eval_loss': 0.47908500671386717}
Correct predictions are:  3995
Total predictions are:  5000
Accuracy on test set is: 0.799 



EPOCH NUMBER:  4 (RUN:  4 COMB:  STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6403729712717245, 'tp': 2120, 'tn': 1979, 'fp': 514, 'fn': 387, 'auroc': 0.8971347135361543, 'auprc': 0.8912356114258954, 'eval_loss': 0.5066691619873047}
Correct predictions are:  4099
Total predictions are:  5000
Accuracy on test set is: 0.8198 



EPOCH NUMBER:  5 (RUN:  4 COMB:  STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6424663604622264, 'tp': 2150, 'tn': 1952, 'fp': 541, 'fn': 357, 'auroc': 0.8999735357925207, 'auprc': 0.895219025399544, 'eval_loss': 0.472767724609375}
Correct predictions are:  4102
Total predictions are:  5000
Accuracy on test set is: 0.8204 



EPOCH NUMBER:  6 (RUN:  4 COMB:  STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6292966756457745, 'tp': 1941, 'tn': 2127, 'fp': 366, 'fn': 566, 'auroc': 0.8978172788874664, 'auprc': 0.8907407182033047, 'eval_loss': 0.6091491912841797}
Correct predictions are:  4068
Total predictions are:  5000
Accuracy on test set is: 0.8136 



EPOCH NUMBER:  7 (RUN:  4 COMB:  STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.641665961090553, 'tp': 2125, 'tn': 1977, 'fp': 516, 'fn': 382, 'auroc': 0.8810891477389183, 'auprc': 0.8748436737338343, 'eval_loss': 0.6355955596923828}
Correct predictions are:  4102
Total predictions are:  5000
Accuracy on test set is: 0.8204 



EPOCH NUMBER:  8 (RUN:  4 COMB:  STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6331967640992033, 'tp': 2049, 'tn': 2034, 'fp': 459, 'fn': 458, 'auroc': 0.8616865156222824, 'auprc': 0.8431285063500227, 'eval_loss': 0.762684536743164}
Correct predictions are:  4083
Total predictions are:  5000
Accuracy on test set is: 0.8166 



EPOCH NUMBER:  9 (RUN:  4 COMB:  STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.5981149572808585, 'tp': 2231, 'tn': 1737, 'fp': 756, 'fn': 276, 'auroc': 0.8158236760576203, 'auprc': 0.7443785025002799, 'eval_loss': 0.5458088348388672}
Correct predictions are:  3968
Total predictions are:  5000
Accuracy on test set is: 0.7936 


[0.7782, 0.8002, 0.8158, 0.799, 0.8198, 0.8204, 0.8136, 0.8204, 0.8166, 0.7936]

RUN NUMBER:  5


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  5 COMB:  STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.4906334613622719, 'tp': 1581, 'tn': 2115, 'fp': 378, 'fn': 926, 'auroc': 0.8312074766666171, 'auprc': 0.8235527681315006, 'eval_loss': 0.5799354248046875}
Correct predictions are:  3696
Total predictions are:  5000
Accuracy on test set is: 0.7392 



EPOCH NUMBER:  1 (RUN:  5 COMB:  STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6042660469237989, 'tp': 2151, 'tn': 1850, 'fp': 643, 'fn': 356, 'auroc': 0.8848906975430687, 'auprc': 0.8807422722403502, 'eval_loss': 0.4343163269042969}
Correct predictions are:  4001
Total predictions are:  5000
Accuracy on test set is: 0.8002 



EPOCH NUMBER:  2 (RUN:  5 COMB:  STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6250951853366289, 'tp': 1965, 'tn': 2095, 'fp': 398, 'fn': 542, 'auroc': 0.8965657490754727, 'auprc': 0.8900373213323773, 'eval_loss': 0.4530185089111328}
Correct predictions are:  4060
Total predictions are:  5000
Accuracy on test set is: 0.812 



EPOCH NUMBER:  3 (RUN:  5 COMB:  STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.634282041178775, 'tp': 2014, 'tn': 2071, 'fp': 422, 'fn': 493, 'auroc': 0.8896457748228745, 'auprc': 0.8862631310465326, 'eval_loss': 0.5104383178710937}
Correct predictions are:  4085
Total predictions are:  5000
Accuracy on test set is: 0.817 



EPOCH NUMBER:  4 (RUN:  5 COMB:  STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.611822369441082, 'tp': 1905, 'tn': 2118, 'fp': 375, 'fn': 602, 'auroc': 0.8881980834729746, 'auprc': 0.882292669404531, 'eval_loss': 0.5676688751220703}
Correct predictions are:  4023
Total predictions are:  5000
Accuracy on test set is: 0.8046 



EPOCH NUMBER:  5 (RUN:  5 COMB:  STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6064767768697092, 'tp': 1967, 'tn': 2048, 'fp': 445, 'fn': 540, 'auroc': 0.8870953548275818, 'auprc': 0.8769954460195267, 'eval_loss': 0.6604726898193359}
Correct predictions are:  4015
Total predictions are:  5000
Accuracy on test set is: 0.803 



EPOCH NUMBER:  6 (RUN:  5 COMB:  STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6171756423762033, 'tp': 2074, 'tn': 1968, 'fp': 525, 'fn': 433, 'auroc': 0.8897027352694444, 'auprc': 0.882904477426367, 'eval_loss': 0.649393392944336}
Correct predictions are:  4042
Total predictions are:  5000
Accuracy on test set is: 0.8084 



EPOCH NUMBER:  7 (RUN:  5 COMB:  STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6232296196441204, 'tp': 1991, 'tn': 2066, 'fp': 427, 'fn': 516, 'auroc': 0.8934271644689694, 'auprc': 0.8797603380005834, 'eval_loss': 0.7344127212524414}
Correct predictions are:  4057
Total predictions are:  5000
Accuracy on test set is: 0.8114 



EPOCH NUMBER:  8 (RUN:  5 COMB:  STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6246504684222588, 'tp': 1966, 'tn': 2093, 'fp': 400, 'fn': 541, 'auroc': 0.8941294899752014, 'auprc': 0.8803398654993481, 'eval_loss': 0.740088510131836}
Correct predictions are:  4059
Total predictions are:  5000
Accuracy on test set is: 0.8118 



EPOCH NUMBER:  9 (RUN:  5 COMB:  STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6268654789864317, 'tp': 2024, 'tn': 2043, 'fp': 450, 'fn': 483, 'auroc': 0.8708197072265047, 'auprc': 0.8249617584297831, 'eval_loss': 0.7613984573364257}
Correct predictions are:  4067
Total predictions are:  5000
Accuracy on test set is: 0.8134 


[0.7392, 0.8002, 0.812, 0.817, 0.8046, 0.803, 0.8084, 0.8114, 0.8118, 0.8134]


 Over all runs maximum accuracies are: [0.8012, 0.8158, 0.817, 0.8204, 0.8204]
The median is: 0.817
XLNet Accuracy Score on Test set ->  ['0.817 +/- 0.015799999999999925']


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a



* * * * EVALUATION USING LOW_STM_RSW AS PREPROCESSING FUNCTION * * * *

RUN NUMBER:  1


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  1 COMB:  LOW_STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 0, 'tn': 2493, 'fp': 0, 'fn': 2507, 'auroc': 0.5335149027568374, 'auprc': 0.5259551284450559, 'eval_loss': 0.694921875}
Correct predictions are:  2493
Total predictions are:  5000
Accuracy on test set is: 0.4986 



EPOCH NUMBER:  1 (RUN:  1 COMB:  LOW_STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.3937811149007046, 'tp': 1526, 'tn': 1943, 'fp': 550, 'fn': 981, 'auroc': 0.7611906077343646, 'auprc': 0.7493912870419273, 'eval_loss': 0.6366652099609375}
Correct predictions are:  3469
Total predictions are:  5000
Accuracy on test set is: 0.6938 



EPOCH NUMBER:  2 (RUN:  1 COMB:  LOW_STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.3031566928041912, 'tp': 854, 'tn': 2266, 'fp': 227, 'fn': 1653, 'auroc': 0.7887340236747457, 'auprc': 0.7267923856840119, 'eval_loss': 0.6202804321289063}
Correct predictions are:  3120
Total predictions are:  5000
Accuracy on test set is: 0.624 



EPOCH NUMBER:  3 (RUN:  1 COMB:  LOW_STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.5262879684055405, 'tp': 1853, 'tn': 1961, 'fp': 532, 'fn': 654, 'auroc': 0.8385797744654319, 'auprc': 0.8424077127779589, 'eval_loss': 0.5266200805664063}
Correct predictions are:  3814
Total predictions are:  5000
Accuracy on test set is: 0.7628 



EPOCH NUMBER:  4 (RUN:  1 COMB:  LOW_STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.5277950646065018, 'tp': 1783, 'tn': 2029, 'fp': 464, 'fn': 724, 'auroc': 0.84345557269169, 'auprc': 0.8333854843030278, 'eval_loss': 0.522208447265625}
Correct predictions are:  3812
Total predictions are:  5000
Accuracy on test set is: 0.7624 



EPOCH NUMBER:  5 (RUN:  1 COMB:  LOW_STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.5212670018858158, 'tp': 1858, 'tn': 1944, 'fp': 549, 'fn': 649, 'auroc': 0.8107440362332441, 'auprc': 0.8107142704399048, 'eval_loss': 0.5675549072265625}
Correct predictions are:  3802
Total predictions are:  5000
Accuracy on test set is: 0.7604 



EPOCH NUMBER:  6 (RUN:  1 COMB:  LOW_STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.5059214862572536, 'tp': 1603, 'tn': 2131, 'fp': 362, 'fn': 904, 'auroc': 0.839145218898516, 'auprc': 0.825792498204854, 'eval_loss': 0.54422666015625}
Correct predictions are:  3734
Total predictions are:  5000
Accuracy on test set is: 0.7468 



EPOCH NUMBER:  7 (RUN:  1 COMB:  LOW_STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.5417799648809166, 'tp': 2157, 'tn': 1674, 'fp': 819, 'fn': 350, 'auroc': 0.8497800222753746, 'auprc': 0.8448239652982814, 'eval_loss': 0.5359682983398437}
Correct predictions are:  3831
Total predictions are:  5000
Accuracy on test set is: 0.7662 



EPOCH NUMBER:  8 (RUN:  1 COMB:  LOW_STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.5541179404979999, 'tp': 2077, 'tn': 1801, 'fp': 692, 'fn': 430, 'auroc': 0.8276120084781464, 'auprc': 0.7845428318270244, 'eval_loss': 0.5330121948242188}
Correct predictions are:  3878
Total predictions are:  5000
Accuracy on test set is: 0.7756 



EPOCH NUMBER:  9 (RUN:  1 COMB:  LOW_STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.5631568877097739, 'tp': 2008, 'tn': 1899, 'fp': 594, 'fn': 499, 'auroc': 0.860848989056074, 'auprc': 0.8498329938729746, 'eval_loss': 0.5383110595703124}
Correct predictions are:  3907
Total predictions are:  5000
Accuracy on test set is: 0.7814 


[0.4986, 0.6938, 0.624, 0.7628, 0.7624, 0.7604, 0.7468, 0.7662, 0.7756, 0.7814]

RUN NUMBER:  2


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  2 COMB:  LOW_STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6076630069915979, 'tp': 1951, 'tn': 2066, 'fp': 427, 'fn': 556, 'auroc': 0.8843730134844258, 'auprc': 0.8795486771964263, 'eval_loss': 0.45317745361328127}
Correct predictions are:  4017
Total predictions are:  5000
Accuracy on test set is: 0.8034 



EPOCH NUMBER:  1 (RUN:  2 COMB:  LOW_STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6163587791589493, 'tp': 2136, 'tn': 1899, 'fp': 594, 'fn': 371, 'auroc': 0.8892286515526282, 'auprc': 0.8870069988362733, 'eval_loss': 0.45952063598632814}
Correct predictions are:  4035
Total predictions are:  5000
Accuracy on test set is: 0.807 



EPOCH NUMBER:  2 (RUN:  2 COMB:  LOW_STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6198215782803388, 'tp': 1977, 'tn': 2071, 'fp': 422, 'fn': 530, 'auroc': 0.8923641961352977, 'auprc': 0.8911491112931865, 'eval_loss': 0.4628898040771484}
Correct predictions are:  4048
Total predictions are:  5000
Accuracy on test set is: 0.8096 



EPOCH NUMBER:  3 (RUN:  2 COMB:  LOW_STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6273507912794821, 'tp': 2111, 'tn': 1955, 'fp': 538, 'fn': 396, 'auroc': 0.8936072458808076, 'auprc': 0.8894208269418733, 'eval_loss': 0.48761851196289063}
Correct predictions are:  4066
Total predictions are:  5000
Accuracy on test set is: 0.8132 



EPOCH NUMBER:  4 (RUN:  2 COMB:  LOW_STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6380906156396848, 'tp': 2035, 'tn': 2060, 'fp': 433, 'fn': 472, 'auroc': 0.8898606565075471, 'auprc': 0.8851287564505146, 'eval_loss': 0.526312387084961}
Correct predictions are:  4095
Total predictions are:  5000
Accuracy on test set is: 0.819 



EPOCH NUMBER:  5 (RUN:  2 COMB:  LOW_STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6449775300727831, 'tp': 1995, 'tn': 2115, 'fp': 378, 'fn': 512, 'auroc': 0.8922369151374147, 'auprc': 0.8603691353377091, 'eval_loss': 0.6171101913452148}
Correct predictions are:  4110
Total predictions are:  5000
Accuracy on test set is: 0.822 



EPOCH NUMBER:  6 (RUN:  2 COMB:  LOW_STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.633755910398628, 'tp': 1998, 'tn': 2085, 'fp': 408, 'fn': 509, 'auroc': 0.8877653600804231, 'auprc': 0.8721903010273591, 'eval_loss': 0.685440869140625}
Correct predictions are:  4083
Total predictions are:  5000
Accuracy on test set is: 0.8166 



EPOCH NUMBER:  7 (RUN:  2 COMB:  LOW_STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6440867639249527, 'tp': 2043, 'tn': 2067, 'fp': 426, 'fn': 464, 'auroc': 0.8992376100228627, 'auprc': 0.8919939934238468, 'eval_loss': 0.692087760925293}
Correct predictions are:  4110
Total predictions are:  5000
Accuracy on test set is: 0.822 



EPOCH NUMBER:  8 (RUN:  2 COMB:  LOW_STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6386616184143268, 'tp': 2093, 'tn': 2003, 'fp': 490, 'fn': 414, 'auroc': 0.881591551677765, 'auprc': 0.8397617861894242, 'eval_loss': 0.7652329536437988}
Correct predictions are:  4096
Total predictions are:  5000
Accuracy on test set is: 0.8192 



EPOCH NUMBER:  9 (RUN:  2 COMB:  LOW_STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6421092162470344, 'tp': 2011, 'tn': 2093, 'fp': 400, 'fn': 496, 'auroc': 0.8656619067893493, 'auprc': 0.8332577833834725, 'eval_loss': 0.7630685165405273}
Correct predictions are:  4104
Total predictions are:  5000
Accuracy on test set is: 0.8208 


[0.8034, 0.807, 0.8096, 0.8132, 0.819, 0.822, 0.8166, 0.822, 0.8192, 0.8208]

RUN NUMBER:  3


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  3 COMB:  LOW_STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 0, 'tn': 2493, 'fp': 0, 'fn': 2507, 'auroc': 0.5099659181327983, 'auprc': 0.5066756850854635, 'eval_loss': 0.69318544921875}
Correct predictions are:  2493
Total predictions are:  5000
Accuracy on test set is: 0.4986 



EPOCH NUMBER:  1 (RUN:  3 COMB:  LOW_STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 2507, 'tn': 0, 'fp': 2493, 'fn': 0, 'auroc': 0.5199883167084031, 'auprc': 0.5117893213463067, 'eval_loss': 0.7027130859375}
Correct predictions are:  2507
Total predictions are:  5000
Accuracy on test set is: 0.5014 



EPOCH NUMBER:  2 (RUN:  3 COMB:  LOW_STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 2507, 'tn': 0, 'fp': 2493, 'fn': 0, 'auroc': 0.5092547125569464, 'auprc': 0.5022291713011279, 'eval_loss': 0.69351943359375}
Correct predictions are:  2507
Total predictions are:  5000
Accuracy on test set is: 0.5014 



EPOCH NUMBER:  3 (RUN:  3 COMB:  LOW_STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 2507, 'tn': 0, 'fp': 2493, 'fn': 0, 'auroc': 0.5060729276117526, 'auprc': 0.5053169239613465, 'eval_loss': 0.6951130859375}
Correct predictions are:  2507
Total predictions are:  5000
Accuracy on test set is: 0.5014 



EPOCH NUMBER:  4 (RUN:  3 COMB:  LOW_STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 2507, 'tn': 0, 'fp': 2493, 'fn': 0, 'auroc': 0.7562224887843121, 'auprc': 0.7255346872188231, 'eval_loss': 0.6973294921875}
Correct predictions are:  2507
Total predictions are:  5000
Accuracy on test set is: 0.5014 



EPOCH NUMBER:  5 (RUN:  3 COMB:  LOW_STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 2507, 'tn': 0, 'fp': 2493, 'fn': 0, 'auroc': 0.7508783668863964, 'auprc': 0.7567138264101296, 'eval_loss': 0.69441533203125}
Correct predictions are:  2507
Total predictions are:  5000
Accuracy on test set is: 0.5014 



EPOCH NUMBER:  6 (RUN:  3 COMB:  LOW_STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.24994585287909724, 'tp': 1952, 'tn': 1142, 'fp': 1351, 'fn': 555, 'auroc': 0.6723575912835156, 'auprc': 0.5991515946956208, 'eval_loss': 0.646410595703125}
Correct predictions are:  3094
Total predictions are:  5000
Accuracy on test set is: 0.6188 



EPOCH NUMBER:  7 (RUN:  3 COMB:  LOW_STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.5371787539858666, 'tp': 1863, 'tn': 1978, 'fp': 515, 'fn': 644, 'auroc': 0.846594477300702, 'auprc': 0.8365938443752956, 'eval_loss': 0.5131105560302734}
Correct predictions are:  3841
Total predictions are:  5000
Accuracy on test set is: 0.7682 



EPOCH NUMBER:  8 (RUN:  3 COMB:  LOW_STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.567649279299975, 'tp': 1815, 'tn': 2094, 'fp': 399, 'fn': 692, 'auroc': 0.8650413419241206, 'auprc': 0.8493374608626773, 'eval_loss': 0.5493881713867188}
Correct predictions are:  3909
Total predictions are:  5000
Accuracy on test set is: 0.7818 



EPOCH NUMBER:  9 (RUN:  3 COMB:  LOW_STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.5632558544917509, 'tp': 2086, 'tn': 1815, 'fp': 678, 'fn': 421, 'auroc': 0.8042562253688068, 'auprc': 0.7205246701091973, 'eval_loss': 0.5408214599609374}
Correct predictions are:  3901
Total predictions are:  5000
Accuracy on test set is: 0.7802 


[0.4986, 0.5014, 0.5014, 0.5014, 0.5014, 0.5014, 0.6188, 0.7682, 0.7818, 0.7802]

RUN NUMBER:  4


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  4 COMB:  LOW_STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 2507, 'tn': 0, 'fp': 2493, 'fn': 0, 'auroc': 0.5299811950525692, 'auprc': 0.5195222704645907, 'eval_loss': 0.69318447265625}
Correct predictions are:  2507
Total predictions are:  5000
Accuracy on test set is: 0.5014 



EPOCH NUMBER:  1 (RUN:  4 COMB:  LOW_STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 2507, 'tn': 0, 'fp': 2493, 'fn': 0, 'auroc': 0.49967703746797376, 'auprc': 0.5014315097489548, 'eval_loss': 0.69396416015625}
Correct predictions are:  2507
Total predictions are:  5000
Accuracy on test set is: 0.5014 



EPOCH NUMBER:  2 (RUN:  4 COMB:  LOW_STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 2507, 'tn': 0, 'fp': 2493, 'fn': 0, 'auroc': 0.500488403829086, 'auprc': 0.5018395371977153, 'eval_loss': 0.6939685546875}
Correct predictions are:  2507
Total predictions are:  5000
Accuracy on test set is: 0.5014 



EPOCH NUMBER:  3 (RUN:  4 COMB:  LOW_STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 2507, 'tn': 0, 'fp': 2493, 'fn': 0, 'auroc': 0.779244189274444, 'auprc': 0.7636771200144128, 'eval_loss': 0.69206259765625}
Correct predictions are:  2507
Total predictions are:  5000
Accuracy on test set is: 0.5014 



EPOCH NUMBER:  4 (RUN:  4 COMB:  LOW_STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 0, 'tn': 2493, 'fp': 0, 'fn': 2507, 'auroc': 0.47872207318105375, 'auprc': 0.49078598507689064, 'eval_loss': 0.698514453125}
Correct predictions are:  2493
Total predictions are:  5000
Accuracy on test set is: 0.4986 



EPOCH NUMBER:  5 (RUN:  4 COMB:  LOW_STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 0, 'tn': 2493, 'fp': 0, 'fn': 2507, 'auroc': 0.534352909326809, 'auprc': 0.5212621371819693, 'eval_loss': 0.69314072265625}
Correct predictions are:  2493
Total predictions are:  5000
Accuracy on test set is: 0.4986 



EPOCH NUMBER:  6 (RUN:  4 COMB:  LOW_STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 2507, 'tn': 0, 'fp': 2493, 'fn': 0, 'auroc': 0.4905503259145551, 'auprc': 0.496733342558263, 'eval_loss': 0.69392744140625}
Correct predictions are:  2507
Total predictions are:  5000
Accuracy on test set is: 0.5014 



EPOCH NUMBER:  7 (RUN:  4 COMB:  LOW_STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 2507, 'tn': 0, 'fp': 2493, 'fn': 0, 'auroc': 0.4903495243402708, 'auprc': 0.49678730098757246, 'eval_loss': 0.6941958984375}
Correct predictions are:  2507
Total predictions are:  5000
Accuracy on test set is: 0.5014 



EPOCH NUMBER:  8 (RUN:  4 COMB:  LOW_STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 0, 'tn': 2493, 'fp': 0, 'fn': 2507, 'auroc': 0.5020727362502522, 'auprc': 0.5092044573143224, 'eval_loss': 0.69328125}
Correct predictions are:  2493
Total predictions are:  5000
Accuracy on test set is: 0.4986 



EPOCH NUMBER:  9 (RUN:  4 COMB:  LOW_STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.3697161695109429, 'tp': 2265, 'tn': 1047, 'fp': 1446, 'fn': 242, 'auroc': 0.6688390836984163, 'auprc': 0.6019710160205096, 'eval_loss': 0.62083056640625}
Correct predictions are:  3312
Total predictions are:  5000
Accuracy on test set is: 0.6624 


[0.5014, 0.5014, 0.5014, 0.5014, 0.4986, 0.4986, 0.5014, 0.5014, 0.4986, 0.6624]

RUN NUMBER:  5


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  5 COMB:  LOW_STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 2507, 'tn': 0, 'fp': 2493, 'fn': 0, 'auroc': 0.7375730625728106, 'auprc': 0.7448451412002977, 'eval_loss': 0.69572109375}
Correct predictions are:  2507
Total predictions are:  5000
Accuracy on test set is: 0.5014 



EPOCH NUMBER:  1 (RUN:  5 COMB:  LOW_STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.5401922708019796, 'tp': 2137, 'tn': 1694, 'fp': 799, 'fn': 370, 'auroc': 0.8502605860429946, 'auprc': 0.8443925328902603, 'eval_loss': 0.5254494689941406}
Correct predictions are:  3831
Total predictions are:  5000
Accuracy on test set is: 0.7662 



EPOCH NUMBER:  2 (RUN:  5 COMB:  LOW_STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.5731971777411186, 'tp': 1973, 'tn': 1960, 'fp': 533, 'fn': 534, 'auroc': 0.8616254751437251, 'auprc': 0.8597033544169057, 'eval_loss': 0.5421821350097656}
Correct predictions are:  3933
Total predictions are:  5000
Accuracy on test set is: 0.7866 



EPOCH NUMBER:  3 (RUN:  5 COMB:  LOW_STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.563033888641096, 'tp': 2051, 'tn': 1853, 'fp': 640, 'fn': 456, 'auroc': 0.8533986106451075, 'auprc': 0.8223479839346169, 'eval_loss': 0.5429421142578125}
Correct predictions are:  3904
Total predictions are:  5000
Accuracy on test set is: 0.7808 



EPOCH NUMBER:  4 (RUN:  5 COMB:  LOW_STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 2507, 'tn': 0, 'fp': 2493, 'fn': 0, 'auroc': 0.7070185830256909, 'auprc': 0.7258561450356193, 'eval_loss': 0.6904570068359375}
Correct predictions are:  2507
Total predictions are:  5000
Accuracy on test set is: 0.5014 



EPOCH NUMBER:  5 (RUN:  5 COMB:  LOW_STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.5608361394766643, 'tp': 1814, 'tn': 2079, 'fp': 414, 'fn': 693, 'auroc': 0.8635086899081289, 'auprc': 0.8500862831208592, 'eval_loss': 0.536295166015625}
Correct predictions are:  3893
Total predictions are:  5000
Accuracy on test set is: 0.7786 



EPOCH NUMBER:  6 (RUN:  5 COMB:  LOW_STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.5532449240628124, 'tp': 1672, 'tn': 2180, 'fp': 313, 'fn': 835, 'auroc': 0.8618676370422742, 'auprc': 0.8461961397432698, 'eval_loss': 0.5469217407226562}
Correct predictions are:  3852
Total predictions are:  5000
Accuracy on test set is: 0.7704 



EPOCH NUMBER:  7 (RUN:  5 COMB:  LOW_STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.5900435945985469, 'tp': 1981, 'tn': 1994, 'fp': 499, 'fn': 526, 'auroc': 0.8774773594224978, 'auprc': 0.8733188785767892, 'eval_loss': 0.5267722930908203}
Correct predictions are:  3975
Total predictions are:  5000
Accuracy on test set is: 0.795 



EPOCH NUMBER:  8 (RUN:  5 COMB:  LOW_STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.5850915274960683, 'tp': 1913, 'tn': 2047, 'fp': 446, 'fn': 594, 'auroc': 0.7759901637628839, 'auprc': 0.8170494983391974, 'eval_loss': 0.6006576477050781}
Correct predictions are:  3960
Total predictions are:  5000
Accuracy on test set is: 0.792 



EPOCH NUMBER:  9 (RUN:  5 COMB:  LOW_STM_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.599229935179578, 'tp': 1995, 'tn': 2003, 'fp': 490, 'fn': 512, 'auroc': 0.87492421940588, 'auprc': 0.8553196680221322, 'eval_loss': 0.5942774353027344}
Correct predictions are:  3998
Total predictions are:  5000
Accuracy on test set is: 0.7996 


[0.5014, 0.7662, 0.7866, 0.7808, 0.5014, 0.7786, 0.7704, 0.795, 0.792, 0.7996]


 Over all runs maximum accuracies are: [0.6624, 0.7814, 0.7818, 0.7996, 0.822]
The median is: 0.7818
XLNet Accuracy Score on Test set ->  ['0.7818 +/- 0.11940000000000006']


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a



* * * * EVALUATION USING LOW_RSW_STM AS PREPROCESSING FUNCTION * * * *

RUN NUMBER:  1


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  1 COMB:  LOW_RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.33039516334325564, 'tp': 750, 'tn': 2372, 'fp': 121, 'fn': 1757, 'auroc': 0.7347540804719909, 'auprc': 0.7548715990263382, 'eval_loss': 0.6843708984375}
Correct predictions are:  3122
Total predictions are:  5000
Accuracy on test set is: 0.6244 



EPOCH NUMBER:  1 (RUN:  1 COMB:  LOW_RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.604796095567777, 'tp': 1950, 'tn': 2060, 'fp': 433, 'fn': 557, 'auroc': 0.8829051619764698, 'auprc': 0.871596348072067, 'eval_loss': 0.447075439453125}
Correct predictions are:  4010
Total predictions are:  5000
Accuracy on test set is: 0.802 



EPOCH NUMBER:  2 (RUN:  1 COMB:  LOW_RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6127810529037891, 'tp': 1946, 'tn': 2083, 'fp': 410, 'fn': 561, 'auroc': 0.8877280797881455, 'auprc': 0.8791273235006798, 'eval_loss': 0.47753663940429686}
Correct predictions are:  4029
Total predictions are:  5000
Accuracy on test set is: 0.8058 



EPOCH NUMBER:  3 (RUN:  1 COMB:  LOW_RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6367646212957223, 'tp': 2097, 'tn': 1994, 'fp': 499, 'fn': 410, 'auroc': 0.8963518273983269, 'auprc': 0.8899621724683594, 'eval_loss': 0.46965281982421875}
Correct predictions are:  4091
Total predictions are:  5000
Accuracy on test set is: 0.8182 



EPOCH NUMBER:  4 (RUN:  1 COMB:  LOW_RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6341495354787806, 'tp': 1982, 'tn': 2101, 'fp': 392, 'fn': 525, 'auroc': 0.8939486085570909, 'auprc': 0.8683884379720632, 'eval_loss': 0.5003882720947266}
Correct predictions are:  4083
Total predictions are:  5000
Accuracy on test set is: 0.8166 



EPOCH NUMBER:  5 (RUN:  1 COMB:  LOW_RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6347477424483431, 'tp': 1942, 'tn': 2139, 'fp': 354, 'fn': 565, 'auroc': 0.9023569144782094, 'auprc': 0.8959231932158781, 'eval_loss': 0.6307746704101562}
Correct predictions are:  4081
Total predictions are:  5000
Accuracy on test set is: 0.8162 



EPOCH NUMBER:  6 (RUN:  1 COMB:  LOW_RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6361920120309369, 'tp': 2085, 'tn': 2005, 'fp': 488, 'fn': 422, 'auroc': 0.887856720796691, 'auprc': 0.8499435219919125, 'eval_loss': 0.6456489242553711}
Correct predictions are:  4090
Total predictions are:  5000
Accuracy on test set is: 0.818 



EPOCH NUMBER:  7 (RUN:  1 COMB:  LOW_RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6044968070425508, 'tp': 2082, 'tn': 1927, 'fp': 566, 'fn': 425, 'auroc': 0.879238893232923, 'auprc': 0.8642886352793647, 'eval_loss': 0.6046058609008789}
Correct predictions are:  4009
Total predictions are:  5000
Accuracy on test set is: 0.8018 



EPOCH NUMBER:  8 (RUN:  1 COMB:  LOW_RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.5697304095597834, 'tp': 1640, 'tn': 2240, 'fp': 253, 'fn': 867, 'auroc': 0.843857655844022, 'auprc': 0.8302821365456525, 'eval_loss': 0.5870301513671875}
Correct predictions are:  3880
Total predictions are:  5000
Accuracy on test set is: 0.776 



EPOCH NUMBER:  9 (RUN:  1 COMB:  LOW_RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.501686182332384, 'tp': 1336, 'tn': 2314, 'fp': 179, 'fn': 1171, 'auroc': 0.6744483276748889, 'auprc': 0.7432692320894577, 'eval_loss': 0.5915301452636719}
Correct predictions are:  3650
Total predictions are:  5000
Accuracy on test set is: 0.73 


[0.6244, 0.802, 0.8058, 0.8182, 0.8166, 0.8162, 0.818, 0.8018, 0.776, 0.73]

RUN NUMBER:  2


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  2 COMB:  LOW_RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6677788156921391, 'tp': 2160, 'tn': 2007, 'fp': 486, 'fn': 347, 'auroc': 0.9104507379337854, 'auprc': 0.9070898974843218, 'eval_loss': 0.40015294799804685}
Correct predictions are:  4167
Total predictions are:  5000
Accuracy on test set is: 0.8334 



EPOCH NUMBER:  1 (RUN:  2 COMB:  LOW_RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6712170606596303, 'tp': 2088, 'tn': 2090, 'fp': 403, 'fn': 419, 'auroc': 0.9157619795739199, 'auprc': 0.9109052468097255, 'eval_loss': 0.39350372314453125}
Correct predictions are:  4178
Total predictions are:  5000
Accuracy on test set is: 0.8356 



EPOCH NUMBER:  2 (RUN:  2 COMB:  LOW_RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6647082026421306, 'tp': 2051, 'tn': 2110, 'fp': 383, 'fn': 456, 'auroc': 0.9171099101416955, 'auprc': 0.9138348943548829, 'eval_loss': 0.5823430229187012}
Correct predictions are:  4161
Total predictions are:  5000
Accuracy on test set is: 0.8322 



EPOCH NUMBER:  3 (RUN:  2 COMB:  LOW_RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6768995983919751, 'tp': 2056, 'tn': 2135, 'fp': 358, 'fn': 451, 'auroc': 0.9193388076162518, 'auprc': 0.9156325050529198, 'eval_loss': 0.7657724404335022}
Correct predictions are:  4191
Total predictions are:  5000
Accuracy on test set is: 0.8382 



EPOCH NUMBER:  4 (RUN:  2 COMB:  LOW_RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6594624465411485, 'tp': 1998, 'tn': 2147, 'fp': 346, 'fn': 509, 'auroc': 0.9143858887853681, 'auprc': 0.9106326967392296, 'eval_loss': 0.7733904438018799}
Correct predictions are:  4145
Total predictions are:  5000
Accuracy on test set is: 0.829 



EPOCH NUMBER:  5 (RUN:  2 COMB:  LOW_RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6764324674998127, 'tp': 2117, 'tn': 2074, 'fp': 419, 'fn': 390, 'auroc': 0.9084089619262614, 'auprc': 0.8742415903950569, 'eval_loss': 0.7613740756988525}
Correct predictions are:  4191
Total predictions are:  5000
Accuracy on test set is: 0.8382 



EPOCH NUMBER:  6 (RUN:  2 COMB:  LOW_RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6600212963005788, 'tp': 2073, 'tn': 2077, 'fp': 416, 'fn': 434, 'auroc': 0.9135036418685523, 'auprc': 0.9100615489011707, 'eval_loss': 0.8551330177307129}
Correct predictions are:  4150
Total predictions are:  5000
Accuracy on test set is: 0.83 



EPOCH NUMBER:  7 (RUN:  2 COMB:  LOW_RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6396462514927388, 'tp': 1913, 'tn': 2176, 'fp': 317, 'fn': 594, 'auroc': 0.8912209071719123, 'auprc': 0.8375975882657642, 'eval_loss': 0.8873827724456788}
Correct predictions are:  4089
Total predictions are:  5000
Accuracy on test set is: 0.8178 



EPOCH NUMBER:  8 (RUN:  2 COMB:  LOW_RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6521194413273133, 'tp': 1961, 'tn': 2163, 'fp': 330, 'fn': 546, 'auroc': 0.8718334751744454, 'auprc': 0.7950018659671355, 'eval_loss': 0.9146985513687134}
Correct predictions are:  4124
Total predictions are:  5000
Accuracy on test set is: 0.8248 



EPOCH NUMBER:  9 (RUN:  2 COMB:  LOW_RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6427937208205398, 'tp': 2018, 'tn': 2088, 'fp': 405, 'fn': 489, 'auroc': 0.8933128435726936, 'auprc': 0.866307082315161, 'eval_loss': 0.8907213012695312}
Correct predictions are:  4106
Total predictions are:  5000
Accuracy on test set is: 0.8212 


[0.8334, 0.8356, 0.8322, 0.8382, 0.829, 0.8382, 0.83, 0.8178, 0.8248, 0.8212]

RUN NUMBER:  3


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  3 COMB:  LOW_RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.3370830942413513, 'tp': 964, 'tn': 2253, 'fp': 240, 'fn': 1543, 'auroc': 0.7574822586609078, 'auprc': 0.755125254293078, 'eval_loss': 0.668853515625}
Correct predictions are:  3217
Total predictions are:  5000
Accuracy on test set is: 0.6434 



EPOCH NUMBER:  1 (RUN:  3 COMB:  LOW_RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6101609642975184, 'tp': 2091, 'tn': 1932, 'fp': 561, 'fn': 416, 'auroc': 0.8887704879606256, 'auprc': 0.8814410786795248, 'eval_loss': 0.44493434448242186}
Correct predictions are:  4023
Total predictions are:  5000
Accuracy on test set is: 0.8046 



EPOCH NUMBER:  2 (RUN:  3 COMB:  LOW_RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6178946068930835, 'tp': 2191, 'tn': 1840, 'fp': 653, 'fn': 316, 'auroc': 0.8873857571043358, 'auprc': 0.8803335811914007, 'eval_loss': 0.4708058563232422}
Correct predictions are:  4031
Total predictions are:  5000
Accuracy on test set is: 0.8062 



EPOCH NUMBER:  3 (RUN:  3 COMB:  LOW_RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.5994661214937059, 'tp': 1870, 'tn': 2120, 'fp': 373, 'fn': 637, 'auroc': 0.8853643812567491, 'auprc': 0.8774368188550293, 'eval_loss': 0.47309524536132813}
Correct predictions are:  3990
Total predictions are:  5000
Accuracy on test set is: 0.798 



EPOCH NUMBER:  4 (RUN:  3 COMB:  LOW_RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6035318715715192, 'tp': 2055, 'tn': 1953, 'fp': 540, 'fn': 452, 'auroc': 0.8851747797702735, 'auprc': 0.8727093490484439, 'eval_loss': 0.4800633911132812}
Correct predictions are:  4008
Total predictions are:  5000
Accuracy on test set is: 0.8016 



EPOCH NUMBER:  5 (RUN:  3 COMB:  LOW_RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6058574604671026, 'tp': 2053, 'tn': 1961, 'fp': 532, 'fn': 454, 'auroc': 0.8724194797687213, 'auprc': 0.8406215857001855, 'eval_loss': 0.5063310836791992}
Correct predictions are:  4014
Total predictions are:  5000
Accuracy on test set is: 0.8028 



EPOCH NUMBER:  6 (RUN:  3 COMB:  LOW_RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.5821854798804017, 'tp': 1768, 'tn': 2167, 'fp': 326, 'fn': 739, 'auroc': 0.876433831241237, 'auprc': 0.8685487194566384, 'eval_loss': 0.5165218200683593}
Correct predictions are:  3935
Total predictions are:  5000
Accuracy on test set is: 0.787 



EPOCH NUMBER:  7 (RUN:  3 COMB:  LOW_RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6176205851309946, 'tp': 2077, 'tn': 1966, 'fp': 527, 'fn': 430, 'auroc': 0.8892873720129967, 'auprc': 0.869691863666118, 'eval_loss': 0.5414711395263672}
Correct predictions are:  4043
Total predictions are:  5000
Accuracy on test set is: 0.8086 



EPOCH NUMBER:  8 (RUN:  3 COMB:  LOW_RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6289035078408454, 'tp': 1963, 'tn': 2106, 'fp': 387, 'fn': 544, 'auroc': 0.8956814221423497, 'auprc': 0.8886660010448242, 'eval_loss': 0.579197119140625}
Correct predictions are:  4069
Total predictions are:  5000
Accuracy on test set is: 0.8138 



EPOCH NUMBER:  9 (RUN:  3 COMB:  LOW_RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6046048093773364, 'tp': 2196, 'tn': 1798, 'fp': 695, 'fn': 311, 'auroc': 0.8559892709558843, 'auprc': 0.7874565750681729, 'eval_loss': 0.5831983200073242}
Correct predictions are:  3994
Total predictions are:  5000
Accuracy on test set is: 0.7988 


[0.6434, 0.8046, 0.8062, 0.798, 0.8016, 0.8028, 0.787, 0.8086, 0.8138, 0.7988]

RUN NUMBER:  4


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  4 COMB:  LOW_RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6512896058418687, 'tp': 2121, 'tn': 2006, 'fp': 487, 'fn': 386, 'auroc': 0.9104678580680073, 'auprc': 0.9062294090241665, 'eval_loss': 0.39154700012207033}
Correct predictions are:  4127
Total predictions are:  5000
Accuracy on test set is: 0.8254 



EPOCH NUMBER:  1 (RUN:  4 COMB:  LOW_RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6602093311694947, 'tp': 2152, 'tn': 1996, 'fp': 497, 'fn': 355, 'auroc': 0.9136789232427581, 'auprc': 0.9090046300574341, 'eval_loss': 0.4068730941772461}
Correct predictions are:  4148
Total predictions are:  5000
Accuracy on test set is: 0.8296 



EPOCH NUMBER:  2 (RUN:  4 COMB:  LOW_RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6673576886059892, 'tp': 2041, 'tn': 2126, 'fp': 367, 'fn': 466, 'auroc': 0.9134201212137503, 'auprc': 0.9096178255229816, 'eval_loss': 0.5586449676513672}
Correct predictions are:  4167
Total predictions are:  5000
Accuracy on test set is: 0.8334 



EPOCH NUMBER:  3 (RUN:  4 COMB:  LOW_RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6469544900349956, 'tp': 2015, 'tn': 2101, 'fp': 392, 'fn': 492, 'auroc': 0.9081321597561324, 'auprc': 0.8945475226045769, 'eval_loss': 0.7072436698913575}
Correct predictions are:  4116
Total predictions are:  5000
Accuracy on test set is: 0.8232 



EPOCH NUMBER:  4 (RUN:  4 COMB:  LOW_RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6711979127247797, 'tp': 2100, 'tn': 2078, 'fp': 415, 'fn': 407, 'auroc': 0.9145864503577709, 'auprc': 0.9107825563944095, 'eval_loss': 0.7174553524017334}
Correct predictions are:  4178
Total predictions are:  5000
Accuracy on test set is: 0.8356 



EPOCH NUMBER:  5 (RUN:  4 COMB:  LOW_RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6507153452225435, 'tp': 2111, 'tn': 2015, 'fp': 478, 'fn': 396, 'auroc': 0.9071147117793403, 'auprc': 0.901346003789402, 'eval_loss': 0.8141768600463867}
Correct predictions are:  4126
Total predictions are:  5000
Accuracy on test set is: 0.8252 



EPOCH NUMBER:  6 (RUN:  4 COMB:  LOW_RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.661859361091761, 'tp': 2121, 'tn': 2033, 'fp': 460, 'fn': 386, 'auroc': 0.9116658674604009, 'auprc': 0.9063561646062945, 'eval_loss': 0.8837559328079224}
Correct predictions are:  4154
Total predictions are:  5000
Accuracy on test set is: 0.8308 



EPOCH NUMBER:  7 (RUN:  4 COMB:  LOW_RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6482333333134008, 'tp': 2013, 'tn': 2106, 'fp': 387, 'fn': 494, 'auroc': 0.9081648800126593, 'auprc': 0.904893842563497, 'eval_loss': 0.9308667888641358}
Correct predictions are:  4119
Total predictions are:  5000
Accuracy on test set is: 0.8238 



EPOCH NUMBER:  8 (RUN:  4 COMB:  LOW_RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6484456924249263, 'tp': 2054, 'tn': 2067, 'fp': 426, 'fn': 453, 'auroc': 0.9065072670169734, 'auprc': 0.9000933301130805, 'eval_loss': 0.9513826961517334}
Correct predictions are:  4121
Total predictions are:  5000
Accuracy on test set is: 0.8242 



EPOCH NUMBER:  9 (RUN:  4 COMB:  LOW_RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6335835371514079, 'tp': 2126, 'tn': 1955, 'fp': 538, 'fn': 381, 'auroc': 0.8657076671481105, 'auprc': 0.8084043559245919, 'eval_loss': 1.003686492729187}
Correct predictions are:  4081
Total predictions are:  5000
Accuracy on test set is: 0.8162 


[0.8254, 0.8296, 0.8334, 0.8232, 0.8356, 0.8252, 0.8308, 0.8238, 0.8242, 0.8162]

RUN NUMBER:  5


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  5 COMB:  LOW_RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6027092453562417, 'tp': 1972, 'tn': 2034, 'fp': 459, 'fn': 535, 'auroc': 0.8830253229185316, 'auprc': 0.8781057094919364, 'eval_loss': 0.43562613525390625}
Correct predictions are:  4006
Total predictions are:  5000
Accuracy on test set is: 0.8012 



EPOCH NUMBER:  1 (RUN:  5 COMB:  LOW_RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6295592787568175, 'tp': 2088, 'tn': 1985, 'fp': 508, 'fn': 419, 'auroc': 0.895324379343134, 'auprc': 0.8934654915289658, 'eval_loss': 0.44515700073242187}
Correct predictions are:  4073
Total predictions are:  5000
Accuracy on test set is: 0.8146 



EPOCH NUMBER:  2 (RUN:  5 COMB:  LOW_RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6391963730812775, 'tp': 2058, 'tn': 2040, 'fp': 453, 'fn': 449, 'auroc': 0.9006477010779763, 'auprc': 0.8958707251151847, 'eval_loss': 0.468297492980957}
Correct predictions are:  4098
Total predictions are:  5000
Accuracy on test set is: 0.8196 



EPOCH NUMBER:  3 (RUN:  5 COMB:  LOW_RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.630562867488368, 'tp': 1941, 'tn': 2130, 'fp': 363, 'fn': 566, 'auroc': 0.8793256939134404, 'auprc': 0.8858612746895448, 'eval_loss': 0.6122518157958985}
Correct predictions are:  4071
Total predictions are:  5000
Accuracy on test set is: 0.8142 



EPOCH NUMBER:  4 (RUN:  5 COMB:  LOW_RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6370832392235419, 'tp': 1981, 'tn': 2109, 'fp': 384, 'fn': 526, 'auroc': 0.9036464445881255, 'auprc': 0.9010412733435065, 'eval_loss': 0.6354167709350586}
Correct predictions are:  4090
Total predictions are:  5000
Accuracy on test set is: 0.818 



EPOCH NUMBER:  5 (RUN:  5 COMB:  LOW_RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6191195279108281, 'tp': 1873, 'tn': 2163, 'fp': 330, 'fn': 634, 'auroc': 0.8828801217801548, 'auprc': 0.8767226562189465, 'eval_loss': 0.708967544555664}
Correct predictions are:  4036
Total predictions are:  5000
Accuracy on test set is: 0.8072 



EPOCH NUMBER:  6 (RUN:  5 COMB:  LOW_RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.541237696264561, 'tp': 1504, 'tn': 2279, 'fp': 214, 'fn': 1003, 'auroc': 0.8220307647211954, 'auprc': 0.7773668203119581, 'eval_loss': 0.639410043334961}
Correct predictions are:  3783
Total predictions are:  5000
Accuracy on test set is: 0.7566 



EPOCH NUMBER:  7 (RUN:  5 COMB:  LOW_RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.598024030361772, 'tp': 2019, 'tn': 1976, 'fp': 517, 'fn': 488, 'auroc': 0.8634902097632445, 'auprc': 0.8300268385729286, 'eval_loss': 0.6404690490722657}
Correct predictions are:  3995
Total predictions are:  5000
Accuracy on test set is: 0.799 



EPOCH NUMBER:  8 (RUN:  5 COMB:  LOW_RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6089967991069779, 'tp': 1934, 'tn': 2085, 'fp': 408, 'fn': 573, 'auroc': 0.8700794614229774, 'auprc': 0.8461399331690005, 'eval_loss': 0.7172219055175781}
Correct predictions are:  4019
Total predictions are:  5000
Accuracy on test set is: 0.8038 



EPOCH NUMBER:  9 (RUN:  5 COMB:  LOW_RSW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6055845546201494, 'tp': 1971, 'tn': 2042, 'fp': 451, 'fn': 536, 'auroc': 0.8845646149865816, 'auprc': 0.8743184373796822, 'eval_loss': 0.6843734313964843}
Correct predictions are:  4013
Total predictions are:  5000
Accuracy on test set is: 0.8026 


[0.8012, 0.8146, 0.8196, 0.8142, 0.818, 0.8072, 0.7566, 0.799, 0.8038, 0.8026]


 Over all runs maximum accuracies are: [0.8138, 0.8182, 0.8196, 0.8356, 0.8382]
The median is: 0.8196
XLNet Accuracy Score on Test set ->  ['0.8196 +/- 0.01859999999999995']


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a



* * * * EVALUATION USING STM_LOW_RSW AS PREPROCESSING FUNCTION * * * *

RUN NUMBER:  1


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  1 COMB:  STM_LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6533010607843854, 'tp': 2053, 'tn': 2080, 'fp': 413, 'fn': 454, 'auroc': 0.9064452665308896, 'auprc': 0.9035684632056435, 'eval_loss': 0.4064079460144043}
Correct predictions are:  4133
Total predictions are:  5000
Accuracy on test set is: 0.8266 



EPOCH NUMBER:  1 (RUN:  1 COMB:  STM_LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6566492632871263, 'tp': 2114, 'tn': 2027, 'fp': 466, 'fn': 393, 'auroc': 0.9102528163820806, 'auprc': 0.9055859953976444, 'eval_loss': 0.4239761734008789}
Correct predictions are:  4141
Total predictions are:  5000
Accuracy on test set is: 0.8282 



EPOCH NUMBER:  2 (RUN:  1 COMB:  STM_LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6518575165568367, 'tp': 2038, 'tn': 2091, 'fp': 402, 'fn': 469, 'auroc': 0.9081295997360619, 'auprc': 0.9045451723038663, 'eval_loss': 0.5798889572143555}
Correct predictions are:  4129
Total predictions are:  5000
Accuracy on test set is: 0.8258 



EPOCH NUMBER:  3 (RUN:  1 COMB:  STM_LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6428081636822014, 'tp': 2070, 'tn': 2037, 'fp': 456, 'fn': 437, 'auroc': 0.9020209918445761, 'auprc': 0.9004633759190219, 'eval_loss': 0.7377497495651245}
Correct predictions are:  4107
Total predictions are:  5000
Accuracy on test set is: 0.8214 



EPOCH NUMBER:  4 (RUN:  1 COMB:  STM_LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6427778918037768, 'tp': 2105, 'tn': 2001, 'fp': 492, 'fn': 402, 'auroc': 0.9059046222922388, 'auprc': 0.901181953788685, 'eval_loss': 0.7898477802276611}
Correct predictions are:  4106
Total predictions are:  5000
Accuracy on test set is: 0.8212 



EPOCH NUMBER:  5 (RUN:  1 COMB:  STM_LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6452582849089016, 'tp': 2129, 'tn': 1982, 'fp': 511, 'fn': 378, 'auroc': 0.8728142828639774, 'auprc': 0.8782744697880223, 'eval_loss': 0.8313896766662597}
Correct predictions are:  4111
Total predictions are:  5000
Accuracy on test set is: 0.8222 



EPOCH NUMBER:  6 (RUN:  1 COMB:  STM_LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6500391215168089, 'tp': 2117, 'tn': 2007, 'fp': 486, 'fn': 390, 'auroc': 0.8984932841873481, 'auprc': 0.8917129081464596, 'eval_loss': 0.7704165252685546}
Correct predictions are:  4124
Total predictions are:  5000
Accuracy on test set is: 0.8248 



EPOCH NUMBER:  7 (RUN:  1 COMB:  STM_LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6356333640077368, 'tp': 2067, 'tn': 2022, 'fp': 471, 'fn': 440, 'auroc': 0.8874074372743083, 'auprc': 0.8841105906463005, 'eval_loss': 0.8484949523925781}
Correct predictions are:  4089
Total predictions are:  5000
Accuracy on test set is: 0.8178 



EPOCH NUMBER:  8 (RUN:  1 COMB:  STM_LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6378254765993441, 'tp': 1984, 'tn': 2108, 'fp': 385, 'fn': 523, 'auroc': 0.8993489708959319, 'auprc': 0.8869316413689552, 'eval_loss': 0.9059666618347167}
Correct predictions are:  4092
Total predictions are:  5000
Accuracy on test set is: 0.8184 



EPOCH NUMBER:  9 (RUN:  1 COMB:  STM_LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6093228452302422, 'tp': 1904, 'tn': 2113, 'fp': 380, 'fn': 603, 'auroc': 0.8936043658582284, 'auprc': 0.8861795767964711, 'eval_loss': 0.9499105377197266}
Correct predictions are:  4017
Total predictions are:  5000
Accuracy on test set is: 0.8034 


[0.8266, 0.8282, 0.8258, 0.8214, 0.8212, 0.8222, 0.8248, 0.8178, 0.8184, 0.8034]

RUN NUMBER:  2


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  2 COMB:  STM_LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 0, 'tn': 2493, 'fp': 0, 'fn': 2507, 'auroc': 0.6205058247656662, 'auprc': 0.5872900717420153, 'eval_loss': 0.69600390625}
Correct predictions are:  2493
Total predictions are:  5000
Accuracy on test set is: 0.4986 



EPOCH NUMBER:  1 (RUN:  2 COMB:  STM_LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 2507, 'tn': 0, 'fp': 2493, 'fn': 0, 'auroc': 0.6632811201239818, 'auprc': 0.6234267205863381, 'eval_loss': 0.69314375}
Correct predictions are:  2507
Total predictions are:  5000
Accuracy on test set is: 0.5014 



EPOCH NUMBER:  2 (RUN:  2 COMB:  STM_LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 2507, 'tn': 0, 'fp': 2493, 'fn': 0, 'auroc': 0.5012195295611117, 'auprc': 0.5029094188574852, 'eval_loss': 0.69396533203125}
Correct predictions are:  2507
Total predictions are:  5000
Accuracy on test set is: 0.5014 



EPOCH NUMBER:  3 (RUN:  2 COMB:  STM_LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 2507, 'tn': 0, 'fp': 2493, 'fn': 0, 'auroc': 0.4964526921891068, 'auprc': 0.4996554357201345, 'eval_loss': 0.694120703125}
Correct predictions are:  2507
Total predictions are:  5000
Accuracy on test set is: 0.5014 



EPOCH NUMBER:  4 (RUN:  2 COMB:  STM_LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 0, 'tn': 2493, 'fp': 0, 'fn': 2507, 'auroc': 0.5029180228772993, 'auprc': 0.5040118404022627, 'eval_loss': 0.69318154296875}
Correct predictions are:  2493
Total predictions are:  5000
Accuracy on test set is: 0.4986 



EPOCH NUMBER:  5 (RUN:  2 COMB:  STM_LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 0, 'tn': 2493, 'fp': 0, 'fn': 2507, 'auroc': 0.49190721655257774, 'auprc': 0.4970325129989077, 'eval_loss': 0.6931572265625}
Correct predictions are:  2493
Total predictions are:  5000
Accuracy on test set is: 0.4986 



EPOCH NUMBER:  6 (RUN:  2 COMB:  STM_LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 2507, 'tn': 0, 'fp': 2493, 'fn': 0, 'auroc': 0.603247929463767, 'auprc': 0.5806439766191, 'eval_loss': 0.6964748046875}
Correct predictions are:  2507
Total predictions are:  5000
Accuracy on test set is: 0.5014 



EPOCH NUMBER:  7 (RUN:  2 COMB:  STM_LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.18130530078166499, 'tp': 615, 'tn': 2226, 'fp': 267, 'fn': 1892, 'auroc': 0.6461786660407418, 'auprc': 0.619870144925775, 'eval_loss': 0.6850809814453125}
Correct predictions are:  2841
Total predictions are:  5000
Accuracy on test set is: 0.5682 



EPOCH NUMBER:  8 (RUN:  2 COMB:  STM_LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 0, 'tn': 2493, 'fp': 0, 'fn': 2507, 'auroc': 0.5054662828556576, 'auprc': 0.54742264407125, 'eval_loss': 0.69316279296875}
Correct predictions are:  2493
Total predictions are:  5000
Accuracy on test set is: 0.4986 



EPOCH NUMBER:  9 (RUN:  2 COMB:  STM_LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.2974211801480624, 'tp': 1366, 'tn': 1861, 'fp': 632, 'fn': 1141, 'auroc': 0.6496482132419918, 'auprc': 0.6321470445505012, 'eval_loss': 0.659189111328125}
Correct predictions are:  3227
Total predictions are:  5000
Accuracy on test set is: 0.6454 


[0.4986, 0.5014, 0.5014, 0.5014, 0.4986, 0.4986, 0.5014, 0.5682, 0.4986, 0.6454]

RUN NUMBER:  3


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  3 COMB:  STM_LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 0, 'tn': 2493, 'fp': 0, 'fn': 2507, 'auroc': 0.5340449069120702, 'auprc': 0.5215830415877304, 'eval_loss': 0.69329560546875}
Correct predictions are:  2493
Total predictions are:  5000
Accuracy on test set is: 0.4986 



EPOCH NUMBER:  1 (RUN:  3 COMB:  STM_LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 0, 'tn': 2493, 'fp': 0, 'fn': 2507, 'auroc': 0.5195671134061692, 'auprc': 0.5115581974265389, 'eval_loss': 0.69473408203125}
Correct predictions are:  2493
Total predictions are:  5000
Accuracy on test set is: 0.4986 



EPOCH NUMBER:  2 (RUN:  3 COMB:  STM_LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 2507, 'tn': 0, 'fp': 2493, 'fn': 0, 'auroc': 0.4979208636995714, 'auprc': 0.500556117105237, 'eval_loss': 0.6958765625}
Correct predictions are:  2507
Total predictions are:  5000
Accuracy on test set is: 0.5014 



EPOCH NUMBER:  3 (RUN:  3 COMB:  STM_LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 2507, 'tn': 0, 'fp': 2493, 'fn': 0, 'auroc': 0.5042782735416645, 'auprc': 0.5037557898628139, 'eval_loss': 0.69352783203125}
Correct predictions are:  2507
Total predictions are:  5000
Accuracy on test set is: 0.5014 



EPOCH NUMBER:  4 (RUN:  3 COMB:  STM_LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 2507, 'tn': 0, 'fp': 2493, 'fn': 0, 'auroc': 0.4903954447002864, 'auprc': 0.49724490696854623, 'eval_loss': 0.693388671875}
Correct predictions are:  2507
Total predictions are:  5000
Accuracy on test set is: 0.5014 



EPOCH NUMBER:  5 (RUN:  3 COMB:  STM_LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 0, 'tn': 2493, 'fp': 0, 'fn': 2507, 'auroc': 0.4780855881910114, 'auprc': 0.4908296331261187, 'eval_loss': 0.693324609375}
Correct predictions are:  2493
Total predictions are:  5000
Accuracy on test set is: 0.4986 



EPOCH NUMBER:  6 (RUN:  3 COMB:  STM_LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 0, 'tn': 2493, 'fp': 0, 'fn': 2507, 'auroc': 0.48245506244768965, 'auprc': 0.4925541301018783, 'eval_loss': 0.69352958984375}
Correct predictions are:  2493
Total predictions are:  5000
Accuracy on test set is: 0.4986 



EPOCH NUMBER:  7 (RUN:  3 COMB:  STM_LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 2507, 'tn': 0, 'fp': 2493, 'fn': 0, 'auroc': 0.509803836862081, 'auprc': 0.5070502206982624, 'eval_loss': 0.69355888671875}
Correct predictions are:  2507
Total predictions are:  5000
Accuracy on test set is: 0.5014 



EPOCH NUMBER:  8 (RUN:  3 COMB:  STM_LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 2507, 'tn': 0, 'fp': 2493, 'fn': 0, 'auroc': 0.49842078761897496, 'auprc': 0.5006211005644372, 'eval_loss': 0.6953951171875}
Correct predictions are:  2507
Total predictions are:  5000
Accuracy on test set is: 0.5014 



EPOCH NUMBER:  9 (RUN:  3 COMB:  STM_LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 2507, 'tn': 0, 'fp': 2493, 'fn': 0, 'auroc': 0.49487683983442426, 'auprc': 0.4989425549604142, 'eval_loss': 0.693315625}
Correct predictions are:  2507
Total predictions are:  5000
Accuracy on test set is: 0.5014 


[0.4986, 0.4986, 0.5014, 0.5014, 0.5014, 0.4986, 0.4986, 0.5014, 0.5014, 0.5014]

RUN NUMBER:  4


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  4 COMB:  STM_LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 0, 'tn': 2493, 'fp': 0, 'fn': 2507, 'auroc': 0.5341072274006627, 'auprc': 0.5230871269710794, 'eval_loss': 0.6934265625}
Correct predictions are:  2493
Total predictions are:  5000
Accuracy on test set is: 0.4986 



EPOCH NUMBER:  1 (RUN:  4 COMB:  STM_LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 0, 'tn': 2493, 'fp': 0, 'fn': 2507, 'auroc': 0.4891725551128321, 'auprc': 0.49590332869031306, 'eval_loss': 0.69313359375}
Correct predictions are:  2493
Total predictions are:  5000
Accuracy on test set is: 0.4986 



EPOCH NUMBER:  2 (RUN:  4 COMB:  STM_LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 2507, 'tn': 0, 'fp': 2493, 'fn': 0, 'auroc': 0.49967559745668405, 'auprc': 0.5012577371748558, 'eval_loss': 0.69396552734375}
Correct predictions are:  2507
Total predictions are:  5000
Accuracy on test set is: 0.5014 



EPOCH NUMBER:  3 (RUN:  4 COMB:  STM_LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 0, 'tn': 2493, 'fp': 0, 'fn': 2507, 'auroc': 0.5166238103306731, 'auprc': 0.5134155335132432, 'eval_loss': 0.6947287109375}
Correct predictions are:  2493
Total predictions are:  5000
Accuracy on test set is: 0.4986 



EPOCH NUMBER:  4 (RUN:  4 COMB:  STM_LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 2507, 'tn': 0, 'fp': 2493, 'fn': 0, 'auroc': 0.5062034086347236, 'auprc': 0.5048001565935961, 'eval_loss': 0.69442939453125}
Correct predictions are:  2507
Total predictions are:  5000
Accuracy on test set is: 0.5014 



EPOCH NUMBER:  5 (RUN:  4 COMB:  STM_LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 2507, 'tn': 0, 'fp': 2493, 'fn': 0, 'auroc': 0.5022375375422944, 'auprc': 0.5017411117947693, 'eval_loss': 0.69394384765625}
Correct predictions are:  2507
Total predictions are:  5000
Accuracy on test set is: 0.5014 



EPOCH NUMBER:  6 (RUN:  4 COMB:  STM_LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 0, 'tn': 2493, 'fp': 0, 'fn': 2507, 'auroc': 0.4930768257223137, 'auprc': 0.4981800008875105, 'eval_loss': 0.69393642578125}
Correct predictions are:  2493
Total predictions are:  5000
Accuracy on test set is: 0.4986 



EPOCH NUMBER:  7 (RUN:  4 COMB:  STM_LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 2507, 'tn': 0, 'fp': 2493, 'fn': 0, 'auroc': 0.5025146597149321, 'auprc': 0.5055367043770151, 'eval_loss': 0.69309814453125}
Correct predictions are:  2507
Total predictions are:  5000
Accuracy on test set is: 0.5014 



EPOCH NUMBER:  8 (RUN:  4 COMB:  STM_LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 0, 'tn': 2493, 'fp': 0, 'fn': 2507, 'auroc': 0.4970416568065893, 'auprc': 0.5009828935617343, 'eval_loss': 0.693443359375}
Correct predictions are:  2493
Total predictions are:  5000
Accuracy on test set is: 0.4986 



EPOCH NUMBER:  9 (RUN:  4 COMB:  STM_LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 2507, 'tn': 0, 'fp': 2493, 'fn': 0, 'auroc': 0.48483524110829035, 'auprc': 0.49126963113604827, 'eval_loss': 0.69309404296875}
Correct predictions are:  2507
Total predictions are:  5000
Accuracy on test set is: 0.5014 


[0.4986, 0.4986, 0.5014, 0.4986, 0.5014, 0.5014, 0.4986, 0.5014, 0.4986, 0.5014]

RUN NUMBER:  5


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  5 COMB:  STM_LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6149252512821692, 'tp': 1932, 'tn': 2101, 'fp': 392, 'fn': 575, 'auroc': 0.892592757927222, 'auprc': 0.8892229989365881, 'eval_loss': 0.4213723266601562}
Correct predictions are:  4033
Total predictions are:  5000
Accuracy on test set is: 0.8066 



EPOCH NUMBER:  1 (RUN:  5 COMB:  STM_LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6412110802356924, 'tp': 2052, 'tn': 2051, 'fp': 442, 'fn': 455, 'auroc': 0.9042546093561374, 'auprc': 0.9010376908639985, 'eval_loss': 0.44808924102783204}
Correct predictions are:  4103
Total predictions are:  5000
Accuracy on test set is: 0.8206 



EPOCH NUMBER:  2 (RUN:  5 COMB:  STM_LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6433874410195864, 'tp': 2116, 'tn': 1991, 'fp': 502, 'fn': 391, 'auroc': 0.8977127180677097, 'auprc': 0.8914386176977347, 'eval_loss': 0.5329214233398437}
Correct predictions are:  4107
Total predictions are:  5000
Accuracy on test set is: 0.8214 



EPOCH NUMBER:  3 (RUN:  5 COMB:  STM_LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6391830464165539, 'tp': 2014, 'tn': 2083, 'fp': 410, 'fn': 493, 'auroc': 0.9016884292372852, 'auprc': 0.8953803358842591, 'eval_loss': 0.6516408889770507}
Correct predictions are:  4097
Total predictions are:  5000
Accuracy on test set is: 0.8194 



EPOCH NUMBER:  4 (RUN:  5 COMB:  STM_LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6298295632946834, 'tp': 1920, 'tn': 2147, 'fp': 346, 'fn': 587, 'auroc': 0.8952221385415662, 'auprc': 0.8961013033412051, 'eval_loss': 0.720798779296875}
Correct predictions are:  4067
Total predictions are:  5000
Accuracy on test set is: 0.8134 



EPOCH NUMBER:  5 (RUN:  5 COMB:  STM_LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6370753728444968, 'tp': 2018, 'tn': 2074, 'fp': 419, 'fn': 489, 'auroc': 0.8995824127261158, 'auprc': 0.8914506572425474, 'eval_loss': 0.6944550933837891}
Correct predictions are:  4092
Total predictions are:  5000
Accuracy on test set is: 0.8184 



EPOCH NUMBER:  6 (RUN:  5 COMB:  STM_LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6292423089097944, 'tp': 1998, 'tn': 2074, 'fp': 419, 'fn': 509, 'auroc': 0.8739896520788724, 'auprc': 0.8669529192865003, 'eval_loss': 0.7533806930541992}
Correct predictions are:  4072
Total predictions are:  5000
Accuracy on test set is: 0.8144 



EPOCH NUMBER:  7 (RUN:  5 COMB:  STM_LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6124062246715899, 'tp': 1921, 'tn': 2105, 'fp': 388, 'fn': 586, 'auroc': 0.8944230922770435, 'auprc': 0.8866681702213086, 'eval_loss': 0.7125161453247071}
Correct predictions are:  4026
Total predictions are:  5000
Accuracy on test set is: 0.8052 



EPOCH NUMBER:  8 (RUN:  5 COMB:  STM_LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6204727093740477, 'tp': 2015, 'tn': 2036, 'fp': 457, 'fn': 492, 'auroc': 0.8856567035485559, 'auprc': 0.8581034846895348, 'eval_loss': 0.7816527023315429}
Correct predictions are:  4051
Total predictions are:  5000
Accuracy on test set is: 0.8102 



EPOCH NUMBER:  9 (RUN:  5 COMB:  STM_LOW_RSW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6308431977069209, 'tp': 2000, 'tn': 2076, 'fp': 417, 'fn': 507, 'auroc': 0.8559565506993574, 'auprc': 0.8213663965068274, 'eval_loss': 0.7946084274291992}
Correct predictions are:  4076
Total predictions are:  5000
Accuracy on test set is: 0.8152 


[0.8066, 0.8206, 0.8214, 0.8194, 0.8134, 0.8184, 0.8144, 0.8052, 0.8102, 0.8152]


 Over all runs maximum accuracies are: [0.5014, 0.5014, 0.6454, 0.8214, 0.8282]
The median is: 0.6454
XLNet Accuracy Score on Test set ->  ['0.6454 +/- 0.18280000000000007']


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a



* * * * EVALUATION USING STM_RSW_LOW AS PREPROCESSING FUNCTION * * * *

RUN NUMBER:  1


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  1 COMB:  STM_RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 0, 'tn': 2493, 'fp': 0, 'fn': 2507, 'auroc': 0.5201111976717898, 'auprc': 0.5131066649901016, 'eval_loss': 0.69318291015625}
Correct predictions are:  2493
Total predictions are:  5000
Accuracy on test set is: 0.4986 



EPOCH NUMBER:  1 (RUN:  1 COMB:  STM_RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 0, 'tn': 2493, 'fp': 0, 'fn': 2507, 'auroc': 0.5522204094080098, 'auprc': 0.5326879417957621, 'eval_loss': 0.693669140625}
Correct predictions are:  2493
Total predictions are:  5000
Accuracy on test set is: 0.4986 



EPOCH NUMBER:  2 (RUN:  1 COMB:  STM_RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 2507, 'tn': 0, 'fp': 2493, 'fn': 0, 'auroc': 0.7609822861011231, 'auprc': 0.7133318303876932, 'eval_loss': 0.8330762939453125}
Correct predictions are:  2507
Total predictions are:  5000
Accuracy on test set is: 0.5014 



EPOCH NUMBER:  3 (RUN:  1 COMB:  STM_RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.24198803859186221, 'tp': 2474, 'tn': 356, 'fp': 2137, 'fn': 33, 'auroc': 0.7723202949911125, 'auprc': 0.7228199855889934, 'eval_loss': 0.6899525390625}
Correct predictions are:  2830
Total predictions are:  5000
Accuracy on test set is: 0.566 



EPOCH NUMBER:  4 (RUN:  1 COMB:  STM_RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.48270542717091897, 'tp': 2018, 'tn': 1679, 'fp': 814, 'fn': 489, 'auroc': 0.7909415609818381, 'auprc': 0.7347728593507318, 'eval_loss': 0.5516085571289062}
Correct predictions are:  3697
Total predictions are:  5000
Accuracy on test set is: 0.7394 



EPOCH NUMBER:  5 (RUN:  1 COMB:  STM_RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 2507, 'tn': 0, 'fp': 2493, 'fn': 0, 'auroc': 0.6360865869188415, 'auprc': 0.5974237817980578, 'eval_loss': 0.765079296875}
Correct predictions are:  2507
Total predictions are:  5000
Accuracy on test set is: 0.5014 



EPOCH NUMBER:  6 (RUN:  1 COMB:  STM_RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.014183207587881927, 'tp': 2507, 'tn': 1, 'fp': 2492, 'fn': 0, 'auroc': 0.776049124225134, 'auprc': 0.7411605533226434, 'eval_loss': 0.7396711303710938}
Correct predictions are:  2508
Total predictions are:  5000
Accuracy on test set is: 0.5016 



EPOCH NUMBER:  7 (RUN:  1 COMB:  STM_RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 2507, 'tn': 0, 'fp': 2493, 'fn': 0, 'auroc': 0.7558314457185344, 'auprc': 0.6827439649628912, 'eval_loss': 0.801831591796875}
Correct predictions are:  2507
Total predictions are:  5000
Accuracy on test set is: 0.5014 



EPOCH NUMBER:  8 (RUN:  1 COMB:  STM_RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.5100269391082775, 'tp': 1912, 'tn': 1863, 'fp': 630, 'fn': 595, 'auroc': 0.7969033677224031, 'auprc': 0.7440777393613456, 'eval_loss': 0.5696580932617188}
Correct predictions are:  3775
Total predictions are:  5000
Accuracy on test set is: 0.755 



EPOCH NUMBER:  9 (RUN:  1 COMB:  STM_RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.5357914604255162, 'tp': 1988, 'tn': 1850, 'fp': 643, 'fn': 519, 'auroc': 0.7980973770834363, 'auprc': 0.7444293781883446, 'eval_loss': 0.558259521484375}
Correct predictions are:  3838
Total predictions are:  5000
Accuracy on test set is: 0.7676 


[0.4986, 0.4986, 0.5014, 0.566, 0.7394, 0.5014, 0.5016, 0.5014, 0.755, 0.7676]

RUN NUMBER:  2


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  2 COMB:  STM_RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 0, 'tn': 2493, 'fp': 0, 'fn': 2507, 'auroc': 0.7619128533967706, 'auprc': 0.7561605944784102, 'eval_loss': 0.6974474609375}
Correct predictions are:  2493
Total predictions are:  5000
Accuracy on test set is: 0.4986 



EPOCH NUMBER:  1 (RUN:  2 COMB:  STM_RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 2507, 'tn': 0, 'fp': 2493, 'fn': 0, 'auroc': 0.5803613500329843, 'auprc': 0.5604854747254013, 'eval_loss': 0.6932375}
Correct predictions are:  2507
Total predictions are:  5000
Accuracy on test set is: 0.5014 



EPOCH NUMBER:  2 (RUN:  2 COMB:  STM_RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.37791734397038285, 'tp': 2287, 'tn': 1036, 'fp': 1457, 'fn': 220, 'auroc': 0.75693065433633, 'auprc': 0.7142981244907075, 'eval_loss': 0.6269637878417968}
Correct predictions are:  3323
Total predictions are:  5000
Accuracy on test set is: 0.6646 



EPOCH NUMBER:  3 (RUN:  2 COMB:  STM_RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.518686999946946, 'tp': 2041, 'tn': 1748, 'fp': 745, 'fn': 466, 'auroc': 0.8427766073686018, 'auprc': 0.8414217598383018, 'eval_loss': 0.5150193725585938}
Correct predictions are:  3789
Total predictions are:  5000
Accuracy on test set is: 0.7578 



EPOCH NUMBER:  4 (RUN:  2 COMB:  STM_RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.4956644567438545, 'tp': 1934, 'tn': 1804, 'fp': 689, 'fn': 573, 'auroc': 0.8374995259962837, 'auprc': 0.8244215119796647, 'eval_loss': 0.513419189453125}
Correct predictions are:  3738
Total predictions are:  5000
Accuracy on test set is: 0.7476 



EPOCH NUMBER:  5 (RUN:  2 COMB:  STM_RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.5449510881021009, 'tp': 1911, 'tn': 1951, 'fp': 542, 'fn': 596, 'auroc': 0.7861668035477396, 'auprc': 0.7589076060148527, 'eval_loss': 0.5604802001953125}
Correct predictions are:  3862
Total predictions are:  5000
Accuracy on test set is: 0.7724 



EPOCH NUMBER:  6 (RUN:  2 COMB:  STM_RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.5642307748527468, 'tp': 1841, 'tn': 2063, 'fp': 430, 'fn': 666, 'auroc': 0.8623298006656374, 'auprc': 0.8540519569001069, 'eval_loss': 0.5068243896484375}
Correct predictions are:  3904
Total predictions are:  5000
Accuracy on test set is: 0.7808 



EPOCH NUMBER:  7 (RUN:  2 COMB:  STM_RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.5688163881000615, 'tp': 2165, 'tn': 1738, 'fp': 755, 'fn': 342, 'auroc': 0.8554963070910475, 'auprc': 0.8136721357114026, 'eval_loss': 0.5364376586914062}
Correct predictions are:  3903
Total predictions are:  5000
Accuracy on test set is: 0.7806 



EPOCH NUMBER:  8 (RUN:  2 COMB:  STM_RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.5828587723295735, 'tp': 2055, 'tn': 1900, 'fp': 593, 'fn': 452, 'auroc': 0.8256792733255027, 'auprc': 0.8139464371876098, 'eval_loss': 0.54876923828125}
Correct predictions are:  3955
Total predictions are:  5000
Accuracy on test set is: 0.791 



EPOCH NUMBER:  9 (RUN:  2 COMB:  STM_RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.5480484774524108, 'tp': 1655, 'tn': 2182, 'fp': 311, 'fn': 852, 'auroc': 0.8632582079443502, 'auprc': 0.8335651568439688, 'eval_loss': 0.6156812438964844}
Correct predictions are:  3837
Total predictions are:  5000
Accuracy on test set is: 0.7674 


[0.4986, 0.5014, 0.6646, 0.7578, 0.7476, 0.7724, 0.7808, 0.7806, 0.791, 0.7674]

RUN NUMBER:  3


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  3 COMB:  STM_RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6204563582647736, 'tp': 1986, 'tn': 2064, 'fp': 429, 'fn': 521, 'auroc': 0.8958494234594798, 'auprc': 0.8914909527714394, 'eval_loss': 0.413508349609375}
Correct predictions are:  4050
Total predictions are:  5000
Accuracy on test set is: 0.81 



EPOCH NUMBER:  1 (RUN:  3 COMB:  STM_RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6653270491264485, 'tp': 2115, 'tn': 2048, 'fp': 445, 'fn': 392, 'auroc': 0.9087487245900008, 'auprc': 0.9044420437685405, 'eval_loss': 0.4209587020874023}
Correct predictions are:  4163
Total predictions are:  5000
Accuracy on test set is: 0.8326 



EPOCH NUMBER:  2 (RUN:  3 COMB:  STM_RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6465756074484937, 'tp': 2147, 'tn': 1966, 'fp': 527, 'fn': 360, 'auroc': 0.9074006340209708, 'auprc': 0.904232670874669, 'eval_loss': 0.4929868942260742}
Correct predictions are:  4113
Total predictions are:  5000
Accuracy on test set is: 0.8226 



EPOCH NUMBER:  3 (RUN:  3 COMB:  STM_RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6355967857954012, 'tp': 2052, 'tn': 2037, 'fp': 456, 'fn': 455, 'auroc': 0.8966873500288243, 'auprc': 0.8641501712860551, 'eval_loss': 0.663383878326416}
Correct predictions are:  4089
Total predictions are:  5000
Accuracy on test set is: 0.8178 



EPOCH NUMBER:  4 (RUN:  3 COMB:  STM_RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.644445548738188, 'tp': 2049, 'tn': 2062, 'fp': 431, 'fn': 458, 'auroc': 0.904949814806548, 'auprc': 0.8990703597791246, 'eval_loss': 0.7885757019042968}
Correct predictions are:  4111
Total predictions are:  5000
Accuracy on test set is: 0.8222 



EPOCH NUMBER:  5 (RUN:  3 COMB:  STM_RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6475449367800306, 'tp': 1980, 'tn': 2135, 'fp': 358, 'fn': 527, 'auroc': 0.9033093219450841, 'auprc': 0.8888983389703817, 'eval_loss': 0.7879765697479248}
Correct predictions are:  4115
Total predictions are:  5000
Accuracy on test set is: 0.823 



EPOCH NUMBER:  6 (RUN:  3 COMB:  STM_RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.640392572229452, 'tp': 2015, 'tn': 2085, 'fp': 408, 'fn': 492, 'auroc': 0.9017528297421853, 'auprc': 0.8953561269784234, 'eval_loss': 0.8287986366271972}
Correct predictions are:  4100
Total predictions are:  5000
Accuracy on test set is: 0.82 



EPOCH NUMBER:  7 (RUN:  3 COMB:  STM_RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6404998266952151, 'tp': 2037, 'tn': 2064, 'fp': 429, 'fn': 470, 'auroc': 0.9057379809857711, 'auprc': 0.898105521771519, 'eval_loss': 0.8610835746765136}
Correct predictions are:  4101
Total predictions are:  5000
Accuracy on test set is: 0.8202 



EPOCH NUMBER:  8 (RUN:  3 COMB:  STM_RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.631284992392585, 'tp': 2069, 'tn': 2009, 'fp': 484, 'fn': 438, 'auroc': 0.8830382030195116, 'auprc': 0.8414150429688052, 'eval_loss': 0.9491865413665771}
Correct predictions are:  4078
Total predictions are:  5000
Accuracy on test set is: 0.8156 



EPOCH NUMBER:  9 (RUN:  3 COMB:  STM_RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6332308696681035, 'tp': 1971, 'tn': 2109, 'fp': 384, 'fn': 536, 'auroc': 0.9000658565163149, 'auprc': 0.8908126424481329, 'eval_loss': 1.019714031600952}
Correct predictions are:  4080
Total predictions are:  5000
Accuracy on test set is: 0.816 


[0.81, 0.8326, 0.8226, 0.8178, 0.8222, 0.823, 0.82, 0.8202, 0.8156, 0.816]

RUN NUMBER:  4


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  4 COMB:  STM_RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.5902008117364822, 'tp': 1923, 'tn': 2050, 'fp': 443, 'fn': 584, 'auroc': 0.8752412618914932, 'auprc': 0.8688937644301641, 'eval_loss': 0.4649808471679687}
Correct predictions are:  3973
Total predictions are:  5000
Accuracy on test set is: 0.7946 



EPOCH NUMBER:  1 (RUN:  4 COMB:  STM_RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6409930238826089, 'tp': 2029, 'tn': 2073, 'fp': 420, 'fn': 478, 'auroc': 0.9034083627215637, 'auprc': 0.8982682005827158, 'eval_loss': 0.4412956329345703}
Correct predictions are:  4102
Total predictions are:  5000
Accuracy on test set is: 0.8204 



EPOCH NUMBER:  2 (RUN:  4 COMB:  STM_RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6307040506000122, 'tp': 2008, 'tn': 2068, 'fp': 425, 'fn': 499, 'auroc': 0.902605796429444, 'auprc': 0.8961984403653338, 'eval_loss': 0.4943930236816406}
Correct predictions are:  4076
Total predictions are:  5000
Accuracy on test set is: 0.8152 



EPOCH NUMBER:  3 (RUN:  4 COMB:  STM_RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6408953093273787, 'tp': 2038, 'tn': 2064, 'fp': 429, 'fn': 469, 'auroc': 0.9004055391794271, 'auprc': 0.8882769698751513, 'eval_loss': 0.5585321258544922}
Correct predictions are:  4102
Total predictions are:  5000
Accuracy on test set is: 0.8204 



EPOCH NUMBER:  4 (RUN:  4 COMB:  STM_RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6211117476429, 'tp': 1883, 'tn': 2159, 'fp': 334, 'fn': 624, 'auroc': 0.8897700157969238, 'auprc': 0.8843974470090753, 'eval_loss': 0.7468706710815429}
Correct predictions are:  4042
Total predictions are:  5000
Accuracy on test set is: 0.8084 



EPOCH NUMBER:  5 (RUN:  4 COMB:  STM_RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6428511612138235, 'tp': 1997, 'tn': 2108, 'fp': 385, 'fn': 510, 'auroc': 0.902264593754415, 'auprc': 0.8922753315668647, 'eval_loss': 0.768890193939209}
Correct predictions are:  4105
Total predictions are:  5000
Accuracy on test set is: 0.821 



EPOCH NUMBER:  6 (RUN:  4 COMB:  STM_RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.618563231362287, 'tp': 1875, 'tn': 2160, 'fp': 333, 'fn': 632, 'auroc': 0.8991781695568493, 'auprc': 0.8912203342934266, 'eval_loss': 0.8044171142578125}
Correct predictions are:  4035
Total predictions are:  5000
Accuracy on test set is: 0.807 



EPOCH NUMBER:  7 (RUN:  4 COMB:  STM_RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6378249625151766, 'tp': 2000, 'tn': 2093, 'fp': 400, 'fn': 507, 'auroc': 0.9026781169964373, 'auprc': 0.8966777550728461, 'eval_loss': 0.8159680389404297}
Correct predictions are:  4093
Total predictions are:  5000
Accuracy on test set is: 0.8186 



EPOCH NUMBER:  8 (RUN:  4 COMB:  STM_RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.612126231965447, 'tp': 2050, 'tn': 1980, 'fp': 513, 'fn': 457, 'auroc': 0.8507937102226881, 'auprc': 0.8019122016607786, 'eval_loss': 0.9173818092346191}
Correct predictions are:  4030
Total predictions are:  5000
Accuracy on test set is: 0.806 



EPOCH NUMBER:  9 (RUN:  4 COMB:  STM_RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.5962586113747722, 'tp': 1829, 'tn': 2148, 'fp': 345, 'fn': 678, 'auroc': 0.8749547796454725, 'auprc': 0.8643488824783903, 'eval_loss': 0.8686580078125}
Correct predictions are:  3977
Total predictions are:  5000
Accuracy on test set is: 0.7954 


[0.7946, 0.8204, 0.8152, 0.8204, 0.8084, 0.821, 0.807, 0.8186, 0.806, 0.7954]

RUN NUMBER:  5


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  5 COMB:  STM_RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.5663421229028934, 'tp': 1773, 'tn': 2127, 'fp': 366, 'fn': 734, 'auroc': 0.8689504125712346, 'auprc': 0.869607294518757, 'eval_loss': 0.5167520050048828}
Correct predictions are:  3900
Total predictions are:  5000
Accuracy on test set is: 0.78 



EPOCH NUMBER:  1 (RUN:  5 COMB:  STM_RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6009936362682122, 'tp': 1901, 'tn': 2096, 'fp': 397, 'fn': 606, 'auroc': 0.884255972566825, 'auprc': 0.8817228837653847, 'eval_loss': 0.46703442687988284}
Correct predictions are:  3997
Total predictions are:  5000
Accuracy on test set is: 0.7994 



EPOCH NUMBER:  2 (RUN:  5 COMB:  STM_RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6259511250777423, 'tp': 1999, 'tn': 2065, 'fp': 428, 'fn': 508, 'auroc': 0.8968197510668483, 'auprc': 0.892370684631796, 'eval_loss': 0.44706002349853513}
Correct predictions are:  4064
Total predictions are:  5000
Accuracy on test set is: 0.8128 



EPOCH NUMBER:  3 (RUN:  5 COMB:  STM_RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.639666661262983, 'tp': 2077, 'tn': 2022, 'fp': 471, 'fn': 430, 'auroc': 0.8987133659127887, 'auprc': 0.8941161859930629, 'eval_loss': 0.5215652374267579}
Correct predictions are:  4099
Total predictions are:  5000
Accuracy on test set is: 0.8198 



EPOCH NUMBER:  4 (RUN:  5 COMB:  STM_RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6300899288363895, 'tp': 2025, 'tn': 2050, 'fp': 443, 'fn': 482, 'auroc': 0.894407332153484, 'auprc': 0.8899017229602794, 'eval_loss': 0.6083617309570313}
Correct predictions are:  4075
Total predictions are:  5000
Accuracy on test set is: 0.815 



EPOCH NUMBER:  5 (RUN:  5 COMB:  STM_RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.618914498923599, 'tp': 1981, 'tn': 2065, 'fp': 428, 'fn': 526, 'auroc': 0.8950903775085598, 'auprc': 0.8921330794033568, 'eval_loss': 0.7132672576904296}
Correct predictions are:  4046
Total predictions are:  5000
Accuracy on test set is: 0.8092 



EPOCH NUMBER:  6 (RUN:  5 COMB:  STM_RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6069691538927379, 'tp': 1946, 'tn': 2069, 'fp': 424, 'fn': 561, 'auroc': 0.8931314021501928, 'auprc': 0.8849301593233257, 'eval_loss': 0.7929477821350097}
Correct predictions are:  4015
Total predictions are:  5000
Accuracy on test set is: 0.803 



EPOCH NUMBER:  7 (RUN:  5 COMB:  STM_RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6332187479727464, 'tp': 2040, 'tn': 2043, 'fp': 450, 'fn': 467, 'auroc': 0.8887959281600768, 'auprc': 0.8650152054698248, 'eval_loss': 0.7633065200805664}
Correct predictions are:  4083
Total predictions are:  5000
Accuracy on test set is: 0.8166 



EPOCH NUMBER:  8 (RUN:  5 COMB:  STM_RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6268388650234318, 'tp': 2028, 'tn': 2039, 'fp': 454, 'fn': 479, 'auroc': 0.8953513395545021, 'auprc': 0.8906766896073375, 'eval_loss': 0.7686810348510742}
Correct predictions are:  4067
Total predictions are:  5000
Accuracy on test set is: 0.8134 



EPOCH NUMBER:  9 (RUN:  5 COMB:  STM_RSW_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6010699496761059, 'tp': 2069, 'tn': 1932, 'fp': 561, 'fn': 438, 'auroc': 0.8724317198646837, 'auprc': 0.8642796680471236, 'eval_loss': 0.8069192581176757}
Correct predictions are:  4001
Total predictions are:  5000
Accuracy on test set is: 0.8002 


[0.78, 0.7994, 0.8128, 0.8198, 0.815, 0.8092, 0.803, 0.8166, 0.8134, 0.8002]


 Over all runs maximum accuracies are: [0.7676, 0.791, 0.8198, 0.821, 0.8326]
The median is: 0.8198
XLNet Accuracy Score on Test set ->  ['0.8198 +/- 0.052200000000000024']


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a



* * * * EVALUATION USING RSW_LOW_STM AS PREPROCESSING FUNCTION * * * *

RUN NUMBER:  1


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  1 COMB:  RSW_LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6734148328518214, 'tp': 2132, 'tn': 2051, 'fp': 442, 'fn': 375, 'auroc': 0.9124402735317445, 'auprc': 0.9065166455647922, 'eval_loss': 0.38231314086914064}
Correct predictions are:  4183
Total predictions are:  5000
Accuracy on test set is: 0.8366 



EPOCH NUMBER:  1 (RUN:  1 COMB:  RSW_LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6684249134673137, 'tp': 2049, 'tn': 2121, 'fp': 372, 'fn': 458, 'auroc': 0.9161573426735666, 'auprc': 0.9116649760332629, 'eval_loss': 0.4173922966003418}
Correct predictions are:  4170
Total predictions are:  5000
Accuracy on test set is: 0.834 



EPOCH NUMBER:  2 (RUN:  1 COMB:  RSW_LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6731536818812367, 'tp': 2059, 'tn': 2123, 'fp': 370, 'fn': 448, 'auroc': 0.9160425417735275, 'auprc': 0.912232145196739, 'eval_loss': 0.5096317649841309}
Correct predictions are:  4182
Total predictions are:  5000
Accuracy on test set is: 0.8364 



EPOCH NUMBER:  3 (RUN:  1 COMB:  RSW_LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6716761216579646, 'tp': 2079, 'tn': 2100, 'fp': 393, 'fn': 428, 'auroc': 0.895505420762499, 'auprc': 0.8945433401579765, 'eval_loss': 0.6383901149749756}
Correct predictions are:  4179
Total predictions are:  5000
Accuracy on test set is: 0.8358 



EPOCH NUMBER:  4 (RUN:  1 COMB:  RSW_LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6680080197016735, 'tp': 2101, 'tn': 2069, 'fp': 424, 'fn': 406, 'auroc': 0.91258179464127, 'auprc': 0.9059912722641963, 'eval_loss': 0.7858781304359436}
Correct predictions are:  4170
Total predictions are:  5000
Accuracy on test set is: 0.834 



EPOCH NUMBER:  5 (RUN:  1 COMB:  RSW_LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6573044914708903, 'tp': 2000, 'tn': 2140, 'fp': 353, 'fn': 507, 'auroc': 0.9077890370660506, 'auprc': 0.9082448349611152, 'eval_loss': 0.8341420259475708}
Correct predictions are:  4140
Total predictions are:  5000
Accuracy on test set is: 0.828 



EPOCH NUMBER:  6 (RUN:  1 COMB:  RSW_LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6695189718485408, 'tp': 2134, 'tn': 2039, 'fp': 454, 'fn': 373, 'auroc': 0.8982722424543809, 'auprc': 0.9000917186059922, 'eval_loss': 0.8673640041351318}
Correct predictions are:  4173
Total predictions are:  5000
Accuracy on test set is: 0.8346 



EPOCH NUMBER:  7 (RUN:  1 COMB:  RSW_LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.662978569876065, 'tp': 2058, 'tn': 2099, 'fp': 394, 'fn': 449, 'auroc': 0.900177457391266, 'auprc': 0.9023325622881634, 'eval_loss': 0.9109350340843201}
Correct predictions are:  4157
Total predictions are:  5000
Accuracy on test set is: 0.8314 



EPOCH NUMBER:  8 (RUN:  1 COMB:  RSW_LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6591566449796809, 'tp': 2041, 'tn': 2106, 'fp': 387, 'fn': 466, 'auroc': 0.8941322899971537, 'auprc': 0.8931660187933876, 'eval_loss': 0.9885231300354004}
Correct predictions are:  4147
Total predictions are:  5000
Accuracy on test set is: 0.8294 



EPOCH NUMBER:  9 (RUN:  1 COMB:  RSW_LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6572865356162038, 'tp': 2101, 'tn': 2042, 'fp': 451, 'fn': 406, 'auroc': 0.8934311645003297, 'auprc': 0.8920123219077563, 'eval_loss': 1.0200116668701171}
Correct predictions are:  4143
Total predictions are:  5000
Accuracy on test set is: 0.8286 


[0.8366, 0.834, 0.8364, 0.8358, 0.834, 0.828, 0.8346, 0.8314, 0.8294, 0.8286]

RUN NUMBER:  2


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  2 COMB:  RSW_LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 2507, 'tn': 0, 'fp': 2493, 'fn': 0, 'auroc': 0.502096896439668, 'auprc': 0.503205541554626, 'eval_loss': 0.69309248046875}
Correct predictions are:  2507
Total predictions are:  5000
Accuracy on test set is: 0.5014 



EPOCH NUMBER:  1 (RUN:  2 COMB:  RSW_LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 0, 'tn': 2493, 'fp': 0, 'fn': 2507, 'auroc': 0.5064348504492275, 'auprc': 0.5051033413333303, 'eval_loss': 0.69312412109375}
Correct predictions are:  2493
Total predictions are:  5000
Accuracy on test set is: 0.4986 



EPOCH NUMBER:  2 (RUN:  2 COMB:  RSW_LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 0, 'tn': 2493, 'fp': 0, 'fn': 2507, 'auroc': 0.48012032414334127, 'auprc': 0.49021470156577185, 'eval_loss': 0.69628095703125}
Correct predictions are:  2493
Total predictions are:  5000
Accuracy on test set is: 0.4986 



EPOCH NUMBER:  3 (RUN:  2 COMB:  RSW_LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 2507, 'tn': 0, 'fp': 2493, 'fn': 0, 'auroc': 0.5005199240762047, 'auprc': 0.5016646726229494, 'eval_loss': 0.69308447265625}
Correct predictions are:  2507
Total predictions are:  5000
Accuracy on test set is: 0.5014 



EPOCH NUMBER:  4 (RUN:  2 COMB:  RSW_LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 0, 'tn': 2493, 'fp': 0, 'fn': 2507, 'auroc': 0.5557781973010668, 'auprc': 0.5420381565838872, 'eval_loss': 0.69394609375}
Correct predictions are:  2493
Total predictions are:  5000
Accuracy on test set is: 0.4986 



EPOCH NUMBER:  5 (RUN:  2 COMB:  RSW_LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 0, 'tn': 2493, 'fp': 0, 'fn': 2507, 'auroc': 0.626664833052291, 'auprc': 0.5959334263255676, 'eval_loss': 0.6934279296875}
Correct predictions are:  2493
Total predictions are:  5000
Accuracy on test set is: 0.4986 



EPOCH NUMBER:  6 (RUN:  2 COMB:  RSW_LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.07208159467079563, 'tp': 43, 'tn': 2486, 'fp': 7, 'fn': 2464, 'auroc': 0.48243754231033165, 'auprc': 0.49722617508742134, 'eval_loss': 0.6956465087890625}
Correct predictions are:  2529
Total predictions are:  5000
Accuracy on test set is: 0.5058 



EPOCH NUMBER:  7 (RUN:  2 COMB:  RSW_LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 0, 'tn': 2493, 'fp': 0, 'fn': 2507, 'auroc': 0.5221943340035786, 'auprc': 0.4911962923570527, 'eval_loss': 0.716746240234375}
Correct predictions are:  2493
Total predictions are:  5000
Accuracy on test set is: 0.4986 



EPOCH NUMBER:  8 (RUN:  2 COMB:  RSW_LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.3751839057536444, 'tp': 2227, 'tn': 1118, 'fp': 1375, 'fn': 280, 'auroc': 0.6761250608204769, 'auprc': 0.6154206293912263, 'eval_loss': 0.61928603515625}
Correct predictions are:  3345
Total predictions are:  5000
Accuracy on test set is: 0.669 



EPOCH NUMBER:  9 (RUN:  2 COMB:  RSW_LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.35064015253986197, 'tp': 2286, 'tn': 963, 'fp': 1530, 'fn': 221, 'auroc': 0.6478088388212964, 'auprc': 0.5901994239368237, 'eval_loss': 0.63158125}
Correct predictions are:  3249
Total predictions are:  5000
Accuracy on test set is: 0.6498 


[0.5014, 0.4986, 0.4986, 0.5014, 0.4986, 0.4986, 0.5058, 0.4986, 0.669, 0.6498]

RUN NUMBER:  3


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  3 COMB:  RSW_LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.651337492873997, 'tp': 2188, 'tn': 1933, 'fp': 560, 'fn': 319, 'auroc': 0.9082216004573476, 'auprc': 0.9036687380040541, 'eval_loss': 0.42142088623046875}
Correct predictions are:  4121
Total predictions are:  5000
Accuracy on test set is: 0.8242 



EPOCH NUMBER:  1 (RUN:  3 COMB:  RSW_LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6582768682913144, 'tp': 2024, 'tn': 2120, 'fp': 373, 'fn': 483, 'auroc': 0.910751540292076, 'auprc': 0.9051842560595753, 'eval_loss': 0.4323815490722656}
Correct predictions are:  4144
Total predictions are:  5000
Accuracy on test set is: 0.8288 



EPOCH NUMBER:  2 (RUN:  3 COMB:  RSW_LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6558050323143068, 'tp': 2024, 'tn': 2114, 'fp': 379, 'fn': 483, 'auroc': 0.9122867523281384, 'auprc': 0.9085910846940682, 'eval_loss': 0.589274454498291}
Correct predictions are:  4138
Total predictions are:  5000
Accuracy on test set is: 0.8276 



EPOCH NUMBER:  3 (RUN:  3 COMB:  RSW_LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6405890275203484, 'tp': 1964, 'tn': 2133, 'fp': 360, 'fn': 543, 'auroc': 0.9073007132375916, 'auprc': 0.9029424804602834, 'eval_loss': 0.7526485969543457}
Correct predictions are:  4097
Total predictions are:  5000
Accuracy on test set is: 0.8194 



EPOCH NUMBER:  4 (RUN:  3 COMB:  RSW_LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6493337935320714, 'tp': 1977, 'tn': 2142, 'fp': 351, 'fn': 530, 'auroc': 0.8871987956385579, 'auprc': 0.8973191550306525, 'eval_loss': 0.8653260186195374}
Correct predictions are:  4119
Total predictions are:  5000
Accuracy on test set is: 0.8238 



EPOCH NUMBER:  5 (RUN:  3 COMB:  RSW_LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6373986350562767, 'tp': 2024, 'tn': 2069, 'fp': 424, 'fn': 483, 'auroc': 0.8956925422295312, 'auprc': 0.8639966605671867, 'eval_loss': 0.938182165145874}
Correct predictions are:  4093
Total predictions are:  5000
Accuracy on test set is: 0.8186 



EPOCH NUMBER:  6 (RUN:  3 COMB:  RSW_LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6548122778222566, 'tp': 2086, 'tn': 2051, 'fp': 442, 'fn': 421, 'auroc': 0.9086470437928234, 'auprc': 0.9021354959233213, 'eval_loss': 0.8842182542800904}
Correct predictions are:  4137
Total predictions are:  5000
Accuracy on test set is: 0.8274 



EPOCH NUMBER:  7 (RUN:  3 COMB:  RSW_LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6551069260305455, 'tp': 1991, 'tn': 2143, 'fp': 350, 'fn': 516, 'auroc': 0.9085080027027411, 'auprc': 0.9020003047639777, 'eval_loss': 1.0339783118724823}
Correct predictions are:  4134
Total predictions are:  5000
Accuracy on test set is: 0.8268 



EPOCH NUMBER:  8 (RUN:  3 COMB:  RSW_LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6520789093273193, 'tp': 2054, 'tn': 2076, 'fp': 417, 'fn': 453, 'auroc': 0.9024421151461828, 'auprc': 0.8839775044110647, 'eval_loss': 0.9574510299682617}
Correct predictions are:  4130
Total predictions are:  5000
Accuracy on test set is: 0.826 



EPOCH NUMBER:  9 (RUN:  3 COMB:  RSW_LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6420049621051995, 'tp': 2055, 'tn': 2050, 'fp': 443, 'fn': 452, 'auroc': 0.860228664192727, 'auprc': 0.8828071578940274, 'eval_loss': 1.0867688000679017}
Correct predictions are:  4105
Total predictions are:  5000
Accuracy on test set is: 0.821 


[0.8242, 0.8288, 0.8276, 0.8194, 0.8238, 0.8186, 0.8274, 0.8268, 0.826, 0.821]

RUN NUMBER:  4


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  4 COMB:  RSW_LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6571184528024923, 'tp': 2198, 'tn': 1937, 'fp': 556, 'fn': 309, 'auroc': 0.911974669881412, 'auprc': 0.9082934095078208, 'eval_loss': 0.4032504684448242}
Correct predictions are:  4135
Total predictions are:  5000
Accuracy on test set is: 0.827 



EPOCH NUMBER:  1 (RUN:  4 COMB:  RSW_LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6588958274253175, 'tp': 2104, 'tn': 2043, 'fp': 450, 'fn': 403, 'auroc': 0.9136918033437381, 'auprc': 0.910527448579111, 'eval_loss': 0.4678950656890869}
Correct predictions are:  4147
Total predictions are:  5000
Accuracy on test set is: 0.8294 



EPOCH NUMBER:  2 (RUN:  4 COMB:  RSW_LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6751969420504098, 'tp': 2102, 'tn': 2086, 'fp': 407, 'fn': 405, 'auroc': 0.9170533496982617, 'auprc': 0.9122847032560597, 'eval_loss': 0.5237377532958984}
Correct predictions are:  4188
Total predictions are:  5000
Accuracy on test set is: 0.8376 



EPOCH NUMBER:  3 (RUN:  4 COMB:  RSW_LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6697447556978724, 'tp': 2122, 'tn': 2052, 'fp': 441, 'fn': 385, 'auroc': 0.9148618125166101, 'auprc': 0.9102257863166527, 'eval_loss': 0.7193467720031739}
Correct predictions are:  4174
Total predictions are:  5000
Accuracy on test set is: 0.8348 



EPOCH NUMBER:  4 (RUN:  4 COMB:  RSW_LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6580867118650743, 'tp': 2102, 'tn': 2043, 'fp': 450, 'fn': 405, 'auroc': 0.9109369817459368, 'auprc': 0.9064855811397103, 'eval_loss': 0.7893782001495361}
Correct predictions are:  4145
Total predictions are:  5000
Accuracy on test set is: 0.829 



EPOCH NUMBER:  5 (RUN:  4 COMB:  RSW_LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.662418805202684, 'tp': 2097, 'tn': 2059, 'fp': 434, 'fn': 410, 'auroc': 0.9110968229990923, 'auprc': 0.9053691693650361, 'eval_loss': 0.8838412836074829}
Correct predictions are:  4156
Total predictions are:  5000
Accuracy on test set is: 0.8312 



EPOCH NUMBER:  6 (RUN:  4 COMB:  RSW_LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6569505986015678, 'tp': 2028, 'tn': 2113, 'fp': 380, 'fn': 479, 'auroc': 0.9037908457202304, 'auprc': 0.8915619342831956, 'eval_loss': 0.9755814904689789}
Correct predictions are:  4141
Total predictions are:  5000
Accuracy on test set is: 0.8282 



EPOCH NUMBER:  7 (RUN:  4 COMB:  RSW_LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6552492060910937, 'tp': 2062, 'tn': 2076, 'fp': 417, 'fn': 445, 'auroc': 0.8943853319810027, 'auprc': 0.8539897959324756, 'eval_loss': 1.0032439281463623}
Correct predictions are:  4138
Total predictions are:  5000
Accuracy on test set is: 0.8276 



EPOCH NUMBER:  8 (RUN:  4 COMB:  RSW_LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6499992361345107, 'tp': 2075, 'tn': 2050, 'fp': 443, 'fn': 432, 'auroc': 0.9020593121450073, 'auprc': 0.9010708254961012, 'eval_loss': 1.0007293370246888}
Correct predictions are:  4125
Total predictions are:  5000
Accuracy on test set is: 0.825 



EPOCH NUMBER:  9 (RUN:  4 COMB:  RSW_LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6556861831184628, 'tp': 2099, 'tn': 2040, 'fp': 453, 'fn': 408, 'auroc': 0.9050594156658187, 'auprc': 0.8923917291604642, 'eval_loss': 1.0761113142967225}
Correct predictions are:  4139
Total predictions are:  5000
Accuracy on test set is: 0.8278 


[0.827, 0.8294, 0.8376, 0.8348, 0.829, 0.8312, 0.8282, 0.8276, 0.825, 0.8278]

RUN NUMBER:  5


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  5 COMB:  RSW_LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6728293777481057, 'tp': 2112, 'tn': 2070, 'fp': 423, 'fn': 395, 'auroc': 0.9128080364150054, 'auprc': 0.9085870527001361, 'eval_loss': 0.3854837005615234}
Correct predictions are:  4182
Total predictions are:  5000
Accuracy on test set is: 0.8364 



EPOCH NUMBER:  1 (RUN:  5 COMB:  RSW_LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6655604711819461, 'tp': 1965, 'tn': 2191, 'fp': 302, 'fn': 542, 'auroc': 0.9144750894847016, 'auprc': 0.9098429154478606, 'eval_loss': 0.46654583740234373}
Correct predictions are:  4156
Total predictions are:  5000
Accuracy on test set is: 0.8312 



EPOCH NUMBER:  2 (RUN:  5 COMB:  RSW_LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6687968495583926, 'tp': 2051, 'tn': 2120, 'fp': 373, 'fn': 456, 'auroc': 0.9162315832556127, 'auprc': 0.9117875869534933, 'eval_loss': 0.6468511639118194}
Correct predictions are:  4171
Total predictions are:  5000
Accuracy on test set is: 0.8342 



EPOCH NUMBER:  3 (RUN:  5 COMB:  RSW_LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6695983640235492, 'tp': 2093, 'tn': 2081, 'fp': 412, 'fn': 414, 'auroc': 0.9076000755845925, 'auprc': 0.8900116878662718, 'eval_loss': 0.73689903049469}
Correct predictions are:  4174
Total predictions are:  5000
Accuracy on test set is: 0.8348 



EPOCH NUMBER:  4 (RUN:  5 COMB:  RSW_LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6824087962068326, 'tp': 2119, 'tn': 2087, 'fp': 406, 'fn': 388, 'auroc': 0.9149584532742736, 'auprc': 0.9095512624720915, 'eval_loss': 0.7590763069152832}
Correct predictions are:  4206
Total predictions are:  5000
Accuracy on test set is: 0.8412 



EPOCH NUMBER:  5 (RUN:  5 COMB:  RSW_LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6740682168046104, 'tp': 2083, 'tn': 2102, 'fp': 391, 'fn': 424, 'auroc': 0.9114272255894487, 'auprc': 0.9064375738420996, 'eval_loss': 0.9058428838729858}
Correct predictions are:  4185
Total predictions are:  5000
Accuracy on test set is: 0.837 



EPOCH NUMBER:  6 (RUN:  5 COMB:  RSW_LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6635060113185026, 'tp': 1991, 'tn': 2163, 'fp': 330, 'fn': 516, 'auroc': 0.9112172239430357, 'auprc': 0.9071094140693794, 'eval_loss': 0.9267828496932984}
Correct predictions are:  4154
Total predictions are:  5000
Accuracy on test set is: 0.8308 



EPOCH NUMBER:  7 (RUN:  5 COMB:  RSW_LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6532395731944172, 'tp': 1939, 'tn': 2185, 'fp': 308, 'fn': 568, 'auroc': 0.9126492351700037, 'auprc': 0.9035527216621917, 'eval_loss': 1.0204994542121888}
Correct predictions are:  4124
Total predictions are:  5000
Accuracy on test set is: 0.8248 



EPOCH NUMBER:  8 (RUN:  5 COMB:  RSW_LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6539698501375155, 'tp': 2000, 'tn': 2132, 'fp': 361, 'fn': 507, 'auroc': 0.9112389841136355, 'auprc': 0.9052647032009595, 'eval_loss': 0.9710656471252441}
Correct predictions are:  4132
Total predictions are:  5000
Accuracy on test set is: 0.8264 



EPOCH NUMBER:  9 (RUN:  5 COMB:  RSW_LOW_STM )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6380928693057478, 'tp': 1943, 'tn': 2146, 'fp': 347, 'fn': 564, 'auroc': 0.9009273832706849, 'auprc': 0.8978691249102105, 'eval_loss': 0.9484843301773072}
Correct predictions are:  4089
Total predictions are:  5000
Accuracy on test set is: 0.8178 


[0.8364, 0.8312, 0.8342, 0.8348, 0.8412, 0.837, 0.8308, 0.8248, 0.8264, 0.8178]


 Over all runs maximum accuracies are: [0.669, 0.8288, 0.8366, 0.8376, 0.8412]
The median is: 0.8366
XLNet Accuracy Score on Test set ->  ['0.8366 +/- 0.16759999999999997']


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a



* * * * EVALUATION USING RSW_STM_LOW AS PREPROCESSING FUNCTION * * * *

RUN NUMBER:  1


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  1 COMB:  RSW_STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 0, 'tn': 2493, 'fp': 0, 'fn': 2507, 'auroc': 0.5360261224448, 'auprc': 0.5276446377891721, 'eval_loss': 0.69339013671875}
Correct predictions are:  2493
Total predictions are:  5000
Accuracy on test set is: 0.4986 



EPOCH NUMBER:  1 (RUN:  1 COMB:  RSW_STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.24362819540783998, 'tp': 915, 'tn': 2115, 'fp': 378, 'fn': 1592, 'auroc': 0.6956428938402878, 'auprc': 0.677772529658286, 'eval_loss': 0.6639009765625}
Correct predictions are:  3030
Total predictions are:  5000
Accuracy on test set is: 0.606 



EPOCH NUMBER:  2 (RUN:  1 COMB:  RSW_STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 2507, 'tn': 0, 'fp': 2493, 'fn': 0, 'auroc': 0.6219077557568051, 'auprc': 0.5823953198024266, 'eval_loss': 0.6959546875}
Correct predictions are:  2507
Total predictions are:  5000
Accuracy on test set is: 0.5014 



EPOCH NUMBER:  3 (RUN:  1 COMB:  RSW_STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 2507, 'tn': 0, 'fp': 2493, 'fn': 0, 'auroc': 0.5859325137109075, 'auprc': 0.5702994503646588, 'eval_loss': 0.6933001953125}
Correct predictions are:  2507
Total predictions are:  5000
Accuracy on test set is: 0.5014 



EPOCH NUMBER:  4 (RUN:  1 COMB:  RSW_STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.11704514572622367, 'tp': 1999, 'tn': 759, 'fp': 1734, 'fn': 508, 'auroc': 0.6893312443569557, 'auprc': 0.6995807451053572, 'eval_loss': 0.68662822265625}
Correct predictions are:  2758
Total predictions are:  5000
Accuracy on test set is: 0.5516 



EPOCH NUMBER:  5 (RUN:  1 COMB:  RSW_STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.35304955395098137, 'tp': 1171, 'tn': 2139, 'fp': 354, 'fn': 1336, 'auroc': 0.7045691238219308, 'auprc': 0.6760794089776084, 'eval_loss': 0.63219248046875}
Correct predictions are:  3310
Total predictions are:  5000
Accuracy on test set is: 0.662 



EPOCH NUMBER:  6 (RUN:  1 COMB:  RSW_STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.4125593668443949, 'tp': 1578, 'tn': 1941, 'fp': 552, 'fn': 929, 'auroc': 0.7586381877233919, 'auprc': 0.7558042035245427, 'eval_loss': 0.589384228515625}
Correct predictions are:  3519
Total predictions are:  5000
Accuracy on test set is: 0.7038 



EPOCH NUMBER:  7 (RUN:  1 COMB:  RSW_STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.0, 'tp': 2507, 'tn': 0, 'fp': 2493, 'fn': 0, 'auroc': 0.7933555799077465, 'auprc': 0.7853375139587071, 'eval_loss': 0.6396643798828126}
Correct predictions are:  2507
Total predictions are:  5000
Accuracy on test set is: 0.5014 



EPOCH NUMBER:  8 (RUN:  1 COMB:  RSW_STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.4951163778361888, 'tp': 1767, 'tn': 1966, 'fp': 527, 'fn': 740, 'auroc': 0.8116540433677, 'auprc': 0.7997153013235603, 'eval_loss': 0.547951513671875}
Correct predictions are:  3733
Total predictions are:  5000
Accuracy on test set is: 0.7466 



EPOCH NUMBER:  9 (RUN:  1 COMB:  RSW_STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.510968870767586, 'tp': 1975, 'tn': 1800, 'fp': 693, 'fn': 532, 'auroc': 0.827714409280969, 'auprc': 0.8205109108399746, 'eval_loss': 0.548963720703125}
Correct predictions are:  3775
Total predictions are:  5000
Accuracy on test set is: 0.755 


[0.4986, 0.606, 0.5014, 0.5014, 0.5516, 0.662, 0.7038, 0.5014, 0.7466, 0.755]

RUN NUMBER:  2


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  2 COMB:  RSW_STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.5880913412951504, 'tp': 2017, 'tn': 1953, 'fp': 540, 'fn': 490, 'auroc': 0.8725335606631155, 'auprc': 0.8656676937319293, 'eval_loss': 0.45784788818359373}
Correct predictions are:  3970
Total predictions are:  5000
Accuracy on test set is: 0.794 



EPOCH NUMBER:  1 (RUN:  2 COMB:  RSW_STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6444464365775194, 'tp': 2080, 'tn': 2031, 'fp': 462, 'fn': 427, 'auroc': 0.8990928088876217, 'auprc': 0.8935267515413376, 'eval_loss': 0.4450712890625}
Correct predictions are:  4111
Total predictions are:  5000
Accuracy on test set is: 0.8222 



EPOCH NUMBER:  2 (RUN:  2 COMB:  RSW_STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6348265024458182, 'tp': 2006, 'tn': 2080, 'fp': 413, 'fn': 501, 'auroc': 0.8982044019225112, 'auprc': 0.8951934936910524, 'eval_loss': 0.5147148620605468}
Correct predictions are:  4086
Total predictions are:  5000
Accuracy on test set is: 0.8172 



EPOCH NUMBER:  3 (RUN:  2 COMB:  RSW_STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6383963622011163, 'tp': 2057, 'tn': 2039, 'fp': 454, 'fn': 450, 'auroc': 0.8751691813263817, 'auprc': 0.8199292559688074, 'eval_loss': 0.5704257293701172}
Correct predictions are:  4096
Total predictions are:  5000
Accuracy on test set is: 0.8192 



EPOCH NUMBER:  4 (RUN:  2 COMB:  RSW_STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6411123921288459, 'tp': 1942, 'tn': 2154, 'fp': 339, 'fn': 565, 'auroc': 0.8830745233042627, 'auprc': 0.875731419818005, 'eval_loss': 0.6240694091796875}
Correct predictions are:  4096
Total predictions are:  5000
Accuracy on test set is: 0.8192 



EPOCH NUMBER:  5 (RUN:  2 COMB:  RSW_STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6330795539055143, 'tp': 1963, 'tn': 2116, 'fp': 377, 'fn': 544, 'auroc': 0.8466274375591105, 'auprc': 0.789469985271596, 'eval_loss': 0.57747099609375}
Correct predictions are:  4079
Total predictions are:  5000
Accuracy on test set is: 0.8158 



EPOCH NUMBER:  6 (RUN:  2 COMB:  RSW_STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6228808905608121, 'tp': 2017, 'tn': 2040, 'fp': 453, 'fn': 490, 'auroc': 0.885400221537737, 'auprc': 0.8497476394110264, 'eval_loss': 0.5971680053710937}
Correct predictions are:  4057
Total predictions are:  5000
Accuracy on test set is: 0.8114 



EPOCH NUMBER:  7 (RUN:  2 COMB:  RSW_STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6332004447739891, 'tp': 2046, 'tn': 2037, 'fp': 456, 'fn': 461, 'auroc': 0.8779218429072484, 'auprc': 0.8342880503514112, 'eval_loss': 0.629887612915039}
Correct predictions are:  4083
Total predictions are:  5000
Accuracy on test set is: 0.8166 



EPOCH NUMBER:  8 (RUN:  2 COMB:  RSW_STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6318519786998554, 'tp': 2160, 'tn': 1913, 'fp': 580, 'fn': 347, 'auroc': 0.828319614025774, 'auprc': 0.7592516700667862, 'eval_loss': 0.5141024230957031}
Correct predictions are:  4073
Total predictions are:  5000
Accuracy on test set is: 0.8146 



EPOCH NUMBER:  9 (RUN:  2 COMB:  RSW_STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.4510425095056129, 'tp': 1282, 'tn': 2253, 'fp': 240, 'fn': 1225, 'auroc': 0.717777707377226, 'auprc': 0.6996063242406481, 'eval_loss': 0.5857440673828125}
Correct predictions are:  3535
Total predictions are:  5000
Accuracy on test set is: 0.707 


[0.794, 0.8222, 0.8172, 0.8192, 0.8192, 0.8158, 0.8114, 0.8166, 0.8146, 0.707]

RUN NUMBER:  3


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  3 COMB:  RSW_STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6300049112249061, 'tp': 2040, 'tn': 2035, 'fp': 458, 'fn': 467, 'auroc': 0.8975357566803324, 'auprc': 0.8933110699602134, 'eval_loss': 0.43407160339355466}
Correct predictions are:  4075
Total predictions are:  5000
Accuracy on test set is: 0.815 



EPOCH NUMBER:  1 (RUN:  3 COMB:  RSW_STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6496504896669503, 'tp': 2087, 'tn': 2037, 'fp': 456, 'fn': 420, 'auroc': 0.9084168819883549, 'auprc': 0.9052213047033472, 'eval_loss': 0.4130213363647461}
Correct predictions are:  4124
Total predictions are:  5000
Accuracy on test set is: 0.8248 



EPOCH NUMBER:  2 (RUN:  3 COMB:  RSW_STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6658116073996645, 'tp': 2059, 'tn': 2105, 'fp': 388, 'fn': 448, 'auroc': 0.9125647545076754, 'auprc': 0.9098309293754021, 'eval_loss': 0.4826184097290039}
Correct predictions are:  4164
Total predictions are:  5000
Accuracy on test set is: 0.8328 



EPOCH NUMBER:  3 (RUN:  3 COMB:  RSW_STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6520310925861823, 'tp': 2061, 'tn': 2069, 'fp': 424, 'fn': 446, 'auroc': 0.8902082592327523, 'auprc': 0.8567637099160957, 'eval_loss': 0.6915330924987793}
Correct predictions are:  4130
Total predictions are:  5000
Accuracy on test set is: 0.826 



EPOCH NUMBER:  4 (RUN:  3 COMB:  RSW_STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.653690115646245, 'tp': 2097, 'tn': 2037, 'fp': 456, 'fn': 410, 'auroc': 0.9064501465691492, 'auprc': 0.9013146454001157, 'eval_loss': 0.6999937400817872}
Correct predictions are:  4134
Total predictions are:  5000
Accuracy on test set is: 0.8268 



EPOCH NUMBER:  5 (RUN:  3 COMB:  RSW_STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6517644145090368, 'tp': 2045, 'tn': 2084, 'fp': 409, 'fn': 462, 'auroc': 0.9065074270182277, 'auprc': 0.9009807136842627, 'eval_loss': 0.7622245994567871}
Correct predictions are:  4129
Total predictions are:  5000
Accuracy on test set is: 0.8258 



EPOCH NUMBER:  6 (RUN:  3 COMB:  RSW_STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6412525582141321, 'tp': 2077, 'tn': 2026, 'fp': 467, 'fn': 430, 'auroc': 0.9001671373103567, 'auprc': 0.8832165338947606, 'eval_loss': 0.8321834259033203}
Correct predictions are:  4103
Total predictions are:  5000
Accuracy on test set is: 0.8206 



EPOCH NUMBER:  7 (RUN:  3 COMB:  RSW_STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6416781687737135, 'tp': 2041, 'tn': 2063, 'fp': 430, 'fn': 466, 'auroc': 0.9051002959863206, 'auprc': 0.8976676122312272, 'eval_loss': 0.8690972995758056}
Correct predictions are:  4104
Total predictions are:  5000
Accuracy on test set is: 0.8208 



EPOCH NUMBER:  8 (RUN:  3 COMB:  RSW_STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.636451778689244, 'tp': 2038, 'tn': 2053, 'fp': 440, 'fn': 469, 'auroc': 0.8611377113196568, 'auprc': 0.8637182282969376, 'eval_loss': 0.9220770069122315}
Correct predictions are:  4091
Total predictions are:  5000
Accuracy on test set is: 0.8182 



EPOCH NUMBER:  9 (RUN:  3 COMB:  RSW_STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6252230934626197, 'tp': 2029, 'tn': 2034, 'fp': 459, 'fn': 478, 'auroc': 0.8554521467448306, 'auprc': 0.8469921207815734, 'eval_loss': 0.8357799354553223}
Correct predictions are:  4063
Total predictions are:  5000
Accuracy on test set is: 0.8126 


[0.815, 0.8248, 0.8328, 0.826, 0.8268, 0.8258, 0.8206, 0.8208, 0.8182, 0.8126]

RUN NUMBER:  4


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  4 COMB:  RSW_STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.418574141451921, 'tp': 1542, 'tn': 1986, 'fp': 507, 'fn': 965, 'auroc': 0.784871113389529, 'auprc': 0.769908553752342, 'eval_loss': 0.598444287109375}
Correct predictions are:  3528
Total predictions are:  5000
Accuracy on test set is: 0.7056 



EPOCH NUMBER:  1 (RUN:  4 COMB:  RSW_STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6064106976589747, 'tp': 2026, 'tn': 1990, 'fp': 503, 'fn': 481, 'auroc': 0.8661646307307049, 'auprc': 0.8063324736791966, 'eval_loss': 0.4566586883544922}
Correct predictions are:  4016
Total predictions are:  5000
Accuracy on test set is: 0.8032 



EPOCH NUMBER:  2 (RUN:  4 COMB:  RSW_STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.575234594449464, 'tp': 1964, 'tn': 1974, 'fp': 519, 'fn': 543, 'auroc': 0.8701708221392455, 'auprc': 0.8641643255761904, 'eval_loss': 0.5032282958984375}
Correct predictions are:  3938
Total predictions are:  5000
Accuracy on test set is: 0.7876 



EPOCH NUMBER:  3 (RUN:  4 COMB:  RSW_STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6175558979356979, 'tp': 1988, 'tn': 2055, 'fp': 438, 'fn': 519, 'auroc': 0.8910137855480786, 'auprc': 0.8877109598371615, 'eval_loss': 0.5071920654296875}
Correct predictions are:  4043
Total predictions are:  5000
Accuracy on test set is: 0.8086 



EPOCH NUMBER:  4 (RUN:  4 COMB:  RSW_STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.638514985717513, 'tp': 2123, 'tn': 1971, 'fp': 522, 'fn': 384, 'auroc': 0.889554174104725, 'auprc': 0.8589453910345357, 'eval_loss': 0.5448739074707031}
Correct predictions are:  4094
Total predictions are:  5000
Accuracy on test set is: 0.8188 



EPOCH NUMBER:  5 (RUN:  4 COMB:  RSW_STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6271193875127081, 'tp': 1954, 'tn': 2110, 'fp': 383, 'fn': 553, 'auroc': 0.89485709567963, 'auprc': 0.8902795264467, 'eval_loss': 0.6798863647460938}
Correct predictions are:  4064
Total predictions are:  5000
Accuracy on test set is: 0.8128 



EPOCH NUMBER:  6 (RUN:  4 COMB:  RSW_STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.636009346162854, 'tp': 2062, 'tn': 2028, 'fp': 465, 'fn': 445, 'auroc': 0.8949654965294926, 'auprc': 0.8851999270102189, 'eval_loss': 0.6411881286621094}
Correct predictions are:  4090
Total predictions are:  5000
Accuracy on test set is: 0.818 



EPOCH NUMBER:  7 (RUN:  4 COMB:  RSW_STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.597931042877122, 'tp': 2264, 'tn': 1694, 'fp': 799, 'fn': 243, 'auroc': 0.853171968868236, 'auprc': 0.7920417201606825, 'eval_loss': 0.5687670959472656}
Correct predictions are:  3958
Total predictions are:  5000
Accuracy on test set is: 0.7916 



EPOCH NUMBER:  8 (RUN:  4 COMB:  RSW_STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6107930095856856, 'tp': 1943, 'tn': 2081, 'fp': 412, 'fn': 564, 'auroc': 0.8501776253925831, 'auprc': 0.8440080505620084, 'eval_loss': 0.7136015151977539}
Correct predictions are:  4024
Total predictions are:  5000
Accuracy on test set is: 0.8048 



EPOCH NUMBER:  9 (RUN:  4 COMB:  RSW_STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6272010665902328, 'tp': 2048, 'tn': 2020, 'fp': 473, 'fn': 459, 'auroc': 0.8730250845166626, 'auprc': 0.8662483114839434, 'eval_loss': 0.6665601654052734}
Correct predictions are:  4068
Total predictions are:  5000
Accuracy on test set is: 0.8136 


[0.7056, 0.8032, 0.7876, 0.8086, 0.8188, 0.8128, 0.818, 0.7916, 0.8048, 0.8136]

RUN NUMBER:  5


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a


EPOCH NUMBER:  0 (RUN:  5 COMB:  RSW_STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.622775521398616, 'tp': 2113, 'tn': 1941, 'fp': 552, 'fn': 394, 'auroc': 0.8956122215998175, 'auprc': 0.8912156832777001, 'eval_loss': 0.4185722412109375}
Correct predictions are:  4054
Total predictions are:  5000
Accuracy on test set is: 0.8108 



EPOCH NUMBER:  1 (RUN:  5 COMB:  RSW_STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6508086277191808, 'tp': 2080, 'tn': 2047, 'fp': 446, 'fn': 427, 'auroc': 0.9060335833032931, 'auprc': 0.8998937037500205, 'eval_loss': 0.41943475189208984}
Correct predictions are:  4127
Total predictions are:  5000
Accuracy on test set is: 0.8254 



EPOCH NUMBER:  2 (RUN:  5 COMB:  RSW_STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6473464597991633, 'tp': 2041, 'tn': 2077, 'fp': 416, 'fn': 466, 'auroc': 0.9039546870047463, 'auprc': 0.8989805060441018, 'eval_loss': 0.5701704887390137}
Correct predictions are:  4118
Total predictions are:  5000
Accuracy on test set is: 0.8236 



EPOCH NUMBER:  3 (RUN:  5 COMB:  RSW_STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6383083523769646, 'tp': 2115, 'tn': 1979, 'fp': 514, 'fn': 392, 'auroc': 0.9044998912791475, 'auprc': 0.8985011032097393, 'eval_loss': 0.6481840538024902}
Correct predictions are:  4094
Total predictions are:  5000
Accuracy on test set is: 0.8188 



EPOCH NUMBER:  4 (RUN:  5 COMB:  RSW_STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6423964166019225, 'tp': 2062, 'tn': 2044, 'fp': 449, 'fn': 445, 'auroc': 0.9048376539272067, 'auprc': 0.8918591922079544, 'eval_loss': 0.7738823558807373}
Correct predictions are:  4106
Total predictions are:  5000
Accuracy on test set is: 0.8212 



EPOCH NUMBER:  5 (RUN:  5 COMB:  RSW_STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6464424835148332, 'tp': 2052, 'tn': 2064, 'fp': 429, 'fn': 455, 'auroc': 0.8969408720164365, 'auprc': 0.877617702935957, 'eval_loss': 0.8748640419006347}
Correct predictions are:  4116
Total predictions are:  5000
Accuracy on test set is: 0.8232 



EPOCH NUMBER:  6 (RUN:  5 COMB:  RSW_STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6519965471638574, 'tp': 2074, 'tn': 2056, 'fp': 437, 'fn': 433, 'auroc': 0.9062253448067034, 'auprc': 0.8972876858455553, 'eval_loss': 0.8730730911254883}
Correct predictions are:  4130
Total predictions are:  5000
Accuracy on test set is: 0.826 



EPOCH NUMBER:  7 (RUN:  5 COMB:  RSW_STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6448566026661047, 'tp': 2082, 'tn': 2030, 'fp': 463, 'fn': 425, 'auroc': 0.9061016638370445, 'auprc': 0.8939053690781055, 'eval_loss': 0.875917855834961}
Correct predictions are:  4112
Total predictions are:  5000
Accuracy on test set is: 0.8224 



EPOCH NUMBER:  8 (RUN:  5 COMB:  RSW_STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.6441706426488623, 'tp': 2093, 'tn': 2017, 'fp': 476, 'fn': 414, 'auroc': 0.8595277786977852, 'auprc': 0.7972963077831412, 'eval_loss': 0.8453016525268555}
Correct predictions are:  4110
Total predictions are:  5000
Accuracy on test set is: 0.822 



EPOCH NUMBER:  9 (RUN:  5 COMB:  RSW_STM_LOW )

NOW TRAIN THE MODEL.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."



NOW EVALUATE THE TEST DF.


  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


{'mcc': 0.01410400339712391, 'tp': 1, 'tn': 2493, 'fp': 0, 'fn': 2506, 'auroc': 0.5299040744479436, 'auprc': 0.5316348038421375, 'eval_loss': 0.6937904907226562}
Correct predictions are:  2494
Total predictions are:  5000
Accuracy on test set is: 0.4988 


[0.8108, 0.8254, 0.8236, 0.8188, 0.8212, 0.8232, 0.826, 0.8224, 0.822, 0.4988]


 Over all runs maximum accuracies are: [0.755, 0.8188, 0.8222, 0.826, 0.8328]
The median is: 0.8222
XLNet Accuracy Score on Test set ->  ['0.8222 +/- 0.06720000000000004']


## Now show compact results in a table.

In [None]:
# Width of the left-hand column (name of the preprocessing combination).
# The header and every row share it so the two columns line up.
NAME_COL_WIDTH = 27


def _format_result(result):
  """Return a summary entry with its numbers rendered to four decimals.

  The entries shown by this cell look like
  '0.8852 +/- 0.0030000000000000027': the part after '+/-' carries binary
  floating-point noise and the leading value has a varying number of digits
  (e.g. '0.817'). When `result` has the '<value> +/- <spread>' shape, both
  numbers are re-rendered with exactly four decimals.

  Args:
    result: the summary entry to display (converted with `str` first).

  Returns:
    The cleaned-up string, or `str(result)` unchanged when the entry does not
    have the expected shape, so unexpected content is still shown rather than
    hidden or turned into an error.
  """
  text = str(result)
  value, separator, spread = text.partition(" +/- ")
  if not separator:
    return text
  try:
    return f"{float(value):.4f} +/- {float(spread):.4f}"
  except ValueError:
    # One side of the separator is not numeric: fall back to the raw entry.
    return text


# Header row; `end=''` plus the print("\n") below leaves one blank line
# between the header and the first result, as in the original layout.
print(f"{'PREPRO FUNCTION':{NAME_COL_WIDTH}}{'Test Accuracy'}", end='')

print("\n")
for prepro_func in prepro_functions_dict_comb:
  # Element 0 of each `model_results` entry is the summary that gets displayed.
  print(f"{prepro_func:{NAME_COL_WIDTH}}{_format_result(model_results[prepro_func][0])}")
  print("\n")

 PREPRO FUNCTION    |  Test Accuracy   |

DON                        0.8852 +/- 0.0030000000000000027


LOW                        0.8806 +/- 0.007399999999999962


RSW                        0.8858 +/- 0.0023999999999999577


STM                        0.8516 +/- 0.03160000000000007


LOW_RSW                    0.8784 +/- 0.006599999999999939


LOW_STM                    0.8468 +/- 0.006399999999999961


RSW_LOW                    0.8744 +/- 0.0050000000000000044


RSW_STM                    0.8234 +/- 0.32200000000000006


STM_LOW                    0.8446 +/- 0.032399999999999984


STM_RSW                    0.817 +/- 0.015799999999999925


LOW_STM_RSW                0.7818 +/- 0.11940000000000006


LOW_RSW_STM                0.8196 +/- 0.01859999999999995


STM_LOW_RSW                0.6454 +/- 0.18280000000000007


STM_RSW_LOW                0.8198 +/- 0.052200000000000024


RSW_LOW_STM                0.8366 +/- 0.16759999999999997


RSW_STM_LOW                0.8222 +/- 0.0672000