In [None]:
!pip install transformers
!pip install sentencepiece
!pip install openpyxl

In [24]:
!nvidia-smi

/bin/bash: /home/jstil/miniconda3/envs/tf/lib/libtinfo.so.6: no version information available (required by /bin/bash)
Sat Oct 29 10:02:46 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 520.61.05    Driver Version: 522.25       CUDA Version: 11.8     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  On   | 00000000:01:00.0  On |                  N/A |
| N/A   60C    P8    16W /  N/A |   6889MiB /  8192MiB |      8%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                  

In [1]:
import sys
import os
import time
import re
import random
from typing import Dict, List, Optional, Union
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split, KFold, StratifiedKFold
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
import tensorflow as tf
from transformers import BertTokenizer, BertConfig, TFBertForSequenceClassification
from transformers import DistilBertTokenizer, TFDistilBertForSequenceClassification
from transformers import RobertaTokenizer, TFRobertaForSequenceClassification
from transformers import ElectraTokenizer, TFElectraForSequenceClassification
from transformers import XLNetTokenizer, TFXLNetForSequenceClassification
from transformers import LongformerTokenizer, TFLongformerForSequenceClassification
from transformers import DebertaTokenizer, TFDebertaForSequenceClassification

2022-10-29 09:47:11.174183: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-10-29 09:47:11.396973: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2022-10-29 09:47:11.977983: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/home/jstil/miniconda3/envs/tf/lib/
2022-10-29 09:47:11.979972: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin

In [3]:
# set seed, TF uses python ramdom and numpy library, so these must also be fixed
tf.random.set_seed(0)
random.seed(0)
np.random.seed(0)
os.environ['PYTHONHASHSEED']=str(0)
os.environ['TF_DETERMINISTIC_OPS'] = '0'

In [None]:
# see if hardware accelerator available
tf.config.experimental.list_physical_devices()

In [None]:
tf.test.gpu_device_name()

In [5]:
PATH_sg1 = "data/final_labels_SG1.xlsx"
PATH_sg2 = "data/final_labels_SG2.xlsx"
df_sg1 = pd.read_excel(PATH_sg1)
df_sg2 = pd.read_excel(PATH_sg2)
df_sg1.rename(columns={'text': 'sentence', 'label_bias': 'Label_bias'}, inplace=True)
df_sg2.rename(columns={'text': 'sentence', 'label_bias': 'Label_bias'}, inplace=True)
df_sg1.head()

Unnamed: 0,sentence,news_link,outlet,topic,type,Label_bias,label_opinion,biased_words
0,The Republican president assumed he was helpin...,http://www.msnbc.com/rachel-maddow-show/auto-i...,msnbc,environment,left,Biased,Expresses writer’s opinion,[]
1,Though the indictment of a woman for her own p...,https://eu.usatoday.com/story/news/nation/2019...,usa-today,abortion,center,Non-biased,Somewhat factual but also opinionated,[]
2,Ingraham began the exchange by noting American...,https://www.breitbart.com/economy/2020/01/12/d...,breitbart,immigration,right,No agreement,No agreement,['flood']
3,The tragedy of America’s 18 years in Afghanist...,http://feedproxy.google.com/~r/breitbart/~3/ER...,breitbart,international-politics-and-world-news,right,Biased,Somewhat factual but also opinionated,"['tragedy', 'stubborn']"
4,The justices threw out a challenge from gun ri...,https://www.huffpost.com/entry/supreme-court-g...,msnbc,gun-control,left,Non-biased,Entirely factual,[]


In [6]:
# binarize classification problem
df_sg1 = df_sg1[df_sg1['Label_bias']!='No agreement']
df_sg1 = df_sg1[df_sg1['Label_bias'].isna()==False]
df_sg1.replace(to_replace='Biased', value=1, inplace=True)
df_sg1.replace(to_replace='Non-biased', value=0, inplace=True)

df_sg2 = df_sg2[df_sg2['Label_bias']!='No agreement']
df_sg2.replace(to_replace='Biased', value=1, inplace=True)
df_sg2.replace(to_replace='Non-biased', value=0, inplace=True)

# test pipeline set
df_sg1, exclude = train_test_split(df_sg1, test_size=0.95)
df_sg2, exclude = train_test_split(df_sg2, test_size=0.95)

In [8]:
# Stratified k-Fold instance
skfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

In [20]:
# helper functions called in skfold loop

def pd_to_tf(df):
    """convert a pandas dataframe into a tensorflow dataset"""
    target = df.pop('Label_bias')
    sentence = df.pop('sentence')
    return tf.data.Dataset.from_tensor_slices((sentence.values, target.values))

def plot_graphs(history, metric):
    plt.plot(history.history[metric])
    plt.plot(history.history['val_'+metric], '')
    plt.xlabel("Epochs")
    plt.ylabel(metric)
    plt.legend([metric, 'val_'+metric])
    plt.show()

def tokenize(df, model_name):
    """convert a pandas dataframe into a tensorflow dataset and run hugging face's tokenizer on data"""
    df2 = df.copy(deep=False)
    target = df2.pop('Label_bias')
    sentence = df2.pop('sentence')
    
    if model_name=='bert':
        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    elif model_name=='roberta':
        tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
    elif model_name=='deberta':
        tokenizer = DebertaTokenizer.from_pretrained("kamalkraj/deberta-base")
    elif model_name=='distilbert':
        tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
    elif model_name=='electra':
        tokenizer = ElectraTokenizer.from_pretrained('google/electra-small-discriminator')
    elif model_name=='xlnet':
        tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased')

    train_encodings = tokenizer(
                        sentence.tolist(),                      
                        add_special_tokens = True, # add [CLS], [SEP]
                        truncation = True, # cut off at max length of the text that can go to BERT
                        padding = True, # add [PAD] tokens
                        return_attention_mask = True, # add attention mask to not focus on pad tokens
              )
    
    dataset = tf.data.Dataset.from_tensor_slices(
        (dict(train_encodings), 
         target.tolist()))
    return dataset

In [21]:
def run_model_5fold(df_name, df_train, model_name, freeze_encoder=False, pretrained=False, plot=False):
    """"freeze flags whether encoder layer should be frozen to not destroy transfer learning. Only set to false when enough data is provided"""

    # these variables will be needed for skfold to select indices
    Y = df_train['Label_bias']
    X = df_train['sentence']

    # hyperparams
    BUFFER_SIZE = 10000
    BATCH_SIZE = 12
    k = 1

    val_loss = []
    val_acc = []
    val_prec = []
    val_rec = []
    val_f1 = []
    val_f1_micro = []
    val_f1_wmacro = []
    
    if pretrained==True:
        optimizer = tf.keras.optimizers.Adam(learning_rate=5e-5)
        
        if model_name=='bert':
            transfer_model = TFBertForSequenceClassification.from_pretrained('bert-base-uncased')
            
        elif model_name=='roberta':
            transfer_model = TFRobertaForSequenceClassification.from_pretrained('roberta-base')
            
        elif model_name=='deberta':
            transfer_model = TFDebertaForSequenceClassification.from_pretrained("kamalkraj/deberta-base")
            
        transfer_model.compile(optimizer=optimizer, loss='binary_crossentropy') 
        transfer_model.load_weights(f'./checkpoints/{model_name}_final_checkpoint_news_headlines_USA')
        trained_model_layer = transfer_model.get_layer(index=0).get_weights()
            

    for train_index, val_index in skfold.split(X,Y):
        print('### Start fold {}'.format(k))

        # split into train and validation set
        train_dataset = df_train.iloc[train_index]
        val_dataset = df_train.iloc[val_index]

        # prepare data for transformer
        train_dataset = tokenize(train_dataset, model_name)
        val_dataset = tokenize(val_dataset, model_name)

        # mini-batch it
        train_dataset = train_dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE).prefetch(tf.data.experimental.AUTOTUNE)
        val_dataset = val_dataset.batch(BATCH_SIZE).prefetch(tf.data.experimental.AUTOTUNE)

        # create new model
        if model_name == 'bert':
            model = TFBertForSequenceClassification.from_pretrained("bert-base-uncased")
        if model_name == 'distilbert':
            model = TFDistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased')
        elif model_name == 'roberta':
            model = TFRobertaForSequenceClassification.from_pretrained('roberta-base')
        elif model_name == 'electra':
            model = TFElectraForSequenceClassification.from_pretrained('google/electra-small-discriminator')
        elif model_name == 'xlnet':
            model = TFXLNetForSequenceClassification.from_pretrained('xlnet-base-cased')
        elif model_name == 'deberta':
            model = TFDebertaForSequenceClassification.from_pretrained("kamalkraj/deberta-base")


        if freeze_encoder == True:
            for w in model.get_layer(index=0).weights:
                w._trainable = False

        # compile it
        optimizer = tf.keras.optimizers.Adam(learning_rate=5e-5) 
        model.compile(optimizer=optimizer, loss='binary_crossentropy') 

        # transfer learning
        if pretrained == True:
            model.get_layer(index=0).set_weights(trained_model_layer) # load bias-specific weights

        # after 2 epochs without improvement, stop training
        callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=1, restore_best_weights=True)

        # fit it
        history = model.fit(train_dataset, epochs=1, validation_data = val_dataset, callbacks=[callback])

        # plot history
        if plot:
            plot_graphs(history,'loss')

        # evaluate
        loss = model.evaluate(val_dataset)

        if model_name == 'xlnet':
            tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased')
            yhats = []
            for row in df_train.iloc[val_index]['sentence']:
                input = tokenizer(row, return_tensors="tf")
                output = model(input)
                logits = output.logits.numpy()[0]
                candidates = logits.tolist()
                decision = candidates.index(max(candidates))
                yhats.append(decision)
        else:
            logits = model.predict(val_dataset)  
            yhats = []
            for i in logits[0]:
                # assign class label according to highest logit
                candidates = i.tolist()
                decision = candidates.index(max(candidates))
                yhats.append(decision)

        y = []
        for text, label in val_dataset.unbatch():   
            y.append(label.numpy())

        val_loss.append(loss)
        val_acc.append(accuracy_score(y, yhats))
        val_prec.append(precision_score(y, yhats))
        val_rec.append(recall_score(y, yhats))
        val_f1.append(f1_score(y, yhats))
        val_f1_micro.append(f1_score(y, yhats, average='micro'))
        val_f1_wmacro.append(f1_score(y, yhats, average='weighted'))

        tf.keras.backend.clear_session()

        k += 1

        return {'loss': val_loss, 'acc': val_acc, 'prec': val_prec, 'rec': val_rec, 'f1': val_f1, 
                'f1_micro': val_f1_micro, 'f1_wmacro': val_f1_wmacro, 'model_name': model_name, 
                'distant': pretrained, 'df_name': df_name} 

In [11]:
def measure(d, results):
    loss_cv = np.mean(d['loss'])
    acc_cv = np.mean(d['acc'])
    prec_cv = np.mean(d['prec'])
    rec_cv = np.mean(d['rec'])
    f1_cv = np.mean(d['f1'])
    f1_micro_cv = np.mean(d['f1_micro'])
    f1_wmacro_cv = np.mean(d['f1_wmacro'])
    
    row = {
        'Dataset': d['df_name'],
        'Model': d['model_name'], 
        'Distant': d['distant'], 
        'Loss': loss_cv, 
        'Accuracy': acc_cv, 
        'Precision': prec_cv, 
        'Recall': rec_cv, 
        'F1': f1_cv, 
        'F1 Micro': f1_micro_cv, 
        'F1 Weighted': f1_wmacro_cv
    }
    
    results = results.append(row, ignore_index=True)
    print(row)
    return results


In [12]:
## instantiate results df
columns = ['Dataset', 'Model', 'Distant', 'Loss', 'Accuracy', 'Precision', 'Recall', 'F1', 'F1 Micro', 'F1 Weighted']
results = pd.DataFrame(columns=columns)

## Bert

In [None]:
# Sg1 
results = measure(run_model_5fold('sg1', df_sg1, 'bert', freeze_encoder=False, pretrained=False, plot=False), results)
print(results)

In [None]:
# Sg2 
results = measure(run_model_5fold('sg2', df_sg2, 'bert', freeze_encoder=False, pretrained=False, plot=False), results)
print(results)

## Bert w/ Distant

In [None]:
# Sg1 
results = measure(run_model_5fold('sg1', df_sg1, 'bert', freeze_encoder=False, pretrained=True, plot=False), results)
print(results)

In [None]:
# Sg2 
results = measure(run_model_5fold('sg2', df_sg2, 'bert', freeze_encoder=False, pretrained=True, plot=False), results)
print(results)

## Roberta

In [None]:
# Sg1 
results = measure(run_model_5fold('sg1', df_sg1, 'roberta', freeze_encoder=False, pretrained=False, plot=False), results)
print(results)

In [None]:
# Sg2 
results = measure(run_model_5fold('sg2', df_sg2, 'roberta', freeze_encoder=False, pretrained=False, plot=False), results)
print(results)

## Roberta w/ Distant

In [None]:
# Sg1 
results = measure(run_model_5fold('sg1', df_sg1, 'roberta', freeze_encoder=False, pretrained=True, plot=False), results)
print(results)

In [15]:
# Sg2 
results = measure(run_model_5fold('sg2', df_sg2, 'roberta', freeze_encoder=False, pretrained=True, plot=False), results)
print(results)

All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.

Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


### Start fold 1


All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.

Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'Dataset': 'sg2', 'Model': 'roberta', 'Distant': True, 'Loss': 0.769734799861908, 'Accuracy': 0.4594594594594595, 'Precision': 0.4444444444444444, 'Recall': 1.0, 'F1': 0.6153846153846153, 'F1 Micro': 0.4594594594594595, 'F1 Weighted': 0.31770931770931765}
  Dataset    Model Distant      Loss  Accuracy  Precision  Recall        F1  \
0     sg1  deberta    True  4.138522  0.500000   0.000000     0.0  0.000000   
1     sg2  roberta    True  0.769735  0.459459   0.444444     1.0  0.615385   

   F1 Micro  F1 Weighted  
0  0.500000     0.333333  
1  0.459459     0.317709  


  results = results.append(row, ignore_index=True)
  results = results.append(row, ignore_index=True)


## Deberta

In [None]:
# Sg1 
results = measure(run_model_5fold('sg1', df_sg1, 'deberta', freeze_encoder=False, pretrained=False, plot=False), results)
print(results)

In [None]:
# Sg2 
results = measure(run_model_5fold('sg2', df_sg2, 'deberta', freeze_encoder=False, pretrained=False, plot=False), results)
print(results)

## Deberta w/ Distant

In [13]:
# Sg1 
results = measure(run_model_5fold('sg1', df_sg1, 'deberta', freeze_encoder=False, pretrained=True, plot=False), results)
print(results)

2022-10-29 09:48:02.459631: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:966] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2022-10-29 09:48:02.482439: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:966] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2022-10-29 09:48:02.483003: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:966] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2022-10-29 09:48:02.484292: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate

### Start fold 1


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
All model checkpoint layers were used when initializing TFDebertaForSequenceClassification.

Some layers of TFDebertaForSequenceClassification were not initialized from the model checkpoint at kamalkraj/deberta-base and are newly initialized: ['cls_dropout', 'pooler', 'classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Instructions for updating:
The TensorFlow Distributions library has moved to TensorFlow Probability (https://github.com/tensorflow/probability). You should update all references to use `tfp.distributions` instead of `tf.distributions`.
Instructions for updating:
The TensorFlow Distributions library has moved to TensorFlow Probability (https://github.com/tensorflow/probability). You should update all references to use `tfp.distributions` instead of `tf.distributions`.
{'Dataset': 'sg1', 'Model': 'deberta', 'Distant': True, 'Loss': 4.138521671295166, 'Accuracy': 0.5, 'Precision': 0.0, 'Recall': 0.0, 'F1': 0.0, 'F1 Micro': 0.5, 'F1 Weighted': 0.3333333333333333}
  Dataset    Model Distant      Loss  Accuracy  Precision  Recall   F1  \
0     sg1  deberta    True  4.138522       0.5        0.0     0.0  0.0   

   F1 Micro  F1 Weighted  
0       0.5     0.333333  


  _warn_prf(average, modifier, msg_start, len(result))
  results = results.append(row, ignore_index=True)


In [17]:
# Sg2 
results = measure(run_model_5fold('sg2', df_sg2, 'deberta', freeze_encoder=False, pretrained=True, plot=False), results)
print(results)

All model checkpoint layers were used when initializing TFDebertaForSequenceClassification.

Some layers of TFDebertaForSequenceClassification were not initialized from the model checkpoint at kamalkraj/deberta-base and are newly initialized: ['cls_dropout', 'pooler', 'classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


### Start fold 1


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
All model checkpoint layers were used when initializing TFDebertaForSequenceClassification.

Some layers of TFDebertaForSequenceClassification were not initialized from the model checkpoint at kamalkraj/deberta-base and are newly initialized: ['cls_dropout', 'pooler', 'classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'Dataset': 'sg2', 'Model': 'deberta', 'Distant': True, 'Loss': 0.7459329962730408, 'Accuracy': 0.43243243243243246, 'Precision': 0.43243243243243246, 'Recall': 1.0, 'F1': 0.6037735849056604, 'F1 Micro': 0.43243243243243246, 'F1 Weighted': 0.2610912799592045}
  Dataset    Model Distant      Loss  Accuracy  Precision  Recall        F1  \
0     sg1  deberta    True  4.138522  0.500000   0.000000     0.0  0.000000   
1     sg2  roberta    True  0.769735  0.459459   0.444444     1.0  0.615385   
2     sg2  deberta    True  0.745933  0.432432   0.432432     1.0  0.603774   

   F1 Micro  F1 Weighted  
0  0.500000     0.333333  
1  0.459459     0.317709  
2  0.432432     0.261091  


  results = results.append(row, ignore_index=True)
  results = results.append(row, ignore_index=True)


## Distilbert

In [23]:
# Sg1 
results = measure(run_model_5fold('sg1', df_sg1, 'distilbert', freeze_encoder=False, pretrained=False, plot=False), results)
print(results)

### Start fold 1


Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/483 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/363M [00:00<?, ?B/s]

Some layers from the model checkpoint at distilbert-base-uncased were not used when initializing TFDistilBertForSequenceClassification: ['activation_13', 'vocab_layer_norm', 'vocab_transform', 'vocab_projector']
- This IS expected if you are initializing TFDistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFDistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some layers of TFDistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['dropout_19', 'classifier', 'pre_classifier']
You should probably TRAIN this model on a down-stream task to be able to use i

{'Dataset': 'sg1', 'Model': 'distilbert', 'Distant': False, 'Loss': 0.8513896465301514, 'Accuracy': 0.5, 'Precision': 0.0, 'Recall': 0.0, 'F1': 0.0, 'F1 Micro': 0.5, 'F1 Weighted': 0.3333333333333333}
  Dataset       Model Distant      Loss  Accuracy  Precision  Recall  \
0     sg1     deberta    True  4.138522  0.500000   0.000000    0.00   
1     sg2     roberta    True  0.769735  0.459459   0.444444    1.00   
2     sg2     deberta    True  0.745933  0.432432   0.432432    1.00   
3     sg1     electra   False  1.042484  0.437500   0.461538    0.75   
4     sg1       xlnet   False  3.889588  0.500000   0.500000    1.00   
5     sg1  distilbert   False  0.851390  0.500000   0.000000    0.00   

         F1  F1 Micro  F1 Weighted  
0  0.000000  0.500000     0.333333  
1  0.615385  0.459459     0.317709  
2  0.603774  0.432432     0.261091  
3  0.571429  0.437500     0.376623  
4  0.666667  0.500000     0.333333  
5  0.000000  0.500000     0.333333  


  _warn_prf(average, modifier, msg_start, len(result))
  results = results.append(row, ignore_index=True)
  results = results.append(row, ignore_index=True)


In [None]:
# Sg2 
results = measure(run_model_5fold('sg2', df_sg2, 'distilbert', freeze_encoder=False, pretrained=False, plot=False), results)
print(results)

## Xlnet

In [22]:
# Sg1 
results = measure(run_model_5fold('sg1', df_sg1, 'xlnet', freeze_encoder=False, pretrained=False, plot=False), results)
print(results)

### Start fold 1


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Some layers from the model checkpoint at xlnet-base-cased were not used when initializing TFXLNetForSequenceClassification: ['lm_loss']
- This IS expected if you are initializing TFXLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFXLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some layers of TFXLNetForSequenceClassification were not initialized from the model

{'Dataset': 'sg1', 'Model': 'xlnet', 'Distant': False, 'Loss': 3.8895883560180664, 'Accuracy': 0.5, 'Precision': 0.5, 'Recall': 1.0, 'F1': 0.6666666666666666, 'F1 Micro': 0.5, 'F1 Weighted': 0.3333333333333333}
  Dataset    Model Distant      Loss  Accuracy  Precision  Recall        F1  \
0     sg1  deberta    True  4.138522  0.500000   0.000000    0.00  0.000000   
1     sg2  roberta    True  0.769735  0.459459   0.444444    1.00  0.615385   
2     sg2  deberta    True  0.745933  0.432432   0.432432    1.00  0.603774   
3     sg1  electra   False  1.042484  0.437500   0.461538    0.75  0.571429   
4     sg1    xlnet   False  3.889588  0.500000   0.500000    1.00  0.666667   

   F1 Micro  F1 Weighted  
0  0.500000     0.333333  
1  0.459459     0.317709  
2  0.432432     0.261091  
3  0.437500     0.376623  
4  0.500000     0.333333  


  results = results.append(row, ignore_index=True)
  results = results.append(row, ignore_index=True)


In [None]:
# Sg2 
results = measure(run_model_5fold('sg2', df_sg2, 'xlnet', freeze_encoder=False, pretrained=False, plot=False), results)
print(results)

## Electra

In [18]:
# Sg1 
results = measure(run_model_5fold('sg1', df_sg1, 'electra', freeze_encoder=False, pretrained=False, plot=False), results)
print(results)

### Start fold 1


Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/665 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/54.5M [00:00<?, ?B/s]

Some layers from the model checkpoint at google/electra-small-discriminator were not used when initializing TFElectraForSequenceClassification: ['discriminator_predictions']
- This IS expected if you are initializing TFElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some layers of TFElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'Dataset': 'sg1', 'Model': 'electra', 'Distant': False, 'Loss': 1.0424840450286865, 'Accuracy': 0.4375, 'Precision': 0.46153846153846156, 'Recall': 0.75, 'F1': 0.5714285714285714, 'F1 Micro': 0.4375, 'F1 Weighted': 0.37662337662337664}
  Dataset    Model Distant      Loss  Accuracy  Precision  Recall        F1  \
0     sg1  deberta    True  4.138522  0.500000   0.000000    0.00  0.000000   
1     sg2  roberta    True  0.769735  0.459459   0.444444    1.00  0.615385   
2     sg2  deberta    True  0.745933  0.432432   0.432432    1.00  0.603774   
3     sg1  electra   False  1.042484  0.437500   0.461538    0.75  0.571429   

   F1 Micro  F1 Weighted  
0  0.500000     0.333333  
1  0.459459     0.317709  
2  0.432432     0.261091  
3  0.437500     0.376623  


  results = results.append(row, ignore_index=True)
  results = results.append(row, ignore_index=True)


In [None]:
# Sg2 
results = measure(run_model_5fold('sg2', df_sg2, 'electra', freeze_encoder=False, pretrained=False, plot=False), results)
print(results)