In [1]:
!pip install transformers
!pip install sentencepiece
!pip install openpyxl

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.24.0-py3-none-any.whl (5.5 MB)
[K     |████████████████████████████████| 5.5 MB 4.5 MB/s 
Collecting huggingface-hub<1.0,>=0.10.0
  Downloading huggingface_hub-0.11.0-py3-none-any.whl (182 kB)
[K     |████████████████████████████████| 182 kB 70.6 MB/s 
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[K     |████████████████████████████████| 7.6 MB 70.5 MB/s 
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.11.0 tokenizers-0.13.2 transformers-4.24.0
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting sentencepiece
  Downloading sentencepiece-0.1.97-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[K  

In [2]:
import sys
import os
import time
import re
import random
from typing import Dict, List, Optional, Union
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import gc

from google.colab import files

from sklearn.model_selection import train_test_split, KFold, StratifiedKFold
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
import tensorflow as tf
from transformers import BertTokenizer, BertConfig, TFBertForSequenceClassification
from transformers import DistilBertTokenizer, TFDistilBertForSequenceClassification
from transformers import RobertaTokenizer, TFRobertaForSequenceClassification
from transformers import ElectraTokenizer, TFElectraForSequenceClassification
from transformers import XLNetTokenizer, TFXLNetForSequenceClassification
from transformers import LongformerTokenizer, TFLongformerForSequenceClassification
from transformers import DebertaTokenizer, TFDebertaForSequenceClassification

In [3]:
# set seed, TF uses python ramdom and numpy library, so these must also be fixed
tf.random.set_seed(0)
random.seed(0)
np.random.seed(0)
os.environ['PYTHONHASHSEED']=str(0)
os.environ['TF_DETERMINISTIC_OPS'] = '0'

In [4]:
PATH_sg1 = "/content/drive/MyDrive/Colab Notebooks/data/final_labels_SG1.xlsx"
PATH_sg2 = "/content/drive/MyDrive/Colab Notebooks/data/final_labels_SG2.xlsx"
df_sg1 = pd.read_excel(PATH_sg1)
df_sg2 = pd.read_excel(PATH_sg2)
df_sg1.rename(columns={'text': 'sentence', 'label_bias': 'Label_bias'}, inplace=True)
df_sg2.rename(columns={'text': 'sentence', 'label_bias': 'Label_bias'}, inplace=True)
df_sg1.head()

Unnamed: 0,sentence,news_link,outlet,topic,type,Label_bias,label_opinion,biased_words
0,The Republican president assumed he was helpin...,http://www.msnbc.com/rachel-maddow-show/auto-i...,msnbc,environment,left,Biased,Expresses writer’s opinion,[]
1,Though the indictment of a woman for her own p...,https://eu.usatoday.com/story/news/nation/2019...,usa-today,abortion,center,Non-biased,Somewhat factual but also opinionated,[]
2,Ingraham began the exchange by noting American...,https://www.breitbart.com/economy/2020/01/12/d...,breitbart,immigration,right,No agreement,No agreement,['flood']
3,The tragedy of America’s 18 years in Afghanist...,http://feedproxy.google.com/~r/breitbart/~3/ER...,breitbart,international-politics-and-world-news,right,Biased,Somewhat factual but also opinionated,"['tragedy', 'stubborn']"
4,The justices threw out a challenge from gun ri...,https://www.huffpost.com/entry/supreme-court-g...,msnbc,gun-control,left,Non-biased,Entirely factual,[]


In [5]:
# binarize classification problem
df_sg1 = df_sg1[df_sg1['Label_bias']!='No agreement']
df_sg1 = df_sg1[df_sg1['Label_bias'].isna()==False]
df_sg1.replace(to_replace='Biased', value=1, inplace=True)
df_sg1.replace(to_replace='Non-biased', value=0, inplace=True)

df_sg2 = df_sg2[df_sg2['Label_bias']!='No agreement']
df_sg2.replace(to_replace='Biased', value=1, inplace=True)
df_sg2.replace(to_replace='Non-biased', value=0, inplace=True)

# # test pipeline set
# df_sg1, exclude = train_test_split(df_sg1, test_size=0.95)
# df_sg2, exclude = train_test_split(df_sg2, test_size=0.8)

In [51]:
# Stratified k-Fold instance
skfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

In [6]:
# helper functions called in skfold loop

def pd_to_tf(df):
    """convert a pandas dataframe into a tensorflow dataset"""
    target = df.pop('Label_bias')
    sentence = df.pop('sentence')
    return tf.data.Dataset.from_tensor_slices((sentence.values, target.values))

def plot_graphs(history, metric):
    plt.plot(history.history[metric])
    plt.plot(history.history['val_'+metric], '')
    plt.xlabel("Epochs")
    plt.ylabel(metric)
    plt.legend([metric, 'val_'+metric])
    plt.show()

def tokenize(df, model_name):
    """convert a pandas dataframe into a tensorflow dataset and run hugging face's tokenizer on data"""
    df2 = df.copy(deep=False)
    target = df2.pop('Label_bias')
    sentence = df2.pop('sentence')
    
    if model_name=='bert':
        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    elif model_name=='roberta':
        tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
    elif model_name=='deberta':
        tokenizer = DebertaTokenizer.from_pretrained("kamalkraj/deberta-base")
    elif model_name=='electra':
        tokenizer = ElectraTokenizer.from_pretrained('google/electra-small-discriminator')

    train_encodings = tokenizer(
                        sentence.tolist(),                      
                        add_special_tokens = True, # add [CLS], [SEP]
                        truncation = True, # cut off at max length of the text that can go to BERT
                        padding = True, # add [PAD] tokens
                        return_attention_mask = True, # add attention mask to not focus on pad tokens
              )
    
    dataset = tf.data.Dataset.from_tensor_slices(
        (dict(train_encodings), 
         target.tolist()))
    
    # clear unused memory
    del(df2)
    del(target)
    del(sentence)
    del(tokenizer)
    del(train_encodings)
    gc.collect()
    
    return dataset

In [48]:
def run_model_5fold(df_name, df_train, model_name, freeze_encoder=False, pretrained=False, plot=False, batch_size=32, epochs=10):
    """"function to run 5-fold cross validation for any provided model"""

    # these variables will be needed for skfold to select indices
    Y = df_train['Label_bias']
    X = df_train['sentence']

    # hyperparams
    BUFFER_SIZE = 10000
    BATCH_SIZE = batch_size
    k = 1

    val_loss = []
    val_acc = []
    val_prec = []
    val_rec = []
    val_f1 = []
    val_f1_micro = []
    val_f1_wmacro = []
    
    if pretrained==True:
        optimizer = tf.keras.optimizers.Adam(learning_rate=5e-5)
        
        if model_name=='bert':
            transfer_model = TFBertForSequenceClassification.from_pretrained('bert-base-uncased')
            
        elif model_name=='roberta':
            transfer_model = TFRobertaForSequenceClassification.from_pretrained('roberta-base')
            
        elif model_name=='deberta':
            transfer_model = TFDebertaForSequenceClassification.from_pretrained("kamalkraj/deberta-base")
            
        transfer_model.compile(optimizer=optimizer, loss='binary_crossentropy') 
        transfer_model.load_weights(f'/content/drive/MyDrive/Colab Notebooks/weights/{model_name}_final_checkpoint_news_headlines_USA')
        trained_model_layer = transfer_model.get_layer(index=0).get_weights()
            

    for train_index, val_index in skfold.split(X,Y):
        print('### Start fold {}'.format(k))

        # split into train and validation set
        train_dataset = df_train.iloc[train_index]
        val_dataset = df_train.iloc[val_index]

        # prepare data for transformer
        train_dataset = tokenize(train_dataset, model_name)
        val_dataset = tokenize(val_dataset, model_name)

        # mini-batch it
        train_dataset = train_dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE).prefetch(tf.data.experimental.AUTOTUNE)
        val_dataset = val_dataset.batch(BATCH_SIZE).prefetch(tf.data.experimental.AUTOTUNE)

        # create new model
        if model_name == 'bert':
            model = TFBertForSequenceClassification.from_pretrained("bert-base-uncased")
        elif model_name == 'roberta':
            model = TFRobertaForSequenceClassification.from_pretrained('roberta-base')
        elif model_name == 'electra':
            model = TFElectraForSequenceClassification.from_pretrained('google/electra-small-discriminator')
        elif model_name == 'deberta':
            model = TFDebertaForSequenceClassification.from_pretrained("kamalkraj/deberta-base")

        # freeze flags whether encoder layer should be frozen to not destroy transfer learning. Only set to false when enough data is provided
        if freeze_encoder == True:
            for w in model.get_layer(index=0).weights:
                w._trainable = False

        # compile it
        optimizer = tf.keras.optimizers.Adam(learning_rate=5e-5) 
        model.compile(optimizer=optimizer, loss='binary_crossentropy') 

        # transfer learning
        if pretrained == True:
            model.get_layer(index=0).set_weights(trained_model_layer) # load bias-specific weights

        # after 2 epochs without improvement, stop training
        callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True)

        # fit it
        history = model.fit(train_dataset, epochs=epochs, validation_data = val_dataset, callbacks=[callback])

        # plot history
        if plot:
            plot_graphs(history,'loss')

        # evaluate
        loss = model.evaluate(val_dataset)

        logits = model.predict(val_dataset)  
        yhats = []
        for i in logits[0]:
            # assign class label according to highest logit
            candidates = i.tolist()
            decision = candidates.index(max(candidates))
            yhats.append(decision)

        y = []
        for text, label in val_dataset.unbatch():   
            y.append(label.numpy())
        

        val_loss.append(loss)
        val_acc.append(accuracy_score(y, yhats))
        val_prec.append(precision_score(y, yhats))
        val_rec.append(recall_score(y, yhats))
        val_f1.append(f1_score(y, yhats))
        val_f1_micro.append(f1_score(y, yhats, average='micro'))
        val_f1_wmacro.append(f1_score(y, yhats, average='weighted'))
        
        

        tf.keras.backend.clear_session()
        
        # clear unused memory
        del(train_dataset)
        del(val_dataset)
        del(history)
        del(optimizer)
        del(callback)
        del(loss)
        del(y)
        gc.collect()

        k += 1
    
    # save model weights
    trained_layer = model.get_layer(index=0).get_weights()
    model.save_weights(f'/content/drive/MyDrive/Colab Notebooks/weights/{model_name}_{pretrained}_{df_name}_main')

    return {'loss': val_loss, 'acc': val_acc, 'prec': val_prec, 'rec': val_rec, 'f1': val_f1, 
            'f1_micro': val_f1_micro, 'f1_wmacro': val_f1_wmacro, 'model_name': model_name, 
            'distant': pretrained, 'df_name': df_name} 

In [46]:
def measure(d, results):
    loss_cv = np.mean(d['loss'])
    acc_cv = np.mean(d['acc'])
    prec_cv = np.mean(d['prec'])
    rec_cv = np.mean(d['rec'])
    f1_cv = np.mean(d['f1'])
    f1_micro_cv = np.mean(d['f1_micro'])
    f1_wmacro_cv = np.mean(d['f1_wmacro'])
    
    row = {
        'Dataset': d['df_name'],
        'Model': d['model_name'], 
        'Distant': d['distant'], 
        'Loss': loss_cv, 
        'Accuracy': acc_cv, 
        'Precision': prec_cv, 
        'Recall': rec_cv, 
        'F1': f1_cv, 
        'F1 Micro': f1_micro_cv, 
        'F1 Weighted': f1_wmacro_cv
    }
    
    results = results.append(row, ignore_index=True)
    
    return results

In [47]:
## instantiate results df
columns = ['Dataset', 'Model', 'Distant', 'Loss', 'Accuracy', 'Precision', 'Recall', 'F1', 'F1 Micro', 'F1 Weighted']
results = pd.DataFrame(columns=columns)

# BERT

In [None]:
# Sg1 
results = measure(run_model_5fold('sg1', df_sg1, 'bert', freeze_encoder=False, pretrained=False, plot=False), results)
print(results)

### Start fold 1


All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
### Start fold 2


All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
### Start fold 3


All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
### Start fold 4


All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
### Start fold 5


All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
  Dataset Model Distant     Loss  Accuracy  Precision    Recall        F1  \
0     sg1  bert   False  0.55603  0.451406   0.400755  0.487284  0.420398   

   F1 Micro  F1 Weighted  
0  0.451406     0.413136  


In [None]:
# Sg2 
results = measure(run_model_5fold('sg2', df_sg2, 'bert', freeze_encoder=False, pretrained=False, plot=False), results)
print(results)

### Start fold 1


All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
### Start fold 2


All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


  _warn_prf(average, modifier, msg_start, len(result))


### Start fold 3


All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
### Start fold 4


All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
### Start fold 5


All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
  Dataset Model Distant      Loss  Accuracy  Precision    Recall        F1  \
0     sg1  bert   False  0.556030  0.451406   0.400755  0.487284  0.420398   
1     sg2  bert   False  1.265255  0.520005   0.428627  0.239779  0.198883   

   F1 Micro  F1 Weighted  
0  0.451406     0.413136  
1  0.520005     0.378480  


# RoBERTa

In [None]:
# Sg1 
results = measure(run_model_5fold('sg1', df_sg1, 'roberta', freeze_encoder=False, pretrained=False, plot=False), results)
print(results)

### Start fold 1


Downloading:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/481 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/657M [00:00<?, ?B/s]

All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.

Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
### Start fold 2


All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.

Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
### Start fold 3


All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.

Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
### Start fold 4


All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.

Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
### Start fold 5


All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.

Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
  Dataset    Model Distant      Loss  Accuracy  Precision    Recall        F1  \
0     sg1  roberta   False  0.580312  0.515538    0.56224  0.636251  0.460742   

   F1 Micro  F1 Weighted  
0  0.515538     0.377869  


In [None]:
# Sg2 
results = measure(run_model_5fold('sg2', df_sg2, 'roberta', freeze_encoder=False, pretrained=False, plot=False), results)
print(results)

### Start fold 1


All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.

Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
### Start fold 2


All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.

Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10


  _warn_prf(average, modifier, msg_start, len(result))


### Start fold 3


All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.

Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
### Start fold 4


All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.

Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
### Start fold 5


All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.

Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
  Dataset    Model Distant      Loss  Accuracy  Precision    Recall        F1  \
0     sg1  roberta   False  0.580312  0.515538   0.562240  0.636251  0.460742   
1     sg2  roberta   False  0.512574  0.545579   0.522718  0.595028  0.418805   

   F1 Micro  F1 Weighted  
0  0.515538     0.377869  
1  0.545579     0.419941  


# DeBERTa

In [None]:
# Sg1 
results = measure(run_model_5fold('sg1', df_sg1, 'deberta', freeze_encoder=False, pretrained=False, plot=False), results)
print(results)

### Start fold 1


Downloading:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/744 [00:00<?, ?B/s]

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Downloading:   0%|          | 0.00/555M [00:00<?, ?B/s]

All model checkpoint layers were used when initializing TFDebertaForSequenceClassification.

Some layers of TFDebertaForSequenceClassification were not initialized from the model checkpoint at kamalkraj/deberta-base and are newly initialized: ['classifier', 'cls_dropout', 'pooler']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10


Instructions for updating:
The TensorFlow Distributions library has moved to TensorFlow Probability (https://github.com/tensorflow/probability). You should update all references to use `tfp.distributions` instead of `tf.distributions`.
Instructions for updating:
The TensorFlow Distributions library has moved to TensorFlow Probability (https://github.com/tensorflow/probability). You should update all references to use `tfp.distributions` instead of `tf.distributions`.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
### Start fold 2


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
All model checkpoint layers were used when initializing TFDebertaForSequenceClassification.

Some layers of TFDebertaForSequenceClassification were not initialized from the model checkpoint at kamalkraj/deberta-base and are newly initialized: ['classifier', 'cls_dropout', 'pooler']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
### Start fold 3


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
All model checkpoint layers were used when initializing TFDebertaForSequenceClassification.

Some layers of TFDebertaForSequenceClassification were not initialized from the model checkpoint at kamalkraj/deberta-base and are newly initialized: ['classifier', 'cls_dropout', 'pooler']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
### Start fold 4


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
All model checkpoint layers were used when initializing TFDebertaForSequenceClassification.

Some layers of TFDebertaForSequenceClassification were not initialized from the model checkpoint at kamalkraj/deberta-base and are newly initialized: ['classifier', 'cls_dropout', 'pooler']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
### Start fold 5


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
All model checkpoint layers were used when initializing TFDebertaForSequenceClassification.

Some layers of TFDebertaForSequenceClassification were not initialized from the model checkpoint at kamalkraj/deberta-base and are newly initialized: ['classifier', 'cls_dropout', 'pooler']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


  _warn_prf(average, modifier, msg_start, len(result))


  Dataset    Model Distant      Loss  Accuracy  Precision    Recall        F1  \
0     sg1  roberta   False  0.580312  0.515538   0.562240  0.636251  0.460742   
1     sg2  roberta   False  0.512574  0.545579   0.522718  0.595028  0.418805   
2     sg1  deberta   False  1.786612  0.513528   0.431187  0.633163  0.467586   

   F1 Micro  F1 Weighted  
0  0.515538     0.377869  
1  0.545579     0.419941  
2  0.513528     0.389934  


In [None]:
# Sg2
results = measure(run_model_5fold('sg2', df_sg2, 'deberta', freeze_encoder=False, pretrained=False, plot=False), results)
print(results)

### Start fold 1


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
All model checkpoint layers were used when initializing TFDebertaForSequenceClassification.

Some layers of TFDebertaForSequenceClassification were not initialized from the model checkpoint at kamalkraj/deberta-base and are newly initialized: ['classifier', 'cls_dropout', 'pooler']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
### Start fold 2


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
All model checkpoint layers were used when initializing TFDebertaForSequenceClassification.

Some layers of TFDebertaForSequenceClassification were not initialized from the model checkpoint at kamalkraj/deberta-base and are newly initialized: ['classifier', 'cls_dropout', 'pooler']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10


  _warn_prf(average, modifier, msg_start, len(result))


### Start fold 3


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
All model checkpoint layers were used when initializing TFDebertaForSequenceClassification.

Some layers of TFDebertaForSequenceClassification were not initialized from the model checkpoint at kamalkraj/deberta-base and are newly initialized: ['classifier', 'cls_dropout', 'pooler']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
### Start fold 4


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
All model checkpoint layers were used when initializing TFDebertaForSequenceClassification.

Some layers of TFDebertaForSequenceClassification were not initialized from the model checkpoint at kamalkraj/deberta-base and are newly initialized: ['classifier', 'cls_dropout', 'pooler']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
### Start fold 5


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
All model checkpoint layers were used when initializing TFDebertaForSequenceClassification.

Some layers of TFDebertaForSequenceClassification were not initialized from the model checkpoint at kamalkraj/deberta-base and are newly initialized: ['classifier', 'cls_dropout', 'pooler']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


  _warn_prf(average, modifier, msg_start, len(result))


  Dataset    Model Distant      Loss  Accuracy  Precision    Recall        F1  \
0     sg1  roberta   False  0.580312  0.515538   0.562240  0.636251  0.460742   
1     sg2  roberta   False  0.512574  0.545579   0.522718  0.595028  0.418805   
2     sg1  deberta   False  1.786612  0.513528   0.431187  0.633163  0.467586   
3     sg2  deberta   False  2.062078  0.500138   0.296457  0.600000  0.396837   

   F1 Micro  F1 Weighted  
0  0.515538     0.377869  
1  0.545579     0.419941  
2  0.513528     0.389934  
3  0.500138     0.335278  


# BERT w/ Distant

In [None]:
# Sg1 
results = measure(run_model_5fold('sg1', df_sg1, 'bert', freeze_encoder=False, pretrained=True, plot=False), results)
print(results)

All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


### Start fold 1


Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


  _warn_prf(average, modifier, msg_start, len(result))


### Start fold 2


All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


  _warn_prf(average, modifier, msg_start, len(result))


### Start fold 3


All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
### Start fold 4


All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
### Start fold 5


All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10




  Dataset    Model Distant      Loss  Accuracy  Precision    Recall        F1  \
0     sg1  roberta   False  0.580312  0.515538   0.562240  0.636251  0.460742   
1     sg2  roberta   False  0.512574  0.545579   0.522718  0.595028  0.418805   
2     sg1  deberta   False  1.786612  0.513528   0.431187  0.633163  0.467586   
3     sg2  deberta   False  2.062078  0.500138   0.296457  0.600000  0.396837   
4     sg1  electra   False  0.550895  0.475357   0.444049  0.380805  0.393093   
5     sg2  electra   False  0.482666  0.506686   0.651247  0.436464  0.394047   
6     sg1  deberta    True  0.617424  0.492855   0.334588  0.584009  0.418621   
7     sg2  deberta    True  0.620772  0.412249   0.402475  0.313812  0.241562   
8     sg1     bert    True  1.253572  0.452740   0.246955  0.457718  0.316711   

   F1 Micro  F1 Weighted  
0  0.515538     0.377869  
1  0.545579     0.419941  
2  0.513528     0.389934  
3  0.500138     0.335278  
4  0.475357     0.448213  
5  0.506686     0.440985  


In [None]:
# Sg2 
results = measure(run_model_5fold('sg2', df_sg2, 'bert', freeze_encoder=False, pretrained=True, plot=False), results)
print(results)

All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


### Start fold 1


All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
### Start fold 2


All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
### Start fold 3


All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
### Start fold 4


All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
### Start fold 5


All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10




  Dataset    Model Distant      Loss  Accuracy  Precision    Recall        F1  \
0     sg1  roberta   False  0.580312  0.515538   0.562240  0.636251  0.460742   
1     sg2  roberta   False  0.512574  0.545579   0.522718  0.595028  0.418805   
2     sg1  deberta   False  1.786612  0.513528   0.431187  0.633163  0.467586   
3     sg2  deberta   False  2.062078  0.500138   0.296457  0.600000  0.396837   
4     sg1  electra   False  0.550895  0.475357   0.444049  0.380805  0.393093   
5     sg2  electra   False  0.482666  0.506686   0.651247  0.436464  0.394047   
6     sg1  deberta    True  0.617424  0.492855   0.334588  0.584009  0.418621   
7     sg2  deberta    True  0.620772  0.412249   0.402475  0.313812  0.241562   
8     sg1     bert    True  1.253572  0.452740   0.246955  0.457718  0.316711   
9     sg2     bert    True  0.532223  0.558989   0.539072  0.743646  0.606827   

   F1 Micro  F1 Weighted  
0  0.515538     0.377869  
1  0.545579     0.419941  
2  0.513528     0.389934  


# RoBERTa w/ Distant

In [None]:
# Sg1 
results = measure(run_model_5fold('sg1', df_sg1, 'roberta', freeze_encoder=False, pretrained=True, plot=False), results)
print(results)

All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.

Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


### Start fold 1


All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.

Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
### Start fold 2


All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.

Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
### Start fold 3


All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.

Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
### Start fold 4


All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.

Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
### Start fold 5


All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.

Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10




   Dataset    Model Distant      Loss  Accuracy  Precision    Recall  \
0      sg1  roberta   False  0.580312  0.515538   0.562240  0.636251   
1      sg2  roberta   False  0.512574  0.545579   0.522718  0.595028   
2      sg1  deberta   False  1.786612  0.513528   0.431187  0.633163   
3      sg2  deberta   False  2.062078  0.500138   0.296457  0.600000   
4      sg1  electra   False  0.550895  0.475357   0.444049  0.380805   
5      sg2  electra   False  0.482666  0.506686   0.651247  0.436464   
6      sg1  deberta    True  0.617424  0.492855   0.334588  0.584009   
7      sg2  deberta    True  0.620772  0.412249   0.402475  0.313812   
8      sg1     bert    True  1.253572  0.452740   0.246955  0.457718   
9      sg2     bert    True  0.532223  0.558989   0.539072  0.743646   
10     sg1  roberta    True  0.546565  0.468920   0.549649  0.386568   

          F1  F1 Micro  F1 Weighted  
0   0.460742  0.515538     0.377869  
1   0.418805  0.545579     0.419941  
2   0.467586  0.51352

In [None]:
# Sg2
results = measure(run_model_5fold('sg2', df_sg2, 'roberta', freeze_encoder=False, pretrained=True, plot=False), results)
print(results)

All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.

Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


### Start fold 1


All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.

Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
### Start fold 2


All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.

Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
### Start fold 3


All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.

Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
### Start fold 4


All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.

Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
### Start fold 5


All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.

Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10




   Dataset    Model Distant      Loss  Accuracy  Precision    Recall  \
0      sg1  roberta   False  0.580312  0.515538   0.562240  0.636251   
1      sg2  roberta   False  0.512574  0.545579   0.522718  0.595028   
2      sg1  deberta   False  1.786612  0.513528   0.431187  0.633163   
3      sg2  deberta   False  2.062078  0.500138   0.296457  0.600000   
4      sg1  electra   False  0.550895  0.475357   0.444049  0.380805   
5      sg2  electra   False  0.482666  0.506686   0.651247  0.436464   
6      sg1  deberta    True  0.617424  0.492855   0.334588  0.584009   
7      sg2  deberta    True  0.620772  0.412249   0.402475  0.313812   
8      sg1     bert    True  1.253572  0.452740   0.246955  0.457718   
9      sg2     bert    True  0.532223  0.558989   0.539072  0.743646   
10     sg1  roberta    True  0.546565  0.468920   0.549649  0.386568   
11     sg2  roberta    True  0.498680  0.570941   0.598381  0.611602   

          F1  F1 Micro  F1 Weighted  
0   0.460742  0.515538   

# DeBERTa w/ Distant

In [None]:
# Sg1 
results = measure(run_model_5fold('sg1', df_sg1, 'deberta', freeze_encoder=False, pretrained=True, plot=False), results)
print(results)

All model checkpoint layers were used when initializing TFDebertaForSequenceClassification.

Some layers of TFDebertaForSequenceClassification were not initialized from the model checkpoint at kamalkraj/deberta-base and are newly initialized: ['classifier', 'cls_dropout', 'pooler']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


### Start fold 1


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
All model checkpoint layers were used when initializing TFDebertaForSequenceClassification.

Some layers of TFDebertaForSequenceClassification were not initialized from the model checkpoint at kamalkraj/deberta-base and are newly initialized: ['classifier', 'cls_dropout', 'pooler']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
### Start fold 2


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
All model checkpoint layers were used when initializing TFDebertaForSequenceClassification.

Some layers of TFDebertaForSequenceClassification were not initialized from the model checkpoint at kamalkraj/deberta-base and are newly initialized: ['classifier', 'cls_dropout', 'pooler']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


  _warn_prf(average, modifier, msg_start, len(result))


### Start fold 3


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
All model checkpoint layers were used when initializing TFDebertaForSequenceClassification.

Some layers of TFDebertaForSequenceClassification were not initialized from the model checkpoint at kamalkraj/deberta-base and are newly initialized: ['classifier', 'cls_dropout', 'pooler']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
### Start fold 4


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
All model checkpoint layers were used when initializing TFDebertaForSequenceClassification.

Some layers of TFDebertaForSequenceClassification were not initialized from the model checkpoint at kamalkraj/deberta-base and are newly initialized: ['classifier', 'cls_dropout', 'pooler']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
### Start fold 5


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
All model checkpoint layers were used when initializing TFDebertaForSequenceClassification.

Some layers of TFDebertaForSequenceClassification were not initialized from the model checkpoint at kamalkraj/deberta-base and are newly initialized: ['classifier', 'cls_dropout', 'pooler']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10




  Dataset    Model Distant      Loss  Accuracy  Precision    Recall        F1  \
0     sg1  roberta   False  0.580312  0.515538   0.562240  0.636251  0.460742   
1     sg2  roberta   False  0.512574  0.545579   0.522718  0.595028  0.418805   
2     sg1  deberta   False  1.786612  0.513528   0.431187  0.633163  0.467586   
3     sg2  deberta   False  2.062078  0.500138   0.296457  0.600000  0.396837   
4     sg1  electra   False  0.550895  0.475357   0.444049  0.380805  0.393093   
5     sg2  electra   False  0.482666  0.506686   0.651247  0.436464  0.394047   
6     sg1  deberta    True  0.617424  0.492855   0.334588  0.584009  0.418621   

   F1 Micro  F1 Weighted  
0  0.515538     0.377869  
1  0.545579     0.419941  
2  0.513528     0.389934  
3  0.500138     0.335278  
4  0.475357     0.448213  
5  0.506686     0.440985  
6  0.492855     0.396586  


In [None]:
# Sg2
results = measure(run_model_5fold('sg2', df_sg2, 'deberta', freeze_encoder=False, pretrained=True, plot=False), results)
print(results)

All model checkpoint layers were used when initializing TFDebertaForSequenceClassification.

Some layers of TFDebertaForSequenceClassification were not initialized from the model checkpoint at kamalkraj/deberta-base and are newly initialized: ['classifier', 'cls_dropout', 'pooler']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


### Start fold 1


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
All model checkpoint layers were used when initializing TFDebertaForSequenceClassification.

Some layers of TFDebertaForSequenceClassification were not initialized from the model checkpoint at kamalkraj/deberta-base and are newly initialized: ['classifier', 'cls_dropout', 'pooler']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
### Start fold 2


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
All model checkpoint layers were used when initializing TFDebertaForSequenceClassification.

Some layers of TFDebertaForSequenceClassification were not initialized from the model checkpoint at kamalkraj/deberta-base and are newly initialized: ['classifier', 'cls_dropout', 'pooler']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
### Start fold 3


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
All model checkpoint layers were used when initializing TFDebertaForSequenceClassification.

Some layers of TFDebertaForSequenceClassification were not initialized from the model checkpoint at kamalkraj/deberta-base and are newly initialized: ['classifier', 'cls_dropout', 'pooler']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
### Start fold 4


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
All model checkpoint layers were used when initializing TFDebertaForSequenceClassification.

Some layers of TFDebertaForSequenceClassification were not initialized from the model checkpoint at kamalkraj/deberta-base and are newly initialized: ['classifier', 'cls_dropout', 'pooler']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
### Start fold 5


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
All model checkpoint layers were used when initializing TFDebertaForSequenceClassification.

Some layers of TFDebertaForSequenceClassification were not initialized from the model checkpoint at kamalkraj/deberta-base and are newly initialized: ['classifier', 'cls_dropout', 'pooler']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10


  _warn_prf(average, modifier, msg_start, len(result))


  Dataset    Model Distant      Loss  Accuracy  Precision    Recall        F1  \
0     sg1  roberta   False  0.580312  0.515538   0.562240  0.636251  0.460742   
1     sg2  roberta   False  0.512574  0.545579   0.522718  0.595028  0.418805   
2     sg1  deberta   False  1.786612  0.513528   0.431187  0.633163  0.467586   
3     sg2  deberta   False  2.062078  0.500138   0.296457  0.600000  0.396837   
4     sg1  electra   False  0.550895  0.475357   0.444049  0.380805  0.393093   
5     sg2  electra   False  0.482666  0.506686   0.651247  0.436464  0.394047   
6     sg1  deberta    True  0.617424  0.492855   0.334588  0.584009  0.418621   
7     sg2  deberta    True  0.620772  0.412249   0.402475  0.313812  0.241562   

   F1 Micro  F1 Weighted  
0  0.515538     0.377869  
1  0.545579     0.419941  
2  0.513528     0.389934  
3  0.500138     0.335278  
4  0.475357     0.448213  
5  0.506686     0.440985  
6  0.492855     0.396586  
7  0.412249     0.310578  


# ELECTRA

In [None]:
# Sg1 
results = measure(run_model_5fold('sg1', df_sg1, 'electra', freeze_encoder=False, pretrained=False, plot=False), results)
print(results)

### Start fold 1


Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/665 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/54.5M [00:00<?, ?B/s]

Some layers from the model checkpoint at google/electra-small-discriminator were not used when initializing TFElectraForSequenceClassification: ['discriminator_predictions']
- This IS expected if you are initializing TFElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some layers of TFElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
### Start fold 2


Some layers from the model checkpoint at google/electra-small-discriminator were not used when initializing TFElectraForSequenceClassification: ['discriminator_predictions']
- This IS expected if you are initializing TFElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some layers of TFElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
### Start fold 3


Some layers from the model checkpoint at google/electra-small-discriminator were not used when initializing TFElectraForSequenceClassification: ['discriminator_predictions']
- This IS expected if you are initializing TFElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some layers of TFElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
### Start fold 4


Some layers from the model checkpoint at google/electra-small-discriminator were not used when initializing TFElectraForSequenceClassification: ['discriminator_predictions']
- This IS expected if you are initializing TFElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some layers of TFElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
### Start fold 5


Some layers from the model checkpoint at google/electra-small-discriminator were not used when initializing TFElectraForSequenceClassification: ['discriminator_predictions']
- This IS expected if you are initializing TFElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some layers of TFElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
  Dataset    Model Distant      Loss  Accuracy  Precision    Recall        F1  \
0     sg1  roberta   False  0.580312  0.515538   0.562240  0.636251  0.460742   
1     sg2  roberta   False  0.512574  0.545579   0.522718  0.595028  0.418805   
2     sg1  deberta   False  1.786612  0.513528   0.431187  0.633163  0.467586   
3     sg2  deberta   False  2.062078  0.500138   0.296457  0.600000  0.396837   
4     sg1  electra   False  0.550895  0.475357   0.444049  0.380805  0.393093   

   F1 Micro  F1 Weighted  
0  0.515538     0.377869  
1  0.545579     0.419941  
2  0.513528     0.389934  
3  0.500138     0.335278  
4  0.475357     0.448213  


In [None]:
# Sg2 
results = measure(run_model_5fold('sg2', df_sg2, 'electra', freeze_encoder=False, pretrained=False, plot=False), results)
print(results)

### Start fold 1


Some layers from the model checkpoint at google/electra-small-discriminator were not used when initializing TFElectraForSequenceClassification: ['discriminator_predictions']
- This IS expected if you are initializing TFElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some layers of TFElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
### Start fold 2


Some layers from the model checkpoint at google/electra-small-discriminator were not used when initializing TFElectraForSequenceClassification: ['discriminator_predictions']
- This IS expected if you are initializing TFElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some layers of TFElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
### Start fold 3


Some layers from the model checkpoint at google/electra-small-discriminator were not used when initializing TFElectraForSequenceClassification: ['discriminator_predictions']
- This IS expected if you are initializing TFElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some layers of TFElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
### Start fold 4


Some layers from the model checkpoint at google/electra-small-discriminator were not used when initializing TFElectraForSequenceClassification: ['discriminator_predictions']
- This IS expected if you are initializing TFElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some layers of TFElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
### Start fold 5


Some layers from the model checkpoint at google/electra-small-discriminator were not used when initializing TFElectraForSequenceClassification: ['discriminator_predictions']
- This IS expected if you are initializing TFElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some layers of TFElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
  Dataset    Model Distant      Loss  Accuracy  Precision    Recall        F1  \
0     sg1  roberta   False  0.580312  0.515538   0.562240  0.636251  0.460742   
1     sg2  roberta   False  0.512574  0.545579   0.522718  0.595028  0.418805   
2     sg1  deberta   False  1.786612  0.513528   0.431187  0.633163  0.467586   
3     sg2  deberta   False  2.062078  0.500138   0.296457  0.600000  0.396837   
4     sg1  electra   False  0.550895  0.475357   0.444049  0.380805  0.393093   
5     sg2  electra   False  0.482666  0.506686   0.651247  0.436464  0.394047   

   F1 Micro  F1 Weighted  
0  0.515538     0.377869  
1  0.545579     0.419941  
2  0.513528     0.389934  
3  0.500138     0.335278  
4  0.475357     0.448213  
5  0.506686     0.440985  


# Analysis

In [34]:
df_sg1.head()

Unnamed: 0,sentence,news_link,outlet,topic,type,Label_bias,label_opinion,biased_words
0,The Republican president assumed he was helpin...,http://www.msnbc.com/rachel-maddow-show/auto-i...,msnbc,environment,left,1,Expresses writer’s opinion,[]
1,Though the indictment of a woman for her own p...,https://eu.usatoday.com/story/news/nation/2019...,usa-today,abortion,center,0,Somewhat factual but also opinionated,[]
3,The tragedy of America’s 18 years in Afghanist...,http://feedproxy.google.com/~r/breitbart/~3/ER...,breitbart,international-politics-and-world-news,right,1,Somewhat factual but also opinionated,"['tragedy', 'stubborn']"
4,The justices threw out a challenge from gun ri...,https://www.huffpost.com/entry/supreme-court-g...,msnbc,gun-control,left,0,Entirely factual,[]
5,A review of his posts in online message boards...,https://eu.usatoday.com/story/news/nation/2020...,usa-today,white-nationalism,center,1,Entirely factual,['plant']


In [10]:
df_sg2.head()

Unnamed: 0,sentence,news_link,outlet,topic,type,Label_bias,label_opinion,biased_words
0,"""Orange Is the New Black"" star Yael Stone is r...",https://www.foxnews.com/entertainment/australi...,Fox News,environment,right,0,Entirely factual,[]
1,"""We have one beautiful law,"" Trump recently sa...",https://www.alternet.org/2020/06/law-and-order...,Alternet,gun control,left,1,Somewhat factual but also opinionated,"['bizarre', 'characteristically']"
2,"...immigrants as criminals and eugenics, all o...",https://www.nbcnews.com/news/latino/after-step...,MSNBC,white-nationalism,left,1,Expresses writer’s opinion,"['criminals', 'fringe', 'extreme']"
3,...we sounded the alarm in the early months of...,https://www.alternet.org/2019/07/fox-news-has-...,Alternet,white-nationalism,left,1,Somewhat factual but also opinionated,[]
4,[Black Lives Matter] is essentially a non-fals...,http://feedproxy.google.com/~r/breitbart/~3/-v...,Breitbart,marriage-equality,,1,Expresses writer’s opinion,['cult']


In [52]:
# sg1 sentences
fact_bias = df_sg1.loc[(df_sg1['label_opinion']=='Entirely factual') & (df_sg1['Label_bias']==1)].sample().iloc[0]
fact_nonbias = df_sg1.loc[(df_sg1['label_opinion']=='Entirely factual') & (df_sg1['Label_bias']==0)].sample().iloc[0]
opin_bias = df_sg1.loc[(df_sg1['label_opinion']=='Expresses writer’s opinion') & (df_sg1['Label_bias']==1)].sample().iloc[0]
opin_nonbias = df_sg1.loc[(df_sg1['label_opinion']=='Expresses writer’s opinion') & (df_sg1['Label_bias']==0)].sample().iloc[0]
both_bias = df_sg1.loc[(df_sg1['label_opinion']=='Somewhat factual but also opinionated') & (df_sg1['Label_bias']==1)].sample().iloc[0]
both_nonbias = df_sg1.loc[(df_sg1['label_opinion']=='Somewhat factual but also opinionated') & (df_sg1['Label_bias']==0)].sample().iloc[0]

sentences = [fact_bias, fact_nonbias, opin_bias, opin_nonbias, both_bias, both_nonbias]

In [None]:
# BERT SG1
model = TFBertForSequenceClassification.from_pretrained('bert-base-uncased')
optimizer = tf.keras.optimizers.Adam(learning_rate=5e-5) 
model.compile(optimizer=optimizer, loss='binary_crossentropy') 
model.load_weights(f'/content/drive/MyDrive/Colab Notebooks/weights/bert_False_sg1_main')
trained_model_layer = model.get_layer(index=0).get_weights()
model.get_layer(index=0).set_weights(trained_model_layer)

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

In [54]:
cols = ['Opinon Label', 'Sentence', 'Predicted', 'Actual']
output = pd.DataFrame(columns=cols)

for s in sentences:
  inputs = tokenizer(s['sentence'], return_tensors="tf")
  logits = model(**inputs).logits
  predicted_class_id = int(tf.math.argmax(logits, axis=-1)[0])
  row = {'Opinon Label': s['label_opinion'], 
         'Sentence': s['sentence'], 
         'Predicted': predicted_class_id, 
         'Actual': s['Label_bias']
    }
    
  output = output.append(row, ignore_index=True)

print(output)

                            Opinon Label  \
0                       Entirely factual   
1                       Entirely factual   
2             Expresses writer’s opinion   
3             Expresses writer’s opinion   
4  Somewhat factual but also opinionated   
5  Somewhat factual but also opinionated   

                                            Sentence Predicted Actual  
0  While sexual ambiguity does occasionally occur...         0      1  
1  George Washington University (GW)’s Parliament...         0      0  
2  Perhaps that is why both Warren and Sanders ha...         1      1  
3  Despite a pandemic, one-sided presidential nom...         1      0  
4  Before President Donald Trump threatened to vi...         0      1  
5  At the dawn of the year 2010, few Americans co...         0      0  


In [None]:
# BERT Distant SG1
model = TFBertForSequenceClassification.from_pretrained('bert-base-uncased')
optimizer = tf.keras.optimizers.Adam(learning_rate=5e-5) 
model.compile(optimizer=optimizer, loss='binary_crossentropy') 
model.load_weights(f'/content/drive/MyDrive/Colab Notebooks/weights/bert_True_sg1_main')
trained_model_layer = model.get_layer(index=0).get_weights()
model.get_layer(index=0).set_weights(trained_model_layer)

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

gc.collect()

In [56]:
cols = ['Opinon Label', 'Sentence', 'Predicted', 'Actual']
output = pd.DataFrame(columns=cols)

for s in sentences:
  inputs = tokenizer(s['sentence'], return_tensors="tf")
  logits = model(**inputs).logits
  predicted_class_id = int(tf.math.argmax(logits, axis=-1)[0])
  row = {'Opinon Label': s['label_opinion'], 
         'Sentence': s['sentence'], 
         'Predicted': predicted_class_id, 
         'Actual': s['Label_bias']
    }
    
  output = output.append(row, ignore_index=True)

print(output)

                            Opinon Label  \
0                       Entirely factual   
1                       Entirely factual   
2             Expresses writer’s opinion   
3             Expresses writer’s opinion   
4  Somewhat factual but also opinionated   
5  Somewhat factual but also opinionated   

                                            Sentence Predicted Actual  
0  While sexual ambiguity does occasionally occur...         1      1  
1  George Washington University (GW)’s Parliament...         0      0  
2  Perhaps that is why both Warren and Sanders ha...         1      1  
3  Despite a pandemic, one-sided presidential nom...         0      0  
4  Before President Donald Trump threatened to vi...         1      1  
5  At the dawn of the year 2010, few Americans co...         0      0  


In [None]:
# RoBERTa Distant SG1
model = TFBertForSequenceClassification.from_pretrained('roberta-base')
optimizer = tf.keras.optimizers.Adam(learning_rate=5e-5) 
model.compile(optimizer=optimizer, loss='binary_crossentropy') 
model.load_weights(f'/content/drive/MyDrive/Colab Notebooks/weights/roberta_True_sg1_main')
trained_model_layer = model.get_layer(index=0).get_weights()
model.get_layer(index=0).set_weights(trained_model_layer)

tokenizer = RobertaTokenizer.from_pretrained('roberta-base')

In [58]:
cols = ['Opinon Label', 'Sentence', 'Predicted', 'Actual']
output = pd.DataFrame(columns=cols)

for s in sentences:
  inputs = tokenizer(s['sentence'], return_tensors="tf")
  logits = model(**inputs).logits
  predicted_class_id = int(tf.math.argmax(logits, axis=-1)[0])
  row = {'Opinon Label': s['label_opinion'], 
         'Sentence': s['sentence'], 
         'Predicted': predicted_class_id, 
         'Actual': s['Label_bias']
    }
    
  output = output.append(row, ignore_index=True)

print(output)

                            Opinon Label  \
0                       Entirely factual   
1                       Entirely factual   
2             Expresses writer’s opinion   
3             Expresses writer’s opinion   
4  Somewhat factual but also opinionated   
5  Somewhat factual but also opinionated   

                                            Sentence Predicted Actual  
0  While sexual ambiguity does occasionally occur...         0      1  
1  George Washington University (GW)’s Parliament...         1      0  
2  Perhaps that is why both Warren and Sanders ha...         0      1  
3  Despite a pandemic, one-sided presidential nom...         0      0  
4  Before President Donald Trump threatened to vi...         1      1  
5  At the dawn of the year 2010, few Americans co...         0      0  


In [None]:
# DeBERTa SG1
model = TFBertForSequenceClassification.from_pretrained("kamalkraj/deberta-base")
optimizer = tf.keras.optimizers.Adam(learning_rate=5e-5) 
model.compile(optimizer=optimizer, loss='binary_crossentropy') 
model.load_weights(f'/content/drive/MyDrive/Colab Notebooks/weights/deberta_False_sg1_main')
trained_model_layer = model.get_layer(index=0).get_weights()
model.get_layer(index=0).set_weights(trained_model_layer)

tokenizer = DebertaTokenizer.from_pretrained("kamalkraj/deberta-base")

gc.collect()

In [60]:
cols = ['Opinon Label', 'Sentence', 'Predicted', 'Actual']
output = pd.DataFrame(columns=cols)

for s in sentences:
  inputs = tokenizer(s['sentence'], return_tensors="tf")
  logits = model(**inputs).logits
  predicted_class_id = int(tf.math.argmax(logits, axis=-1)[0])
  row = {'Opinon Label': s['label_opinion'], 
         'Sentence': s['sentence'], 
         'Predicted': predicted_class_id, 
         'Actual': s['Label_bias']
    }
    
  output = output.append(row, ignore_index=True)

print(output)

                            Opinon Label  \
0                       Entirely factual   
1                       Entirely factual   
2             Expresses writer’s opinion   
3             Expresses writer’s opinion   
4  Somewhat factual but also opinionated   
5  Somewhat factual but also opinionated   

                                            Sentence Predicted Actual  
0  While sexual ambiguity does occasionally occur...         0      1  
1  George Washington University (GW)’s Parliament...         1      0  
2  Perhaps that is why both Warren and Sanders ha...         0      1  
3  Despite a pandemic, one-sided presidential nom...         0      0  
4  Before President Donald Trump threatened to vi...         0      1  
5  At the dawn of the year 2010, few Americans co...         0      0  


In [None]:
# DeBERTa Distant SG1
model = TFBertForSequenceClassification.from_pretrained("kamalkraj/deberta-base")
optimizer = tf.keras.optimizers.Adam(learning_rate=5e-5) 
model.compile(optimizer=optimizer, loss='binary_crossentropy') 
model.load_weights(f'/content/drive/MyDrive/Colab Notebooks/weights/deberta_True_sg1_main')
trained_model_layer = model.get_layer(index=0).get_weights()
model.get_layer(index=0).set_weights(trained_model_layer)

tokenizer = DebertaTokenizer.from_pretrained("kamalkraj/deberta-base")

gc.collect()

In [62]:
cols = ['Opinon Label', 'Sentence', 'Predicted', 'Actual']
output = pd.DataFrame(columns=cols)

for s in sentences:
  inputs = tokenizer(s['sentence'], return_tensors="tf")
  logits = model(**inputs).logits
  predicted_class_id = int(tf.math.argmax(logits, axis=-1)[0])
  row = {'Opinon Label': s['label_opinion'], 
         'Sentence': s['sentence'], 
         'Predicted': predicted_class_id, 
         'Actual': s['Label_bias']
    }
    
  output = output.append(row, ignore_index=True)

print(output)

                            Opinon Label  \
0                       Entirely factual   
1                       Entirely factual   
2             Expresses writer’s opinion   
3             Expresses writer’s opinion   
4  Somewhat factual but also opinionated   
5  Somewhat factual but also opinionated   

                                            Sentence Predicted Actual  
0  While sexual ambiguity does occasionally occur...         0      1  
1  George Washington University (GW)’s Parliament...         0      0  
2  Perhaps that is why both Warren and Sanders ha...         0      1  
3  Despite a pandemic, one-sided presidential nom...         0      0  
4  Before President Donald Trump threatened to vi...         0      1  
5  At the dawn of the year 2010, few Americans co...         0      0  


In [None]:
# ELECTRA SG1
model = TFBertForSequenceClassification.from_pretrained('google/electra-small-discriminator')
optimizer = tf.keras.optimizers.Adam(learning_rate=5e-5) 
model.compile(optimizer=optimizer, loss='binary_crossentropy') 
model.load_weights(f'/content/drive/MyDrive/Colab Notebooks/weights/electra_False_sg1_main')
trained_model_layer = model.get_layer(index=0).get_weights()
model.get_layer(index=0).set_weights(trained_model_layer)

tokenizer = ElectraTokenizer.from_pretrained('google/electra-small-discriminator')

gc.collect()

In [64]:
cols = ['Opinon Label', 'Sentence', 'Predicted', 'Actual']
output = pd.DataFrame(columns=cols)

for s in sentences:
  inputs = tokenizer(s['sentence'], return_tensors="tf")
  logits = model(**inputs).logits
  predicted_class_id = int(tf.math.argmax(logits, axis=-1)[0])
  row = {'Opinon Label': s['label_opinion'], 
         'Sentence': s['sentence'], 
         'Predicted': predicted_class_id, 
         'Actual': s['Label_bias']
    }
    
  output = output.append(row, ignore_index=True)

print(output)

                            Opinon Label  \
0                       Entirely factual   
1                       Entirely factual   
2             Expresses writer’s opinion   
3             Expresses writer’s opinion   
4  Somewhat factual but also opinionated   
5  Somewhat factual but also opinionated   

                                            Sentence Predicted Actual  
0  While sexual ambiguity does occasionally occur...         1      1  
1  George Washington University (GW)’s Parliament...         0      0  
2  Perhaps that is why both Warren and Sanders ha...         1      1  
3  Despite a pandemic, one-sided presidential nom...         1      0  
4  Before President Donald Trump threatened to vi...         1      1  
5  At the dawn of the year 2010, few Americans co...         0      0  


In [65]:
# sg2 sentences
fact_bias = df_sg2.loc[(df_sg2['label_opinion']=='Entirely factual') & (df_sg2['Label_bias']==1)].sample().iloc[0]
fact_nonbias = df_sg2.loc[(df_sg2['label_opinion']=='Entirely factual') & (df_sg2['Label_bias']==0)].sample().iloc[0]
opin_bias = df_sg2.loc[(df_sg2['label_opinion']=='Expresses writer’s opinion') & (df_sg2['Label_bias']==1)].sample().iloc[0]
opin_nonbias = df_sg2.loc[(df_sg2['label_opinion']=='Expresses writer’s opinion') & (df_sg2['Label_bias']==0)].sample().iloc[0]
both_bias = df_sg2.loc[(df_sg2['label_opinion']=='Somewhat factual but also opinionated') & (df_sg2['Label_bias']==1)].sample().iloc[0]
both_nonbias = df_sg2.loc[(df_sg2['label_opinion']=='Somewhat factual but also opinionated') & (df_sg2['Label_bias']==0)].sample().iloc[0]

sentences = [fact_bias, fact_nonbias, opin_bias, opin_nonbias, both_bias, both_nonbias]

In [None]:
# BERT Distant SG2
model = TFBertForSequenceClassification.from_pretrained('bert-base-uncased')
optimizer = tf.keras.optimizers.Adam(learning_rate=5e-5) 
model.compile(optimizer=optimizer, loss='binary_crossentropy') 
model.load_weights(f'/content/drive/MyDrive/Colab Notebooks/weights/bert_True_sg2_main')
trained_model_layer = model.get_layer(index=0).get_weights()
model.get_layer(index=0).set_weights(trained_model_layer)

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

gc.collect()

In [67]:
cols = ['Opinon Label', 'Sentence', 'Predicted', 'Actual']
output = pd.DataFrame(columns=cols)

for s in sentences:
  inputs = tokenizer(s['sentence'], return_tensors="tf")
  logits = model(**inputs).logits
  predicted_class_id = int(tf.math.argmax(logits, axis=-1)[0])
  row = {'Opinon Label': s['label_opinion'], 
         'Sentence': s['sentence'], 
         'Predicted': predicted_class_id, 
         'Actual': s['Label_bias']
    }
    
  output = output.append(row, ignore_index=True)

print(output)

                            Opinon Label  \
0                       Entirely factual   
1                       Entirely factual   
2             Expresses writer’s opinion   
3             Expresses writer’s opinion   
4  Somewhat factual but also opinionated   
5  Somewhat factual but also opinionated   

                                            Sentence Predicted Actual  
0  A tear slipped down the cheek of Ghanaian-Germ...         1      1  
1  Swedish police say they are searching for a bl...         1      0  
2  The Doctor Who franchise has tested this notio...         1      1  
3  Reducing the upper tax rate can often, counter...         0      0  
4  While the mayor has severely curtailed religio...         0      1  
5  Although Facebook Chief Operating Officer Sher...         1      0  


In [None]:
# RoBERTa Distant SG2
model = TFBertForSequenceClassification.from_pretrained('roberta-base')
optimizer = tf.keras.optimizers.Adam(learning_rate=5e-5) 
model.compile(optimizer=optimizer, loss='binary_crossentropy') 
model.load_weights(f'/content/drive/MyDrive/Colab Notebooks/weights/roberta_True_sg2_main')
trained_model_layer = model.get_layer(index=0).get_weights()
model.get_layer(index=0).set_weights(trained_model_layer)

tokenizer = RobertaTokenizer.from_pretrained('roberta-base')

gc.collect()

In [69]:
cols = ['Opinon Label', 'Sentence', 'Predicted', 'Actual']
output = pd.DataFrame(columns=cols)

for s in sentences:
  inputs = tokenizer(s['sentence'], return_tensors="tf")
  logits = model(**inputs).logits
  predicted_class_id = int(tf.math.argmax(logits, axis=-1)[0])
  row = {'Opinon Label': s['label_opinion'], 
         'Sentence': s['sentence'], 
         'Predicted': predicted_class_id, 
         'Actual': s['Label_bias']
    }
    
  output = output.append(row, ignore_index=True)

print(output)

                            Opinon Label  \
0                       Entirely factual   
1                       Entirely factual   
2             Expresses writer’s opinion   
3             Expresses writer’s opinion   
4  Somewhat factual but also opinionated   
5  Somewhat factual but also opinionated   

                                            Sentence Predicted Actual  
0  A tear slipped down the cheek of Ghanaian-Germ...         0      1  
1  Swedish police say they are searching for a bl...         0      0  
2  The Doctor Who franchise has tested this notio...         1      1  
3  Reducing the upper tax rate can often, counter...         0      0  
4  While the mayor has severely curtailed religio...         0      1  
5  Although Facebook Chief Operating Officer Sher...         0      0  


In [None]:
# DeBERTa SG2
model = TFBertForSequenceClassification.from_pretrained("kamalkraj/deberta-base")
optimizer = tf.keras.optimizers.Adam(learning_rate=5e-5) 
model.compile(optimizer=optimizer, loss='binary_crossentropy') 
model.load_weights(f'/content/drive/MyDrive/Colab Notebooks/weights/deberta_False_sg2_main')
trained_model_layer = model.get_layer(index=0).get_weights()
model.get_layer(index=0).set_weights(trained_model_layer)

tokenizer = DebertaTokenizer.from_pretrained("kamalkraj/deberta-base")

gc.collect()

In [71]:
cols = ['Opinon Label', 'Sentence', 'Predicted', 'Actual']
output = pd.DataFrame(columns=cols)

for s in sentences:
  inputs = tokenizer(s['sentence'], return_tensors="tf")
  logits = model(**inputs).logits
  predicted_class_id = int(tf.math.argmax(logits, axis=-1)[0])
  row = {'Opinon Label': s['label_opinion'], 
         'Sentence': s['sentence'], 
         'Predicted': predicted_class_id, 
         'Actual': s['Label_bias']
    }
    
  output = output.append(row, ignore_index=True)

print(output)

                            Opinon Label  \
0                       Entirely factual   
1                       Entirely factual   
2             Expresses writer’s opinion   
3             Expresses writer’s opinion   
4  Somewhat factual but also opinionated   
5  Somewhat factual but also opinionated   

                                            Sentence Predicted Actual  
0  A tear slipped down the cheek of Ghanaian-Germ...         0      1  
1  Swedish police say they are searching for a bl...         0      0  
2  The Doctor Who franchise has tested this notio...         0      1  
3  Reducing the upper tax rate can often, counter...         0      0  
4  While the mayor has severely curtailed religio...         0      1  
5  Although Facebook Chief Operating Officer Sher...         0      0  


In [None]:
# DeBERTa Distant SG2
model = TFBertForSequenceClassification.from_pretrained("kamalkraj/deberta-base")
optimizer = tf.keras.optimizers.Adam(learning_rate=5e-5) 
model.compile(optimizer=optimizer, loss='binary_crossentropy') 
model.load_weights(f'/content/drive/MyDrive/Colab Notebooks/weights/deberta_True_sg2_main')
trained_model_layer = model.get_layer(index=0).get_weights()
model.get_layer(index=0).set_weights(trained_model_layer)

tokenizer = DebertaTokenizer.from_pretrained("kamalkraj/deberta-base")

gc.collect()

In [73]:
cols = ['Opinon Label', 'Sentence', 'Predicted', 'Actual']
output = pd.DataFrame(columns=cols)

for s in sentences:
  inputs = tokenizer(s['sentence'], return_tensors="tf")
  logits = model(**inputs).logits
  predicted_class_id = int(tf.math.argmax(logits, axis=-1)[0])
  row = {'Opinon Label': s['label_opinion'], 
         'Sentence': s['sentence'], 
         'Predicted': predicted_class_id, 
         'Actual': s['Label_bias']
    }
    
  output = output.append(row, ignore_index=True)

print(output)

                            Opinon Label  \
0                       Entirely factual   
1                       Entirely factual   
2             Expresses writer’s opinion   
3             Expresses writer’s opinion   
4  Somewhat factual but also opinionated   
5  Somewhat factual but also opinionated   

                                            Sentence Predicted Actual  
0  A tear slipped down the cheek of Ghanaian-Germ...         1      1  
1  Swedish police say they are searching for a bl...         1      0  
2  The Doctor Who franchise has tested this notio...         1      1  
3  Reducing the upper tax rate can often, counter...         1      0  
4  While the mayor has severely curtailed religio...         1      1  
5  Although Facebook Chief Operating Officer Sher...         1      0  


In [None]:
# ELECTRA SG2
model = TFBertForSequenceClassification.from_pretrained('google/electra-small-discriminator')
optimizer = tf.keras.optimizers.Adam(learning_rate=5e-5) 
model.compile(optimizer=optimizer, loss='binary_crossentropy') 
model.load_weights(f'/content/drive/MyDrive/Colab Notebooks/weights/electra_False_sg2_main')
trained_model_layer = model.get_layer(index=0).get_weights()
model.get_layer(index=0).set_weights(trained_model_layer)

tokenizer = ElectraTokenizer.from_pretrained('google/electra-small-discriminator')

gc.collect()

In [75]:
cols = ['Opinon Label', 'Sentence', 'Predicted', 'Actual']
output = pd.DataFrame(columns=cols)

for s in sentences:
  inputs = tokenizer(s['sentence'], return_tensors="tf")
  logits = model(**inputs).logits
  predicted_class_id = int(tf.math.argmax(logits, axis=-1)[0])
  row = {'Opinon Label': s['label_opinion'], 
         'Sentence': s['sentence'], 
         'Predicted': predicted_class_id, 
         'Actual': s['Label_bias']
    }
    
  output = output.append(row, ignore_index=True)

print(output)

                            Opinon Label  \
0                       Entirely factual   
1                       Entirely factual   
2             Expresses writer’s opinion   
3             Expresses writer’s opinion   
4  Somewhat factual but also opinionated   
5  Somewhat factual but also opinionated   

                                            Sentence Predicted Actual  
0  A tear slipped down the cheek of Ghanaian-Germ...         0      1  
1  Swedish police say they are searching for a bl...         0      0  
2  The Doctor Who franchise has tested this notio...         1      1  
3  Reducing the upper tax rate can often, counter...         0      0  
4  While the mayor has severely curtailed religio...         0      1  
5  Although Facebook Chief Operating Officer Sher...         0      0  
