# Performance evaluation of the pre-trained RoBERTa classifier used in the Coan et al. (2021) article 'Computer-assisted detection and classification of misinformation about climate change'

In [None]:
## Author: Mirjam Nanko
## Date Created: 2021-02-01
## Email: m.nanko@exeter.ac.uk

## Setup

In [None]:
# Load the required packages

# Dataframes
import pandas as pd

# Regular expressions
import re

# Unicode normalization
import unicodedata

# Timestamp / time measurement
import time

# Simpletransformers classifier
from simpletransformers.classification import ClassificationModel

# Label encode
from sklearn.preprocessing import LabelEncoder

# Class weights
from sklearn.utils.class_weight import compute_class_weight

# Model performance scores
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score

# PyTorch: enable GPU access
import torch

# If you want to select a specific GPU, set it here:
# gpu = 0
# torch.cuda.set_device(gpu) 

# Choose the PyTorch device: fall back to the CPU when CUDA is not available
if not torch.cuda.is_available():
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")

# Otherwise use the GPU and report which one is currently selected
else:
    device = torch.device("cuda")

    print('There are %d GPU(s) available.' % torch.cuda.device_count())

    print('We will use GPU {}:'.format(torch.cuda.current_device()), torch.cuda.get_device_name(torch.cuda.current_device()))

In [None]:
# Define required functions

# Define additional model performance scores (F1)
def f1_multiclass_macro(labels, preds):
    """Return the macro-averaged F1 score of `preds` against `labels`."""
    score = f1_score(y_true=labels, y_pred=preds, average='macro')
    return score
def f1_multiclass_micro(labels, preds):
    """Return the micro-averaged F1 score of `preds` against `labels`."""
    score = f1_score(y_true=labels, y_pred=preds, average='micro')
    return score
def f1_multiclass_weighted(labels, preds):
    """Return the F1 score of `preds` against `labels` with `average='weighted'`."""
    score = f1_score(y_true=labels, y_pred=preds, average='weighted')
    return score
def f1_class(labels, preds):
    """Return the per-class F1 scores of `preds` against `labels` (no averaging)."""
    per_class_scores = f1_score(y_true=labels, y_pred=preds, average=None)
    return per_class_scores
def precision(labels, preds):
    """Return the macro-averaged precision of `preds` against `labels`."""
    score = precision_score(y_true=labels, y_pred=preds, average='macro')
    return score
def recall(labels, preds):
    """Return the macro-averaged recall of `preds` against `labels`."""
    score = recall_score(y_true=labels, y_pred=preds, average='macro')
    return score

# Define text pre-processing functions
def remove_between_square_brackets(text):
    """Delete every `[...]` span (brackets included) from `text`.

    The surrounding whitespace is left untouched, so removing a bracketed
    span in the middle of a sentence leaves two adjacent spaces behind.
    """
    # Raw string: '\[' and '\]' are invalid escape sequences in a normal
    # string literal and trigger a warning on recent Python versions.
    return re.sub(r'\[[^]]*\]', '', text)
def remove_non_ascii(text):
    """Return `text` reduced to ASCII characters.

    The string is NFKD-normalized first, so accented letters keep their
    base letter; characters that still have no ASCII form are dropped.
    """
    decomposed = unicodedata.normalize('NFKD', text)
    ascii_bytes = decomposed.encode('ascii', 'ignore')
    return ascii_bytes.decode('utf-8', 'ignore')
def strip_underscores(text):
    """Replace each run of one or more underscores in `text` with a single space."""
    underscore_run = re.compile(r'_+')
    return underscore_run.sub(' ', text)
def remove_multiple_spaces(text):
    """Collapse each run of two or more whitespace characters in `text` into one space.

    A single whitespace character (including a lone tab or newline) is kept as is.
    """
    whitespace_run = re.compile(r'\s{2,}')
    return whitespace_run.sub(' ', text)

# Merge text pre-processing functions
def denoise_text(text):
    """Run all text pre-processing steps on `text` and strip the result.

    The steps are applied in this order: remove bracketed spans, reduce to
    ASCII, turn underscores into spaces, collapse repeated whitespace.
    """
    cleaning_steps = (
        remove_between_square_brackets,
        remove_non_ascii,
        strip_underscores,
        remove_multiple_spaces,
    )
    for clean in cleaning_steps:
        text = clean(text)
    return text.strip()

## Data Preparation

In [None]:
# Load and pre-process the text data

# Load the data
valid = pd.read_csv('training/valid.csv').rename(columns={"label": "labels_orig"})
test = pd.read_csv('training/test.csv').rename(columns={"label": "labels_orig"})

# Pre-process the text
valid['text'] = valid['text'].astype(str).apply(denoise_text)
test['text'] = test['text'].astype(str).apply(denoise_text)

# Load the label encoder
label_encoder = LabelEncoder()

# Fit the encoder ONCE on the labels of both splits. Fitting it separately on
# each split would give the same class different integer ids whenever one
# split lacks a class that the other contains, silently corrupting the scores.
# NOTE(review): the ids must also match the encoding the classifier was trained
# with - presumably a LabelEncoder fit on the training labels; confirm.
label_encoder.fit(pd.concat([valid.labels_orig, test.labels_orig], ignore_index=True))

# Encode the labels with the shared mapping
valid['labels'] = label_encoder.transform(valid.labels_orig)
test['labels'] = label_encoder.transform(test.labels_orig)

## Evaluate the performance of the pre-trained RoBERTa classifier used in the Coan et al. (2021) article


In [None]:
%%time

# Define model 
architecture = 'roberta'

# Define trained classifier
model_name = 'CARDS_RoBERTa_Classifier'

# Load the classifier
model = ClassificationModel(architecture, model_name)

# Evaluate the model on the validation data
result, model_outputs, wrong_predictions = model.eval_model(valid, 
                                                            f1_macro = f1_multiclass_macro, 
                                                            precision = precision, 
                                                            recall = recall,
                                                            acc = accuracy_score,
                                                            f1_micro = f1_multiclass_micro, 
                                                            f1_weighted = f1_multiclass_weighted, 
                                                            f1_class = f1_class)

# Evaluate the model on the testing data
result_test, model_outputs_test, wrong_predictions_test = model.eval_model(test, 
                                                                           f1_macro = f1_multiclass_macro,
                                                                           precision = precision, 
                                                                           recall = recall,
                                                                           acc = accuracy_score, 
                                                                           f1_micro = f1_multiclass_micro, 
                                                                           f1_weighted = f1_multiclass_weighted, 
                                                                           f1_class = f1_class)

# Print results
print('\n\nThese are the results when testing the model on the validation data set:\n')
print(result)

print('\n\nThese are the results when testing the model on the testing data set:\n')
print(result_test)