In [None]:
import pandas as pd
from sklearn.metrics import precision_recall_fscore_support, accuracy_score
from transformers import set_seed, enable_full_determinism

In [None]:
def enable_reproducability(repro_seed):
    """Apply ``repro_seed`` through both transformers seeding helpers.

    Calls ``set_seed`` first and ``enable_full_determinism`` second, each with
    the same seed value.

    Args:
        repro_seed: Seed value forwarded unchanged to both helpers.
    """
    # Order matters only in that it mirrors the established call sequence.
    for seeding_helper in (set_seed, enable_full_determinism):
        seeding_helper(repro_seed)

In [None]:
def select_text(df, remove_footer, remove_emojis):
    """Build a ``text``/``label`` frame from one pre-processed text variant.

    The variant column is chosen from the two flags:

    ============= ============= =========================================
    remove_footer remove_emojis column used as ``text``
    ============= ============= =========================================
    truthy        truthy        ``no_footer_no_replacement_words_no_emoji``
    truthy        falsy         ``no_footer_no_replacement_words``
    falsy         truthy        ``no_replacement_words_no_emoji``
    falsy         falsy         ``no_replacement_words``
    ============= ============= =========================================

    The input frame is never modified, so the same raw frame can be passed
    repeatedly with different flag combinations.

    Args:
        df: DataFrame holding a ``label`` column and the variant column
            selected by the flags (or an existing ``text`` column).
        remove_footer: Select a ``no_footer_*`` variant when truthy.
        remove_emojis: Select a ``*_no_emoji`` variant when truthy.

    Returns:
        A new DataFrame with exactly the columns ``text`` (cast to ``str``)
        and ``label``, keeping the index of ``df``.

    Raises:
        KeyError: If neither the selected variant column nor a ``text``
            column is present, or if ``label`` is missing.
    """
    # Assemble the variant column name from the two flags.
    source_column = "no_replacement_words"
    if remove_footer:
        source_column = "no_footer_" + source_column
    if remove_emojis:
        source_column += "_no_emoji"

    if source_column not in df.columns:
        # A frame that already went through this function only carries
        # 'text'; keep accepting it so re-running a notebook cell still works.
        if "text" not in df.columns:
            raise KeyError(
                f"Neither '{source_column}' nor 'text' found in DataFrame columns"
            )
        source_column = "text"

    # Selecting + renaming yields a new object, so the caller's frame keeps
    # its original column names and dtypes.
    selected = df[[source_column, "label"]].rename(columns={source_column: "text"})
    selected["text"] = selected["text"].astype(str)

    print('Length of df: ', len(selected))

    return selected

In [None]:
def downsample(train, sampling_seed):
    """Balance ``train`` by sampling as many label-0 rows as there are label-1 rows.

    All rows with ``label == 1`` are kept. From the rows with ``label == 0`` a
    sample of the same size is drawn without replacement, the two parts are
    concatenated, and the result is shuffled. Both the draw and the shuffle
    use ``sampling_seed`` as ``random_state``.

    Args:
        train: DataFrame with a ``label`` column.
        sampling_seed: ``random_state`` passed to both ``sample`` calls.

    Returns:
        The shuffled, balanced DataFrame (original index values retained).
    """
    positives = train[train.label == 1]
    negatives = train.query('(label==0)')

    # Draw exactly one negative per positive.
    sampled_negatives = negatives.sample(
        n=len(positives),
        random_state=sampling_seed,
    )

    balanced = pd.concat([sampled_negatives, positives])
    # frac=1 returns every row in a new random order.
    return balanced.sample(frac=1, random_state=sampling_seed)

In [None]:
def compute_metrics(pred):
    """Compute accuracy plus per-class precision, recall and F1.

    Predicted classes are obtained by taking ``argmax`` over the last axis of
    ``pred.predictions`` and are compared with ``pred.label_ids``.

    Args:
        pred: Object exposing ``label_ids`` and ``predictions`` attributes
            (presumably a transformers ``EvalPrediction`` -- confirm against
            the caller).

    Returns:
        dict with the keys ``accuracy``, ``f1_1``, ``precision_1``,
        ``recall_1``, ``f1_0``, ``precision_0`` and ``recall_0``; the ``_1`` /
        ``_0`` suffix names the class treated as positive for that score.
    """
    y_true = pred.label_ids
    y_pred = pred.predictions.argmax(-1)

    def _binary_scores(positive_class):
        # Drop the trailing support value; only precision/recall/F1 are used.
        scores = precision_recall_fscore_support(
            y_true, y_pred, average='binary', pos_label=positive_class
        )
        return scores[:3]

    prec_pos, rec_pos, f1_pos = _binary_scores(1)
    prec_neg, rec_neg, f1_neg = _binary_scores(0)
    accuracy = accuracy_score(y_true, y_pred)

    return {
        'accuracy': accuracy,
        'f1_1': f1_pos,
        'precision_1': prec_pos,
        'recall_1': rec_pos,
        'f1_0': f1_neg,
        'precision_0': prec_neg,
        'recall_0': rec_neg
    }