In [2]:
# core
import pandas as pd
import numpy as np
import time

# modeling
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

import torch
import torch.nn as nn
from torch.utils.data import DataLoader

# functional
import joblib
import pickle

# custom
from scripts.classes import RnnTextClassifier, RnnDataset
from scripts.functions import train_rnn, rnn_bce_logits_predict

# warnings
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

## Load Data

In [3]:
# Stems of the pickle files under data/: four feature representations per split,
# followed by the two label vectors.
data_names = [
    'tfidf_train', 'w2v_train', 'bert_train', 'gpt_train',
    'tfidf_test', 'w2v_test', 'bert_test', 'gpt_test',
    'y_train', 'y_test',
]


def _read_pickle(stem):
    """Unpickle and return the object stored in data/<stem>.pkl."""
    with open(f'data/{stem}.pkl', 'rb') as handle:
        return pickle.load(handle)


# Bind each loaded object to a notebook-level variable named after its file.
for var_name in data_names:
    globals()[var_name] = _read_pickle(var_name)

In [4]:
# The sorted unique training labels define the integer encoding (position -> label).
unique_labels = np.unique(y_train)

int_to_label = dict(enumerate(unique_labels))
label_to_int = {label: idx for idx, label in int_to_label.items()}

## Binarize Labels

In [5]:
# Show the integer -> label mapping derived from the unique values of y_train.
int_to_label

{0: 'barely-true',
 1: 'false',
 2: 'half-true',
 3: 'mostly-true',
 4: 'pants-fire',
 5: 'true'}

In [6]:
# The six ratings are collapsed into two groups.
false = ['false', 'pants-fire', 'barely-true']
true = ['half-true', 'mostly-true', 'true']

# Binary target as a float tensor: 1.0 when the rating is in the `false` group, else 0.0.
train_is_false = np.asarray(y_train.isin(false))
test_is_false = np.asarray(y_test.isin(false))
y_train_binary = torch.tensor(train_is_false, dtype = torch.float)
y_test_binary = torch.tensor(test_is_false, dtype = torch.float)

# Relative frequency of each distinct target value in both splits.
train_share = y_train_binary.unique(return_counts = True)[1] / len(y_train_binary)
test_share = y_test_binary.unique(return_counts = True)[1] / len(y_test_binary)

print(
    f'---Train---\n{train_share}\n'
    f'---Test---\n{test_share}'
)

---Train---
tensor([0.5617, 0.4383])
---Test---
tensor([0.5635, 0.4365])


In [6]:
# Persist both binary label tensors as pickles under data/.
for stem, labels in (('y_train_binary', y_train_binary), ('y_test_binary', y_test_binary)):
    with open(f'data/{stem}.pkl', 'wb') as f:
        pickle.dump(labels, f)

## Retrain models on binary labels

In [7]:
# Training is disabled: every line of this cell is commented out, so running the notebook
# does not refit anything. The fitted models are read back from models/ further down.
# When enabled, the loop fits an RNN, a logistic regression, a random forest and a linear
# SVM on each of the four feature sets against y_train_binary and saves each under models/.
# NOTE(review): batch_size = int(X_train.shape[0] / 128) gives roughly 128 batches per
# epoch rather than 128 samples per batch — confirm this is intended.
# start_time = time.perf_counter()

# for X_train, X_name in zip([tfidf_train, w2v_train, bert_train, gpt_train], ['tfidf', 'w2v', 'bert', 'gpt']):

#     # recurrent neural network
#     rnn = RnnTextClassifier(
#         input_size = X_train.shape[1], output_size = 1, 
#         hidden_size = 256, num_layers = 2, dropout = 0.5
#     )
#     criterion = nn.BCEWithLogitsLoss()
#     optimizer = torch.optim.Adam(rnn.parameters(), lr = 0.001)
#     n_epochs = range(100)
#     dataset = RnnDataset(X_train, y_train_binary.unsqueeze(1))
#     data_loader = DataLoader(dataset, batch_size = int(X_train.shape[0] / 128), shuffle = True)
#     trained_rnn = train_rnn(rnn, data_loader, criterion, optimizer, n_epochs)
#     torch.save(trained_rnn, f'models/rnn_binary_{X_name}.pth')
#     print(f'Finished with RNN-{X_name} - Time elapsed: {(time.perf_counter()-start_time)/60:.2f}\n')

#     # logistic regression
#     lr = LogisticRegression(max_iter = 4000, penalty='l2', C = 0.1)
#     lr.fit(X_train, y_train_binary)
#     joblib.dump(lr, f'models/lr_binary_{X_name}.joblib')
#     print(f'Finished with LR-{X_name} - Time elapsed: {(time.perf_counter()-start_time)/60:.2f}\n')

#     # random forest
#     rf = RandomForestClassifier(n_estimators = 1000, max_depth = 100, max_features = 'log2')
#     rf.fit(X_train, y_train_binary)
#     joblib.dump(rf, f'models/rf_binary_{X_name}.joblib')
#     print(f'Finished with RF-{X_name} - Time elapsed: {(time.perf_counter()-start_time)/60:.2f}\n')
        
#     # support vector machine
#     svm = SVC(kernel = 'linear')
#     svm.fit(X_train, y_train_binary)
#     joblib.dump(svm, f'models/svm_binary_{X_name}.joblib')
#     print(f'Finished with SVM-{X_name} - Time elapsed: {(time.perf_counter()-start_time)/60:.2f}\n')

In [8]:
# Fitted binary classifiers, one per (model family, feature set) pair.

# Logistic regression (joblib archives).
lr_tfidf = joblib.load('models/lr_binary_tfidf.joblib')
lr_w2v = joblib.load('models/lr_binary_w2v.joblib')
lr_bert = joblib.load('models/lr_binary_bert.joblib')
lr_gpt = joblib.load('models/lr_binary_gpt.joblib')

# Random forest (joblib archives).
rf_tfidf = joblib.load('models/rf_binary_tfidf.joblib')
rf_w2v = joblib.load('models/rf_binary_w2v.joblib')
rf_bert = joblib.load('models/rf_binary_bert.joblib')
rf_gpt = joblib.load('models/rf_binary_gpt.joblib')

# Support vector machine (joblib archives).
svm_tfidf = joblib.load('models/svm_binary_tfidf.joblib')
svm_w2v = joblib.load('models/svm_binary_w2v.joblib')
svm_bert = joblib.load('models/svm_binary_bert.joblib')
svm_gpt = joblib.load('models/svm_binary_gpt.joblib')

# The RNN checkpoints are used directly as model objects in the evaluation, i.e. they
# are whole pickled modules rather than state_dicts. PyTorch >= 2.6 defaults
# torch.load to weights_only=True, which refuses to unpickle arbitrary classes, so the
# full-object load has to be requested explicitly.
# SECURITY: weights_only=False executes pickle code; only load checkpoints you trust.
rnn_tfidf = torch.load('models/rnn_binary_tfidf.pth', weights_only=False)
rnn_w2v = torch.load('models/rnn_binary_w2v.pth', weights_only=False)
rnn_bert = torch.load('models/rnn_binary_bert.pth', weights_only=False)
rnn_gpt = torch.load('models/rnn_binary_gpt.pth', weights_only=False)

## Evaluation

In [9]:
# Feature-set order used throughout this cell: tfidf, w2v, bert, gpt.
data_names = ['tfidf', 'w2v', 'bert', 'gpt']

# (train, test) matrix pairs, in data_names order.
data_sets = list(zip(
    [tfidf_train, w2v_train, bert_train, gpt_train],
    [tfidf_test, w2v_test, bert_test, gpt_test],
))

# Fitted models grouped by family (RNN, LR, RF, SVM); inside each group the
# entries follow data_names order.
models = (
    [rnn_tfidf, rnn_w2v, rnn_bert, rnn_gpt]
    + [lr_tfidf, lr_w2v, lr_bert, lr_gpt]
    + [rf_tfidf, rf_w2v, rf_bert, rf_gpt]
    + [svm_tfidf, svm_w2v, svm_bert, svm_gpt]
)

In [16]:
columns = ['model', 'data', 'accuracy_is', 'accuracy_oos', 'precision_oos', 'recall_oos', 'f1_oos']

# One metrics record per (model, feature set). Records are gathered in a plain list and
# the frame is built once, rather than pd.concat-ing onto an initially empty DataFrame on
# every iteration (repeated copying, and concatenating empty frames is deprecated).
records = []

try:
    for position, model in enumerate(models):
        # `models` is grouped by family with the feature sets cycling inside each group,
        # so the matching data is found by wrapping the position around data_sets.
        X_train, X_test = data_sets[position % len(data_sets)]
        data_name = data_names[position % len(data_names)]

        # torch modules are scored through the project helper rnn_bce_logits_predict;
        # everything else is expected to expose a scikit-learn style .predict().
        if isinstance(model, nn.Module):
            y_pred_train = rnn_bce_logits_predict(model, X_train)
            y_pred_test = rnn_bce_logits_predict(model, X_test)
        else:
            y_pred_train = model.predict(X_train)
            y_pred_test = model.predict(X_test)

        # In-sample accuracy plus the out-of-sample metric set.
        accuracy_train = accuracy_score(y_train_binary, y_pred_train)
        accuracy_test = accuracy_score(y_test_binary, y_pred_test)
        precision_oos = precision_score(y_test_binary, y_pred_test)
        recall_oos = recall_score(y_test_binary, y_pred_test)
        f1_oos = f1_score(y_test_binary, y_pred_test)
        # normalize="true" makes each row (true class) sum to 1.
        confusion_oos = confusion_matrix(y_test_binary, y_pred_test, normalize="true")

        records.append(dict(zip(columns, [
            str(model), data_name, accuracy_train, accuracy_test,
            precision_oos, recall_oos, f1_oos
        ])))

        print(
            f'Model: {model}\n'
            f'Data: {data_name}\n'
            f'In-sample accuracy: {accuracy_train:.3f}\n'
            f'Out-of-sample accuracy: {accuracy_test:.3f}\n'
            f'Out-of-sample F1: {f1_oos:.3f}\n'
            f'Confusion Matrix (OOS):\n{np.round(confusion_oos, 3)}\n'
            f'\n----------\n'
        )
finally:
    # Built in `finally` so that interrupting a slow model still leaves `results`
    # holding every row finished so far.
    results = pd.DataFrame(records, columns = columns)

Model: RnnTextClassifier(
  (rnn): RNN(1000, 256, num_layers=2, batch_first=True, dropout=0.5)
  (dropout): Dropout(p=0.5, inplace=False)
  (fc): Linear(in_features=256, out_features=1, bias=True)
)
Data: tfidf
In-sample accuracy: 0.825
Out-of-sample accuracy: 0.558
Out-of-sample F1: 0.511
Confusion Matrix (OOS):
[[0.58 0.42]
 [0.47 0.53]]

----------

Model: RnnTextClassifier(
  (rnn): RNN(1000, 256, num_layers=2, batch_first=True, dropout=0.5)
  (dropout): Dropout(p=0.5, inplace=False)
  (fc): Linear(in_features=256, out_features=1, bias=True)
)
Data: w2v
In-sample accuracy: 0.591
Out-of-sample accuracy: 0.579
Out-of-sample F1: 0.476
Confusion Matrix (OOS):
[[0.689 0.311]
 [0.562 0.438]]

----------

Model: RnnTextClassifier(
  (rnn): RNN(768, 256, num_layers=2, batch_first=True, dropout=0.5)
  (dropout): Dropout(p=0.5, inplace=False)
  (fc): Linear(in_features=256, out_features=1, bias=True)
)
Data: bert
In-sample accuracy: 0.922
Out-of-sample accuracy: 0.612
Out-of-sample F1: 0.475

KeyboardInterrupt: 

In [12]:
# Display the per-model metrics table gathered by the evaluation loop.
results

Unnamed: 0,model,data,accuracy_is,accuracy_oos,precision_oos,recall_oos,f1_oos
0,"RnnTextClassifier(\n (rnn): RNN(1000, 256, nu...",tfidf,0.825391,0.558011,0.494098,0.529837,0.511344
1,"RnnTextClassifier(\n (rnn): RNN(1000, 256, nu...",w2v,0.591016,0.579321,0.521552,0.437613,0.47591
2,"RnnTextClassifier(\n (rnn): RNN(768, 256, num...",bert,0.922168,0.61247,0.581152,0.401447,0.474866
3,"RnnTextClassifier(\n (rnn): RNN(768, 256, num...",gpt,0.698047,0.587214,0.556818,0.265823,0.359853
4,"LogisticRegression(C=0.1, max_iter=4000)",tfidf,0.635645,0.606946,0.621145,0.254973,0.361538
5,"LogisticRegression(C=0.1, max_iter=4000)",w2v,0.588574,0.596685,0.561047,0.349005,0.430323
6,"LogisticRegression(C=0.1, max_iter=4000)",bert,0.663965,0.621152,0.60767,0.372514,0.461883
7,"LogisticRegression(C=0.1, max_iter=4000)",gpt,0.671875,0.585635,0.540936,0.334539,0.413408
8,"RandomForestClassifier(max_depth=100, max_feat...",tfidf,0.959863,0.599842,0.58156,0.296564,0.392814
9,"RandomForestClassifier(max_depth=100, max_feat...",w2v,0.999609,0.583268,0.526882,0.443038,0.481336
