# Evaulate Distilbert Model with Test Data
### Includes Titles

In [1]:
import json
import pickle
import sys

import pandas as pd
import sklearn.metrics
import tqdm
import torch
import transformers as hft

sys.path.insert(0, "/home/jupyter")
import util.log
import util.data

In [2]:
MODEL_NAME = "distilbert-base-uncased"
MODEL_FILE ="saved_model_epoch10_20201126_1415.tar"
TEST_DATASET = "test_dataset_all_titles_29Nov_distilbert.pickle"
EVAL_BATCH_SIZE = 64

DESCRIPTION = "Distilbert trained on politifact and gossipcop test data, with encoded titles."

In [3]:
with open(TEST_DATASET, "rb") as pfile:
    test_dataset = pickle.load(pfile)

In [4]:
checkpoint = torch.load(MODEL_FILE)
model = (hft.DistilBertForSequenceClassification
         .from_pretrained(MODEL_NAME))

In [6]:
model.load_state_dict(checkpoint["model"])

<All keys matched successfully>

In [7]:
device = (torch.device('cuda') if torch.cuda.is_available()
          else torch.device('cpu'))
model.to(device)
model.eval();

In [8]:
eval_loader = torch.utils.data.DataLoader(test_dataset,
                                batch_size=EVAL_BATCH_SIZE,
                                shuffle=False)

In [9]:
labels = []
preds = []
logits = []
probs = []
sources = []
article_tokens = []
file_names = []

softmax = torch.nn.Softmax(dim=1)

with torch.no_grad():
    for art_num, article in tqdm.tqdm(enumerate(eval_loader)):
        input_ids = article["input_ids"].to(device)
        attent_mask = article["attention_mask"].to(device)
        output = model(input_ids,
                       attention_mask=attent_mask,
                       output_hidden_states=False,
                       output_attentions=False)
        prob = softmax(output[0].detach().cpu()).numpy()
        logit = output[0].detach().cpu().numpy()
        pred = [0 if lgt[0] > lgt[1] else 1 for lgt in logit]
        label = article["labels"].numpy()
        labels.extend(label)
        preds.extend(pred)
        logits.extend(logit)
        probs.extend(prob)
        article_tokens.extend(article["article_tokens"].numpy())
        sources.extend(article["sources"])
        file_names.extend(article["file_names"])

58it [00:31,  1.86it/s]


In [10]:
# Label of 0 is real, 1 is fake
eval_results = pd.DataFrame({"Article": list(range(len(labels))),
                             "Source": sources,
                             "Token_Length": article_tokens,
                             "Predictions": preds,
                             "Labels": labels,
                             "Logit_real(0)": [x[0] for x in logits],
                             "Logit_fake(1)": [x[1] for x in logits],
                             "Prob_real(0)": [x[0] for x in probs],
                             "Prob_real(1)": [x[1] for x in probs],
                             "File_Name": file_names})

eval_results

Unnamed: 0,Article,Source,Token_Length,Predictions,Labels,Logit_real(0),Logit_fake(1),Prob_real(0),Prob_real(1),File_Name
0,0,gossipcop,389,0,1,4.675294,-4.944095,0.999934,0.000066,gossipcop-8686267368
1,1,gossipcop,645,0,0,4.842173,-5.164282,0.999955,0.000045,gossipcop-934994
2,2,gossipcop,738,0,0,4.633334,-4.959841,0.999932,0.000068,gossipcop-843576
3,3,gossipcop,616,0,0,4.870132,-5.104851,0.999954,0.000047,gossipcop-930194
4,4,gossipcop,480,1,1,-2.567918,2.486975,0.006338,0.993662,gossipcop-4054055901
...,...,...,...,...,...,...,...,...,...,...
3655,3655,gossipcop,592,1,1,-3.652731,3.658537,0.000668,0.999332,gossipcop-394386553
3656,3656,gossipcop,408,0,0,4.282269,-4.569964,0.999857,0.000143,gossipcop-931949
3657,3657,gossipcop,233,0,0,4.556463,-4.839029,0.999917,0.000083,gossipcop-872770
3658,3658,gossipcop,407,0,0,4.759398,-5.120811,0.999949,0.000051,gossipcop-870644


In [11]:
# Save evaluation data to csv file
eval_results.to_csv("test_data_" + MODEL_FILE[12:-4] + ".csv")

In [12]:
eval_results.query("Predictions == 1")

Unnamed: 0,Article,Source,Token_Length,Predictions,Labels,Logit_real(0),Logit_fake(1),Prob_real(0),Prob_real(1),File_Name
4,4,gossipcop,480,1,1,-2.567918,2.486975,0.006338,0.993662,gossipcop-4054055901
9,9,gossipcop,427,1,1,-0.247387,0.055738,0.424794,0.575206,gossipcop-8953282482
10,10,gossipcop,433,1,1,-3.598010,3.564700,0.000774,0.999226,gossipcop-1170441384
18,18,gossipcop,579,1,0,-1.111382,0.987312,0.109224,0.890776,gossipcop-939986
19,19,gossipcop,358,1,1,-3.509060,3.496651,0.000906,0.999094,gossipcop-638030695
...,...,...,...,...,...,...,...,...,...,...
3636,3636,politifact,176,1,1,-0.122455,-0.053680,0.482813,0.517187,politifact14596
3650,3650,gossipcop,616,1,0,-0.475947,0.314975,0.311971,0.688029,gossipcop-924248
3651,3651,gossipcop,659,1,1,-3.813829,3.781264,0.000503,0.999497,gossipcop-1603768996
3652,3652,gossipcop,404,1,1,-3.808629,3.827759,0.000482,0.999518,gossipcop-2908447234


In [13]:
for x in range(0, 10):
    print(eval_results["Logit_real(0)"][x], eval_results["Logit_fake(1)"][x])

4.675294399261475 -4.944095134735107
4.842173099517822 -5.164281845092773
4.633334159851074 -4.959841251373291
4.870131969451904 -5.104851245880127
-2.567918300628662 2.4869751930236816
4.497280597686768 -4.727911472320557
4.24395227432251 -4.548521518707275
4.863532543182373 -5.093557834625244
4.839032173156738 -5.061578273773193
-0.24738657474517822 0.055737826973199844


In [14]:
# Overall Metrics
precision, recall, f1, _ = sklearn.metrics.precision_recall_fscore_support(
    eval_results.Labels, eval_results.Predictions, average="binary")
accuracy = sklearn.metrics.accuracy_score(eval_results.Labels, eval_results.Predictions)

metrics = {"description": DESCRIPTION,
           "eval_notes": "Evaluating both gossipcop and poltifact data",
           "model": MODEL_FILE,
           "epochs": checkpoint["epoch"],
           "batch_size": EVAL_BATCH_SIZE,
           "Date_evaluated": "25Nov2020",
           "train_data": "train_dataset_25Nov_source_distilbert.pickle",
           "val_data": "eval_data_" + MODEL_FILE[12:-4] + ".csv",
           "eval_data": TEST_DATASET,
           "accuracy": accuracy,
           "precision": precision,
           "recall": recall,
           "f1": f1}
metrics

{'description': 'Distilbert trained on politifact and gossipcop test data, with encoded titles.',
 'eval_notes': 'Evaluating both gossipcop and poltifact data',
 'model': 'saved_model_epoch10_20201126_1415.tar',
 'epochs': 10,
 'batch_size': 64,
 'Date_evaluated': '25Nov2020',
 'train_data': 'train_dataset_25Nov_source_distilbert.pickle',
 'val_data': 'eval_data_epoch10_20201126_1415.csv',
 'eval_data': 'test_dataset_all_titles_29Nov_distilbert.pickle',
 'accuracy': 0.8603825136612022,
 'precision': 0.7752956636005256,
 'recall': 0.6344086021505376,
 'f1': 0.6978119455943229}

In [17]:
# Save metrics to JSON text file.
with open("eval_metrics_" + MODEL_FILE[12:-4] + ".json", "wt") as jfile:
    json.dump(metrics, jfile)

In [18]:
sum(labels)

930

In [19]:
cm = pd.DataFrame(sklearn.metrics.confusion_matrix(eval_results.Labels, eval_results.Predictions))
label_titles = {0: "real", 1: "fake"}
cm.rename(index=label_titles, columns=label_titles, inplace=True)
cm

Unnamed: 0,real,fake
real,2559,171
fake,340,590
