## Jupyter notebook for testing the controversy detection model

## Import libraries

In [1]:
# Import of default libraries
import os
import json
import pickle
import pandas as pd
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
import collections
from collections import OrderedDict
from sklearn.metrics import precision_recall_fscore_support

# Import of the config file
from config import config

# Restrict CUDA to the GPU indices listed in the config (done before torch is imported below)
os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(map(str, vars(config)["gpu_idx"]))

# Import of torch packages
import torch
import torchtext
from torchtext import vocab
import torch.optim as optim
import torch.nn.functional as F
from torch.autograd import Variable

# Import of self defined classes
from DataLoader import DataLoader
from Transformer import HierarchicalTransformer
from Encoder import WordEncoder
from Encoder import PositionEncoder
from Optimizer import Optimizer

from utils.utils import *
from utils.parallel import * 

# Override the test batch size from the config file; this value is later passed to
# loader.load_batches when the test batches are built
config.batch_size_test = 4

## Settings

In [2]:
# Settings
cpu = torch.device("cpu")
split = 0 # Just a hack (Please ignore it)

# File paths (For the model's states)
# The checkpoint folder can be overridden through the CONTROVERSY_MODEL_DIR environment
# variable so the notebook is not tied to a single machine; the original location stays the default
folder = os.environ.get("CONTROVERSY_MODEL_DIR",
                        "/home/klingmin/controversy_detection/models/twitter15_split_0/best_model/")
model_file = "best_model_accuracy_test.pt"
word_encoder_file = "best_model_word_encoder_accuracy_test.pt"
word_pos_encoder_file = "best_model_word_pos_encoder_accuracy_test.pt"

model_path = os.path.join(folder, model_file)
word_encoder_path = os.path.join(folder, word_encoder_file)
word_pos_encoder_path = os.path.join(folder, word_pos_encoder_file)

# File paths (For the test file)
test_file_path = "../../data/processed/test.json"

# File path (For glove)
glove_directory = "../../data/glove/"
glove_file = "glove.6B.300d.txt"

### Mappings of the labels

In [3]:
# Label name -> class index
string_to_id = {"false": 0, "true": 1, "unverified": 2, "non-rumor": 3}

# Class index -> label name (inverse of string_to_id)
id_to_string = {index: label for label, index in string_to_id.items()}

# Predicted label -> reported label (currently every label maps to itself)
mapping_for_predicted_y = {label: label for label in string_to_id}

## Helper Functions

### Creating a new state dict - strips the `module.` key prefix added by multi-GPU training

In [4]:
def create_new_state_dict(current_state_dict, prefix = "module."):
    """Return a copy of a state dict with the multi-GPU wrapper prefix removed from its keys.

    Args:
        current_state_dict: mapping of parameter name -> value (e.g. the result of torch.load).
        prefix: key prefix to strip; defaults to "module.".

    Returns:
        OrderedDict with the same values in the same order. Keys that start with
        `prefix` have it removed; all other keys are kept unchanged.
    """

    new_state_dict = OrderedDict()
    for k, v in current_state_dict.items():
        # Only strip when the prefix is really there; blindly slicing k[7:] would
        # corrupt the keys of a checkpoint that was saved without the wrapper
        name = k[len(prefix):] if k.startswith(prefix) else k
        new_state_dict[name] = v

    return new_state_dict

### Get Y labels

In [5]:
def get_labels(tensor):
    """Return, for every row of `tensor`, the index of its largest entry (argmax along axis 1)."""

    row_wise_best = np.argmax(tensor, axis = 1)
    return row_wise_best

### Map id_ indices back to the original id_ values

In [6]:
def id_index_to_id_(id_list, loader):
    """Look up every index of `id_list` in `loader.id_field.vocab.itos`.

    Returns a list holding the looked-up entries in the same order as `id_list`.
    """

    ids = []
    for index in id_list:
        ids.append(loader.id_field.vocab.itos[index])
    return ids

## Loading the models

### Read in the saved states

In [7]:
# Reading in the saved states
# map_location = cpu keeps loading independent of the device the checkpoints were saved from
# (they may reference a GPU index that is not visible here); the model is moved to the GPU
# later in the notebook.
# NOTE: torch.load unpickles the file - only load checkpoints from a trusted source.
model_checkpoint = create_new_state_dict(torch.load(model_path, map_location = cpu))
word_encoder_checkpoint = torch.load(word_encoder_path, map_location = cpu)
word_pos_encoder_checkpoint = torch.load(word_pos_encoder_path, map_location = cpu)

### Load the saved states into the model

In [8]:
# Load states into the model
hierarchical_transformer = HierarchicalTransformer.HierarchicalTransformer(config)
hierarchical_transformer.load_state_dict(model_checkpoint)
hierarchical_transformer.eval()

# Getting the data loader
# NOTE(review): lang = "zh" is passed although the checkpoint path refers to twitter15 -
# confirm this is the intended language setting
loader = DataLoader.DataLoader(config, split, type_ = "test", lang = "zh")
loader.define_fields()
loader.test = loader.read_data(test_file_path)

# Getting the vocab vectors
vec = vocab.Vectors(name = glove_file, cache = glove_directory)

# Building the id_field
loader.id_field.build_vocab(getattr(loader.test, config.keys_order["post_id"]))

# Build the vocabulary (for tweets) using the test dataset
loader.tweet_field.build_vocab(getattr(loader.test, config.keys_order["content"]), 
                               max_size = config.max_vocab, 
                               vectors = vec)

# Iterating through the test set to get test batches 
loader.test_batch = loader.load_batches(loader.test, config.batch_size_test)

# Getting the word encoder
# NOTE(review): word_encoder_checkpoint is read from disk but never loaded into this
# encoder (it is built from the test vocabulary instead) - confirm this is intentional
word_encoder = WordEncoder.WordEncoder(config, loader)
word_encoder.eval()

# Getting the word position encoder 
word_pos_encoder = PositionEncoder.PositionEncoder(config, config.max_length)
word_pos_encoder.load_state_dict(word_pos_encoder_checkpoint)
word_pos_encoder.eval()

# Getting the time delay encoder
# Put into eval mode like every other module used for inference, so that any
# train-only behaviour (e.g. dropout) is switched off during testing
time_delay_encoder = PositionEncoder.PositionEncoder(config, config.size)
time_delay_encoder.eval()

Doing RD for chinese


### Moving the modules to GPU

In [9]:
# Move the model to the GPU when enabled; with several configured GPUs it is
# additionally wrapped in DataParallelModel
if config.gpu:
    print("Moving the model to the GPU")
    hierarchical_transformer = hierarchical_transformer.cuda()
    if len(config.gpu_idx) > 1:
        hierarchical_transformer = DataParallelModel(hierarchical_transformer)

Moving the model to the GPU


## Getting the test results

In [10]:
# Maps each post id to {"predicted_y": <label>, "true_y": <label>}
results_dict = {}

# The model is wrapped in DataParallelModel only when the GPU is enabled AND more than one
# GPU is configured, so the multi-GPU output handling below has to use that same condition
multi_gpu = config.gpu and len(config.gpu_idx) > 1

with torch.no_grad():
    
    for id_, X, y, word_pos, time_delay, structure, attention_mask_word, attention_mask_post in loader.get_data("test", return_id = True):

        # <-------------- Mapping id indices back to ids -------------->
        # (the deprecated Variable wrappers were dropped: tensors are used directly)
        id_ = id_index_to_id_(id_, loader)
        X_ = X # reference to the un-encoded input; not used in this cell
        
        # <-------------- Encode content -------------->
        X = word_encoder(X)
        word_pos = word_pos_encoder(word_pos)
        time_delay = time_delay_encoder(time_delay)

        # <-------------- Move to GPU -------------->
        if config.gpu:
            X = X.cuda()
            word_pos = word_pos.cuda()
            time_delay = time_delay.cuda()
            structure = structure.cuda()
            attention_mask_word = attention_mask_word.cuda()
            attention_mask_post = attention_mask_post.cuda()

        # <-------------- Getting the predictions -------------->
        if multi_gpu:
            
            # One output tuple per GPU: regroup them by output type
            predicted_y, last_layer_attention, self_atten_weights_dict_word, self_atten_weights_dict_post = zip(* hierarchical_transformer(X, word_pos, time_delay, structure, attention_mask_word = attention_mask_word, attention_mask_post = attention_mask_post, return_attention = True))

            # Merge into 1 batch 
            predicted_y = torch.cat(list(predicted_y), dim = 0)
            last_layer_attention = torch.cat(list(last_layer_attention), dim = 0)

            self_atten_weights_dict_word = merge_attention_dict(self_atten_weights_dict_word, config, "word")
            self_atten_weights_dict_post = merge_attention_dict(self_atten_weights_dict_post, config, "post")

        else:
            predicted_y, last_layer_attention, self_atten_weights_dict_word, self_atten_weights_dict_post = hierarchical_transformer(X, word_pos, time_delay, structure, attention_mask_word = attention_mask_word, attention_mask_post = attention_mask_post, return_attention = True)

        
        # <-------------- Getting the predicted labels -------------->
        predicted_y = predicted_y.cpu().numpy()
        predicted_y_labels = [id_to_string[int(label_idx)] for label_idx in get_labels(predicted_y)]
        
        # <-------------- Getting the true labels -------------->
        # Converted once per batch instead of once per sample
        true_y_ids = y.cpu().numpy()
        
        # <-------------- Collecting the results of this batch -------------->
        current_results = {id_[i] : {"predicted_y" : mapping_for_predicted_y[predicted_y_labels[i]], "true_y" : id_to_string[int(true_y_ids[i])]} for i in range(len(predicted_y_labels))}
        
        # <-------------- Merging the results -------------->
        # In-place update avoids rebuilding the whole dictionary for every batch
        results_dict.update(current_results)
        
        # <-------------- Free up the GPU -------------->
        del id_
        del X
        del y
        del predicted_y
        del word_pos
        del time_delay
        del structure
        del attention_mask_word
        del attention_mask_post
        del self_atten_weights_dict_word
        del self_atten_weights_dict_post
        torch.cuda.empty_cache()


## Getting statistics of the results

In [11]:
# Gold and predicted labels, both in the iteration order of results_dict
true_y = [record["true_y"] for record in results_dict.values()]
pred_y = [record["predicted_y"] for record in results_dict.values()]

# How often each label was predicted
breakdown = collections.Counter(pred_y)

# Share of posts whose predicted label equals the true label, in percent
acc = sum(predicted == actual for predicted, actual in zip(pred_y, true_y)) / len(results_dict) * 100

# Macro-averaged scores; only the F-score is reported (in percent, one decimal)
pre, recall, fscore, _ = precision_recall_fscore_support(true_y, pred_y, average = "macro")
fscore = round(fscore * 100, 1)

print("This is the breakdown of the predicted labels : {}".format(breakdown))
print("This is the accuracy of the model : {}%".format(acc))
print("This is the F-score of the model : {}%".format(fscore))

This is the breakdown of the predicted labels : Counter({'true': 6, 'false': 4})
This is the accuracy of the model : 60.0%
This is the F-score of the model : 58.3%
