In [None]:
import spacy
import numpy as np
import re
import itertools
from collections import Counter
import json
import pandas as pd
import torch
import dill as pickle
import time
from data_module.data_preprocessor import preprocess_question, get_label
from collections import Counter
from pandas_ml import ConfusionMatrix

## Load Test Dataset, Models, and Preprocessing Module

In [None]:
# Evaluation set; later cells read its `text` and `label` columns.
df_test = pd.read_csv("ir_test_dataset.csv")

In [None]:
# Serialized ensemble members. Order matters: models[0] is the one used for the
# single-model run further down.
model_paths = [
    'ensemble_learning_related/model1.model',
    'ensemble_learning_related/model2.model',
    'ensemble_learning_related/model3.model',
    'ensemble_learning_related/model4.model',
]
# torch.load unpickles each file, so only load model files from a trusted source.
models = [torch.load(model_path) for model_path in model_paths]

In [None]:
# Load the text and label vocabularies (`pickle` is dill here, see the imports).
# NOTE: unpickling can execute arbitrary code -- only load files from a trusted source.
# The context managers close each file handle as soon as its object is loaded.
with open("ensemble_learning_related/text_vocab.pkl", "rb") as text_vocab_file:
    text_field = pickle.load(text_vocab_file)
with open("ensemble_learning_related/label_vocab.pkl", "rb") as label_vocab_file:
    label_field = pickle.load(label_vocab_file)

## Create functions for evaluation

In [None]:
def model_prediction(model, text_field, label_field, test_data, use_gpu=True):
    """Predict a label for every text in ``test_data`` with a single model.

    Each text is preprocessed with ``preprocess_question``, passed through
    ``model`` and converted to a label with ``get_label``. The time taken by
    those steps is measured per text.

    Args:
        model: Callable model exposing ``eval()``; it is switched to
            evaluation mode once before predicting.
        text_field: Vocabulary object forwarded to ``preprocess_question``.
        label_field: Vocabulary object forwarded to ``get_label``.
        test_data: Iterable of raw texts.
        use_gpu: Forwarded to ``preprocess_question``. Defaults to ``True``,
            which matches the previously hard-coded value.

    Returns:
        tuple: ``(res, avg_time)`` where ``res`` is the list of labels as
        returned by ``get_label`` (one per text, in input order) and
        ``avg_time`` is the mean time in seconds spent per text
        (``nan`` when ``test_data`` is empty).
    """
    res = []
    time_predictions = []
    # Evaluation mode does not change between samples, so set it once up front.
    model.eval()
    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        for raw_text in test_data:
            # perf_counter is monotonic and high-resolution, unlike time.time().
            start_time = time.perf_counter()
            text = preprocess_question(raw_text, text_field, use_gpu=use_gpu)
            y = model(text)
            label_string = get_label(y, label_field)
            time_predictions.append(time.perf_counter() - start_time)
            res.append(label_string)
            # Drop the per-sample tensors before releasing cached GPU memory.
            del text, y
            torch.cuda.empty_cache()
    # np.average([]) would emit a RuntimeWarning and yield nan; return nan directly.
    avg_time = np.average(time_predictions) if time_predictions else np.nan
    # return prediction result and avg time to predict a comment
    return res, avg_time

In [None]:
def most_voted(res_column_stack):
    """Return the majority-vote label of every row, converted to ``int``.

    Args:
        res_column_stack: Iterable of rows; each row holds the labels that
            the individual models predicted for one sample.

    Returns:
        list[int]: The most frequent label of each row. When several labels
        share the top count, the one that appears first in the row wins.
    """
    return [
        int(Counter(votes).most_common(1)[0][0])
        for votes in res_column_stack
    ]

In [None]:
def ensemble_model_prediction(models, text_field, label_field, test_data):
    """Predict ``test_data`` with every model and combine them by majority vote.

    Args:
        models: Sequence of models; each one is run through ``model_prediction``.
        text_field: Forwarded unchanged to ``model_prediction``.
        label_field: Forwarded unchanged to ``model_prediction``.
        test_data: Iterable of raw texts, forwarded to ``model_prediction``.

    Returns:
        tuple: ``(most_voted_res, avg_time_each_model)`` -- the majority-vote
        label per text and the list of average prediction times, one entry
        per model in the order of ``models``.
    """
    per_model_outputs = [
        model_prediction(member, text_field, label_field, test_data)
        for member in models
    ]
    predictions_per_model = [labels for labels, _ in per_model_outputs]
    avg_time_each_model = [avg_time for _, avg_time in per_model_outputs]

    # One row per text, one column per model, so each row holds that text's votes.
    votes_per_text = np.column_stack(predictions_per_model)

    # return prediction result and avg time of each model to predict a comment
    return most_voted(votes_per_text), avg_time_each_model

## Run single model

In [None]:
# Baseline: predict the test texts with the first ensemble member only.
single_res, avg_time = model_prediction(models[0], text_field, label_field, df_test.text.values)

In [None]:
single_res  # labels predicted by models[0] for the test texts, in row order

## Run ensemble model

In [None]:
# Predict the test texts with every model and keep the majority-vote label per text.
ensemble_res, avg_time_each_model = ensemble_model_prediction(models, text_field, label_field, df_test.text.values)

In [None]:
ensemble_res  # majority-vote labels for the test texts, in row order

In [None]:
# Confusion matrix of the test set's `label` column against the ensemble predictions.
confusion_matrix = ConfusionMatrix(df_test.label, ensemble_res)

In [None]:
# Label encoding: 1 is Positive, 0 is Neutral, and -1 is Negative.
confusion_matrix.print_stats()

## Calculate average prediction time

In [None]:
# Model 1's average seconds per comment on the test set (first entry of the
# per-model timings returned by the ensemble run above).
avg_time_model_1 = avg_time_each_model[0]

In [None]:
# Training set; used below as additional input texts for the timing measurement.
df_train = pd.read_csv("ir_train_dataset.csv")

In [None]:
# Re-run the ensemble on the training texts to collect more timing samples.
# NOTE(review): this overwrites ensemble_res and avg_time_each_model, which until
# now held the test-set results -- re-running the confusion-matrix cell after this
# one would compare test labels with train predictions.
ensemble_res, avg_time_each_model = ensemble_model_prediction(models, text_field, label_field, df_train.text.values)

In [None]:
# Combine model 1's test-set and train-set averages into one per-comment average.
# The two sets can differ in size, so each mean is weighted by its number of
# comments; a plain mean of the two means would over-weight the smaller set.
# NOTE: this cell overwrites avg_time_model_1 (the test-set average at this
# point), so re-run the cell that initialises it before executing this one again.
n_test = len(df_test)
n_train = len(df_train)
avg_time_model_1 = (
    avg_time_model_1 * n_test + avg_time_each_model[0] * n_train
) / (n_test + n_train)

In [None]:
avg_time_model_1  # model 1's average time to predict one comment, in seconds

In [None]:
1/avg_time_model_1  # throughput of model 1: comments predicted per second