# Evaluate

In [25]:
import logging
from os import listdir
from os.path import join
import pickle

import torch
from torch.autograd import Variable
import numpy as np
from sklearn.metrics import f1_score, confusion_matrix

from train import read_data
from model import NativeLanguageCNN

## Read dev set data

In [4]:
# Directory with the n-gram feature files; 'dict.pkl' and the 'dev'
# subdirectory are read from it when the dev set is loaded.
feature_dir = "data/features/speech_transcriptions/ngrams/2"
# CSV file with the dev-set labels, handed to read_data.
label = "data/labels/dev/labels.dev.csv"

# Forwarded to read_data as max_length; the loaded dev matrix has 200 columns.
max_length = 200

In [40]:
# Feature dictionary: the pickle unpacks into (feature_dict, feature_rev_dict).
# NOTE: pickle.load can execute arbitrary code embedded in the file, so only
# point feature_dir at dictionaries produced by this project.
with open(join(feature_dir, 'dict.pkl'), 'rb') as dict_file:
    feature_dict, feature_rev_dict = pickle.load(dict_file)
n_features = len(feature_dict)

# Dev-set feature matrix, per-sample labels and the label -> language mapping.
dev_mat, dev_label, lang_dict = read_data(
    join(feature_dir, 'dev'), label, max_length, n_features)
dev_lang = [lang_dict[idx] for idx in dev_label]
n_lang = len(lang_dict)

# Sanity check: the number of rows should equal the number of labels.
print(dev_mat.shape)
print(len(dev_label))

(4244, 200)
4244


## Restore model from state

In [34]:
# Path of the saved state dict that restore_model loads with torch.load.
model_state_file = "model/2017-05-30-153741/model-state-0020.pkl"

# NOTE(review): repeats the value already set in the data-loading config cell;
# restore_model accepts max_length but never reads it.
max_length = 200
# Forwarded by restore_model to the NativeLanguageCNN constructor; presumably
# these must match the configuration the checkpoint was trained with — confirm.
embed_dim = 500
channel = 500

In [35]:
def restore_model(model_state_file, max_length, embed_dim, channel, cuda=False):
    """Rebuild a NativeLanguageCNN and load saved weights into it.

    Args:
        model_state_file: Path to a state dict written with ``torch.save``.
        max_length: Not used by this function; kept in the signature so that
            existing callers keep working.
        embed_dim: Second positional argument of ``NativeLanguageCNN``.
        channel: Fourth positional argument of ``NativeLanguageCNN``.
        cuda: If true, the model is moved to the GPU after the weights are
            loaded.

    Returns:
        The restored model, switched to evaluation mode.

    Note:
        ``n_features`` and ``n_lang`` are read from the notebook's global
        namespace, so the data-loading cell has to run first. The third
        constructor argument is fixed at 0 (presumably the dropout rate —
        confirm against model.py).
    """
    nlcnn_model = NativeLanguageCNN(n_features, embed_dim, 0, channel, n_lang)

    # Deserialize every tensor onto the CPU, whatever device it was saved
    # from. Without map_location, a checkpoint written during a GPU run cannot
    # be loaded on a CPU-only machine even though cuda defaults to False.
    state = torch.load(model_state_file,
                       map_location=lambda storage, location: storage)
    nlcnn_model.load_state_dict(state)
    nlcnn_model.eval()

    if cuda:
        nlcnn_model.cuda()
    return nlcnn_model

In [36]:
# Restore the checkpoint using the settings from the config cell; cuda keeps
# its default of False.
nlcnn_model = restore_model(
    model_state_file=model_state_file,
    max_length=max_length,
    embed_dim=embed_dim,
    channel=channel,
)

## Evaluate restored model

In [37]:
def evaluate_model(model, dev_mat, dev_label, lang_dict, cuda=False,
                   batch_size=None):
    """Score a model on a labelled feature matrix.

    Args:
        model: Callable that maps a batch of feature rows to a 2-D score
            tensor with one row per sample.
        dev_mat: NumPy feature matrix, one row per sample.
        dev_label: Per-sample true labels, used as keys into ``lang_dict``.
        lang_dict: Mapping from label index to language name; also indexed
            with the predicted class indices.
        cuda: If true, each batch is moved to the GPU before the forward pass.
        batch_size: Number of rows per forward pass. ``None`` (the default)
            sends the whole matrix through in one pass, as before; a positive
            integer bounds peak memory on large evaluation sets.

    Returns:
        ``(conf_mat, f1)``: the scikit-learn confusion matrix of true versus
        predicted languages and the weighted-average F1 score.

    Raises:
        ValueError: If ``batch_size`` is given but is smaller than 1.
    """
    if batch_size is not None and batch_size < 1:
        raise ValueError("batch_size must be a positive integer or None")

    dev_lang = [lang_dict[lb] for lb in dev_label]

    n_samples = len(dev_mat)
    # max(..., 1) keeps exactly one (possibly empty) forward pass for an empty
    # matrix, which is what the single-pass version did.
    step = max(n_samples, 1) if batch_size is None else batch_size

    # NOTE(review): on PyTorch >= 0.4 the forward pass could additionally run
    # under torch.no_grad(); left out because this file targets the Variable API.
    pred_chunks = []
    for start in range(0, max(n_samples, 1), step):
        batch = torch.from_numpy(dev_mat[start:start + step])
        if cuda:
            batch = batch.cuda()
        batch_score = model(Variable(batch))
        pred_chunks.append(np.argmax(batch_score.data.cpu().numpy(), axis=1))
    dev_pred = np.concatenate(pred_chunks)
    dev_pred_lang = [lang_dict[lb] for lb in dev_pred]

    conf_mat = confusion_matrix(dev_lang, dev_pred_lang)
    f1 = f1_score(dev_lang, dev_pred_lang, average='weighted')
    return (conf_mat, f1)

In [38]:
# Run the restored model over the dev set on the CPU (cuda keeps its default
# of False).
conf_mat, f1 = evaluate_model(
    model=nlcnn_model,
    dev_mat=dev_mat,
    dev_label=dev_label,
    lang_dict=lang_dict,
)

In [41]:
# Built as confusion_matrix(dev_lang, dev_pred_lang): rows are the true
# languages, columns the predicted ones.
conf_mat

array([[88, 16, 40, 38, 32, 45, 14, 24, 27, 42, 18],
       [69, 26, 38, 31, 38, 38, 28, 32, 22, 40, 25],
       [75, 19, 39, 27, 34, 39, 23, 36, 20, 39, 26],
       [74, 31, 34, 36, 32, 41, 20, 34, 23, 34, 17],
       [70, 12, 46, 29, 29, 48, 14, 31, 27, 57, 30],
       [75, 22, 45, 22, 46, 42, 23, 31, 19, 40, 32],
       [60, 24, 37, 38, 36, 43, 21, 33, 29, 47, 22],
       [82, 22, 38, 34, 37, 25, 22, 36, 22, 44, 37],
       [78, 15, 54, 37, 35, 31, 14, 36, 22, 43, 25],
       [53, 25, 44, 48, 28, 46, 20, 26, 40, 35, 20],
       [77, 20, 36, 32, 33, 34, 22, 31, 20, 42, 19]])

In [42]:
# Weighted-average F1 over the dev set (f1_score with average='weighted').
# NOTE(review): the recorded value (~0.089) is about chance level for the 11
# classes in the confusion matrix (1/11 ~ 0.091). Worth confirming that the
# feature dictionary, label mapping and checkpoint come from the same run.
f1

0.088525796331743206