In [1]:
import pickle  # stdlib; used by the data-loading cells and by test_batch
import torch
from torch import nn
import torch.nn.functional as F
from torch.optim import Adam, SGD
from torch.utils.data import DataLoader, Dataset
from torch.optim.lr_scheduler import ReduceLROnPlateau, StepLR
from torch.nn.utils import clip_grad_norm_
import pandas as pd
import numpy as np
from sklearn.metrics import classification_report, f1_score, accuracy_score
from tqdm import tqdm_notebook as tqdm
import torchnet as tnt
#from keras.preprocessing.sequence import pad_sequences
import warnings
warnings.filterwarnings('ignore')
from utils import *
import matplotlib.pyplot as plt
%matplotlib inline 
from deep_models import *

Using CNTK backend


using gpu


In [2]:
def predict_proba(model, data_iter):
    """Run `model` over every batch of `data_iter` and collect its outputs.

    The model is switched to eval mode and is left in eval mode on return.

    Args:
        model: module called as ``model(headlines, bodies)`` that returns a
            3-tuple; only the first element (the per-class output of the
            batch) is kept.
        data_iter: iterable of ``(headlines, bodies, labels)`` batches in
            which headlines and bodies are NumPy arrays, e.g. the generator
            returned by ``test_batch``.

    Returns:
        ``(outputs, y_true)``: the per-batch outputs concatenated along axis 0
        as one NumPy array, and a flat list with the labels of every batch in
        iteration order.

    Raises:
        ValueError: raised by ``np.concatenate`` when `data_iter` is empty.
    """
    model.eval()

    # Send inputs to wherever the model lives instead of assuming a GPU.
    # A model without parameters keeps the previous behaviour (CUDA).
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cuda')

    y_pred = []
    y_true = []
    with torch.no_grad():
        for headlines, bodies, labels in tqdm(data_iter):
            headlines = torch.from_numpy(headlines).to(device).long()
            bodies = torch.from_numpy(bodies).to(device).long()
            y_true.extend(labels)
            out, _, _ = model(headlines, bodies)
            y_pred.append(out.detach().cpu().numpy())
    return np.concatenate(y_pred, axis=0), y_true

def _padded_batches(headlines, bodies, labels, batch_size, max_len_h, max_len_b, truncating):
    """Yield ``(headlines, bodies, labels)`` batches in file order, padded per batch."""
    for start in range(0, len(headlines), batch_size):
        stop = start + batch_size
        batch_h = headlines[start:stop]
        batch_b = bodies[start:stop]
        batch_y = labels[start:stop]

        # Pad only up to the longest sequence of this batch, capped by max_len_*.
        maxlen_h = min(max_len_h, max(len(seq) for seq in batch_h))
        maxlen_b = min(max_len_b, max(len(seq) for seq in batch_b))

        batch_h = pad_sequences(batch_h, maxlen_h, padding='pre', truncating=truncating)
        batch_b = pad_sequences(batch_b, maxlen_b, padding='pre', truncating=truncating)
        yield (batch_h, batch_b, batch_y)


def test_batch(filename, batch_size=64, max_len_h=50, max_len_b=100, data_aug=False):
    """Generate padded evaluation batches from a pickled dataset, in file order.

    ``pad_sequences`` is not imported explicitly in this notebook; it is
    assumed to come from one of the star imports with the Keras signature
    and semantics (``truncating='post'`` keeps the start of a sequence,
    ``'pre'`` keeps its end) -- TODO confirm.

    Args:
        filename: path of a pickle holding a mapping with keys ``'h'``
            (headlines), ``'b'`` (bodies) and ``'y'`` (labels), each a
            sequence of the same length.
        batch_size: number of samples per batch; the last batch may be smaller.
        max_len_h: upper bound on the padded headline length.
        max_len_b: upper bound on the padded body length.
        data_aug: when true, a second pass over the whole dataset follows the
            first one, truncated with ``truncating='pre'`` instead of
            ``'post'``. Every label is therefore yielded twice.

    Yields:
        ``(batch_h, batch_b, batch_y)`` where the first two are the values
        returned by ``pad_sequences`` and ``batch_y`` is a NumPy slice of the
        labels.
    """
    # NOTE: unpickling can execute arbitrary code -- only load trusted files.
    with open(filename, 'rb') as f:
        data = pickle.load(f)

    # Token sequences stay plain lists: np.array() on ragged sequences raises
    # ValueError on NumPy >= 1.24, and only slicing and len() are needed here.
    headlines = list(data['h'])
    bodies = list(data['b'])
    labels = np.array(data['y'])

    assert len(headlines) == len(bodies) == len(labels)

    # First pass: long sequences are truncated with truncating='post'.
    yield from _padded_batches(headlines, bodies, labels, batch_size,
                               max_len_h, max_len_b, truncating='post')

    # Optional second pass: long sequences are truncated with truncating='pre'.
    if data_aug:
        yield from _padded_batches(headlines, bodies, labels, batch_size,
                                   max_len_h, max_len_b, truncating='pre')
                
def test_model(model, data_iter):
    """Evaluate `model` on `data_iter` and print a classification summary.

    The predicted class of a sample is the index of the largest value in the
    model's first output. The model is put in eval mode for the pass and is
    always switched to train mode afterwards, whatever mode it was in before.

    Args:
        model: module called as ``model(headlines, bodies)`` that returns a
            3-tuple whose first element holds the per-class outputs.
        data_iter: iterable of ``(headlines, bodies, labels)`` batches in
            which headlines and bodies are NumPy arrays.

    Returns:
        None. Accuracy, the per-class report, macro F1 and the ``get_score``
        ratio (prediction score divided by the score of a perfect prediction)
        are printed. ``get_score`` is defined outside this cell.
    """
    model.eval()

    # Send inputs to wherever the model lives instead of assuming a GPU.
    # A model without parameters keeps the previous behaviour (CUDA).
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cuda')

    y_pred = []
    y_true = []
    with torch.no_grad():
        for headlines, bodies, labels in tqdm(data_iter):
            headlines = torch.from_numpy(headlines).to(device).long()
            bodies = torch.from_numpy(bodies).to(device).long()
            # Labels are only compared on the host, so they never go to the device.
            y_true.extend(labels)
            out, _, _ = model(headlines, bodies)
            _, index = torch.max(out, dim=1)
            y_pred.extend(index.detach().cpu().numpy())

    model.train()
    print('classification report:')
    print('accuracy: %.3f' % accuracy_score(y_true, y_pred))
    print(classification_report(y_true, y_pred))
    print('macro f1: %.3f' % f1_score(y_true, y_pred, average='macro'))
    print('score: %.3f' % (get_score(y_true, y_pred) / get_score(y_true, y_true)))

In [3]:
# Locations of the pickled inputs used by this notebook.
pretrained_file_name = './tmp/pretrained.pkl'
train_filename = './tmp/train_ids.pkl'
val_filename = './tmp/val_ids.pkl'
test_filename = './tmp/test_ids.pkl'

# `vecs` is passed to the model constructors below (presumably the pretrained
# embedding vectors -- confirm against deep_models).
# NOTE: unpickling can execute arbitrary code -- only load trusted files.
with open(pretrained_file_name, 'rb') as f:
    vecs = pickle.load(f)

In [9]:
# Architecture shared by every checkpoint listed in `model_paths`.
model_name = 'Esim'
model_paths = ['./models/Esim_2_acc_0.777', './models/Esim_3_acc_0.784']

# One array of per-class outputs per checkpoint, in `model_paths` order.
results = []
for model_path in model_paths:
    # Build a fresh network per checkpoint; no throw-away model is allocated
    # on the GPU before the loop, and `model_name` is honoured here.
    if model_name == 'EmbeddingBag':
        model = EmbeddingBag(vecs).cuda()
    elif model_name == 'Esim':
        model = Esim(vecs).cuda()
    else:
        raise ValueError('unknown model_name: %r' % model_name)
    model.load_state_dict(torch.load(model_path))

    # test_batch yields the file in the same order on every call, so the
    # labels returned for each checkpoint are identical; the last copy is kept.
    y_pred, y_true = predict_proba(model, test_batch(test_filename, batch_size=64, max_len_b=100))
    results.append(y_pred)





In [13]:
# Previously the outputs of all checkpoints were summed and the sum was then
# overwritten with results[1], so the ensemble never reached the report.
# The choice is explicit now. False reproduces the recorded output, which
# scores the second checkpoint alone; True scores the summed outputs of all
# checkpoints (summing ranks the classes exactly like averaging would).
USE_ENSEMBLE = False
pred_sum = np.sum(results, axis=0) if USE_ENSEMBLE else results[1]

final_pred = np.argmax(pred_sum, axis=1)
# get_score of the prediction relative to get_score of a perfect prediction.
score = get_score(y_true, final_pred) / get_score(y_true, y_true)

print('classification report:')
print('accuracy: %.3f' % accuracy_score(y_true, final_pred))
print(classification_report(y_true, final_pred))
print('macro f1: %.3f' % f1_score(y_true, final_pred, average='macro'))
print('score: %.3f' % score)

classification report:
accuracy: 0.849
             precision    recall  f1-score   support

          0       0.46      0.53      0.49      1903
          1       0.00      0.00      0.00       697
          2       0.63      0.74      0.68      4464
          3       0.96      0.94      0.95     18349

avg / total       0.84      0.85      0.84     25413

macro f1: 0.531
score: 0.787


In [4]:
# Rebuild the Esim network on the GPU and restore one saved checkpoint.
model = Esim(vecs).cuda()
state_dict = torch.load('./models/Esim_3_acc_0.784')
model.load_state_dict(state_dict)

# Collect the model outputs for the test file; `tmp` receives the labels and
# is read by a later cell.
test_batches = test_batch(test_filename, batch_size=64, max_len_b=100)
y_pred, tmp = predict_proba(model, test_batches)
y_pred.shape




(25413, 4)

In [6]:
# Per-class predictions saved by another model (presumably LightGBM, judging
# by the file name -- confirm).
# NOTE: unpickling can execute arbitrary code -- only load trusted files.
with open('./pred/lgb.pkl', 'rb') as f:
    lgb_pred = pickle.load(f)

In [13]:
# Blend weights for the two sets of per-class predictions.
ESIM_WEIGHT = 0.2
LGB_WEIGHT = 0.8

# Element-wise blending would silently broadcast arrays of different shapes,
# so fail fast when the two prediction sets do not line up.
assert np.shape(y_pred) == np.shape(lgb_pred), (
    'prediction shape mismatch: %s vs %s' % (np.shape(y_pred), np.shape(lgb_pred)))

pred_sum = ESIM_WEIGHT * y_pred + LGB_WEIGHT * lgb_pred
final_pred = np.argmax(pred_sum, axis=1)
y_true = tmp
# get_score of the prediction relative to get_score of a perfect prediction.
score = get_score(y_true, final_pred) / get_score(y_true, y_true)

print('classification report:')
print('accuracy: %.3f' % accuracy_score(y_true, final_pred))
print(classification_report(y_true, final_pred))
print('macro f1: %.3f' % f1_score(y_true, final_pred, average='macro'))
print('score: %.3f' % score)

classification report:
accuracy: 0.891
             precision    recall  f1-score   support

          0       0.55      0.50      0.52      1903
          1       0.50      0.00      0.00       697
          2       0.74      0.80      0.77      4464
          3       0.96      0.99      0.97     18349

avg / total       0.88      0.89      0.88     25413

macro f1: 0.566
score: 0.816


In [15]:
# Per-class correlation between the two sets of predictions: one 2x2
# correlation matrix is printed for each of the four output columns.
for class_idx in range(4):
    esim_column = y_pred[:, class_idx]
    lgb_column = lgb_pred[:, class_idx]
    print(np.corrcoef(esim_column, lgb_column))

[[1.         0.75058765]
 [0.75058765 1.        ]]
[[1.         0.09864944]
 [0.09864944 1.        ]]
[[1.         0.79555826]
 [0.79555826 1.        ]]
[[1.         0.86601056]
 [0.86601056 1.        ]]
