In [9]:
import preprocess
import utils
import torch as th
import torch.nn as nn
import torch.functional as F
import pandas as pd
import csv
import pickle as pkl
import random
from sklearn.utils import gen_batches
import numpy as np
import scorer

# Loading data

In [2]:
load_preprocessed_data = False

if load_preprocessed_data:
    path_to_data = 'data.pkl'
    path_to_vocab_dict = 'vocab_dict.pkl'
    
    data = utils.load_preprocessed_data(path_to_data)
    vocab_dict = utils.load_vocab_dict(path_to_vocab_dict)
    (headlines_train, stances_train, bodies_train) = data['train']
    (headlines_dev, stances_dev, bodies_dev) = data['dev']
else:
    train_stances_path = 'train_stances.csv'
    train_bodies_path = 'train_bodies.csv'

    stances_data =  pd.read_csv(train_stances_path)
    bodies_data = pd.read_csv(train_bodies_path)

    data = preprocess.extract_data(stances_data, bodies_data)
    vocab_dict = data['dict']
    (headlines_train, stances_train, bodies_train) = data['train']
    (headlines_dev, stances_dev, bodies_dev) = data['dev']

# Training the model

In [3]:
is_cuda = True

if is_cuda:
    device = th.device('cuda:0')
else:
    device = th.device('cpu')

In [4]:
class CBOW_classifier(nn.Module):
    def __init__(self, vocab_dict, embedding_dim, num_layers=0, hidden_dim=50, dropout=0.5):
        super(CBOW_classifier, self).__init__()     
        output_dim = 4
        self.embedding = nn.Embedding(len(vocab_dict), embedding_dim, padding_idx=vocab_dict['<pad>'])
        if num_layers > 0:
            first_layer = nn.Sequential(nn.Linear(2*embedding_dim, hidden_dim),nn.ReLU())
            hidden_layers = [nn.Sequential(nn.Linear(hidden_dim, hidden_dim),nn.ReLU()) for i in range(num_layers-1)]
            self.out = nn.Sequential(nn.Dropout(dropout), first_layer, *hidden_layers, nn.Dropout(dropout), nn.Linear(hidden_dim, output_dim))
        else:
            self.out = nn.Sequential(nn.Dropout(dropout),nn.Linear(2*embedding_dim, output_dim))        
        
    def forward(self, headlines, bodies):
        headlines_embedded = th.sum(self.embedding(headlines), axis=1)
        bodies_embedded = th.sum(self.embedding(bodies), axis=1)
        embeds = th.cat((headlines_embedded, bodies_embedded), 1)
        out = self.out(embeds)
        return out

In [5]:
embedding_dim = 1000
model = CBOW_classifier(vocab_dict, embedding_dim, num_layers=5, hidden_dim=150, dropout=0.5).to(device)
lr = 0.001
loss_function = nn.CrossEntropyLoss()
optimizer = th.optim.Adam(model.parameters(), lr=lr)

In [6]:
num_epochs = 80
batch_size = 256
num_samples = len(headlines_train)

slices = list(gen_batches(num_samples, batch_size))
dev_slices = list(gen_batches(len(headlines_dev), batch_size))

for epoch in range(num_epochs):
    epoch_loss = 0
    model.train()
    for s in slices:

        headlines_batch = headlines_train[s].to(device)
        stances_batch = stances_train[s].to(device)
        bodies_batch = bodies_train[s].to(device)
        
        optimizer.zero_grad()  
        pred_labels = model(headlines_batch, bodies_batch)
        loss = loss_function(pred_labels, stances_batch)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()

    accs = []
    for s in dev_slices:   
        acc = utils.compute_accuracy(
            model, 
            headlines_dev[s].to(device), 
            stances_dev[s].to(device), 
            bodies_dev[s].to(device)
        )
        accs.append(acc)
    acc = sum(accs) / len(accs)
    
    slices_rand = np.random.permutation(slices)
    trainaccs = []
    for i in range(6):
        s = slices_rand[i]
        trainaccs.append(utils.compute_accuracy(
            model,
            headlines_train[s].to(device),
            stances_train[s].to(device),
            bodies_train[s].to(device)
        ))
    trainacc = sum(trainaccs) / len(trainaccs)
    
    print('Epoch:', epoch, "Accuracy: %f" % acc, "Train accuracy: %f" % trainacc)
    print('\tLoss:', epoch_loss / len(slices))

Epoch: 0 Accuracy: 0.736736 Train accuracy: 0.749349
	Loss: 0.8071986095161195
Epoch: 1 Accuracy: 0.809517 Train accuracy: 0.813151
	Loss: 0.6258081433120047
Epoch: 2 Accuracy: 0.845259 Train accuracy: 0.854818
	Loss: 0.5031262808924268
Epoch: 3 Accuracy: 0.863938 Train accuracy: 0.866536
	Loss: 0.40800694314537533
Epoch: 4 Accuracy: 0.889453 Train accuracy: 0.908203
	Loss: 0.3466499261320776
Epoch: 5 Accuracy: 0.902051 Train accuracy: 0.918620
	Loss: 0.2935646108950779
Epoch: 6 Accuracy: 0.919922 Train accuracy: 0.936198
	Loss: 0.2563579182146461
Epoch: 7 Accuracy: 0.921289 Train accuracy: 0.932943
	Loss: 0.23273842831610875
Epoch: 8 Accuracy: 0.927148 Train accuracy: 0.945312
	Loss: 0.20390676915835423
Epoch: 9 Accuracy: 0.926829 Train accuracy: 0.953125
	Loss: 0.1907242870278609
Epoch: 10 Accuracy: 0.936621 Train accuracy: 0.970703
	Loss: 0.1741378073028888
Epoch: 11 Accuracy: 0.938379 Train accuracy: 0.964844
	Loss: 0.15981585461242942
Epoch: 12 Accuracy: 0.940501 Train accuracy: 0

# Saving data

In [7]:
save_data = True

if save_data:
    vocab_dict_path = 'vocab_dict_new.pkl'
    model_weights_path = 'cbow_09757.pth'
    data_path = 'data_new.pkl'
    
    # utils.save_vocab_dict(vocab_dict_path, vocab_dict)
    utils.save_model_weights(model_weights_path, model)
    # utils.save_preprocessed_data(data_path, {
    #     'train': (headlines_train, stances_train, bodies_train),
    #     'dev': (headlines_dev, stances_dev, bodies_dev)
    # })

# Loading model

The following code loads the model. There is no need to run this part if the model was trained in this notebook.

In [None]:
load_model = False

if load_model:
    path_to_vocab_dict = 'vocab_dict.pkl'
    path_to_model_weights = 'cbow_09502.pth'

    embedding_dim = 1000
    model = CBOW_classifier(vocab_dict, embedding_dim, num_layers=1, dropout=0.1)

    vocab_dict = utils.load_vocab_dict(path_to_vocab_dict)
    model = utils.load_model_weights(model, path_to_model_weights)

# Evaluating the model

the following code loads test data, runs them through the model, and saves them to a CSV file

In [40]:
bodies_data = pd.read_csv('competition_test_bodies.csv')
stances_data =  pd.read_csv('competition_test_stances.csv')

In [41]:
headlines, _, bodies = preprocess.transform_data(stances_data, bodies_data, vocab_dict)

In [42]:
slices = list(gen_batches(len(headlines), 200))
predictions = []
model.eval()
for s in slices:   
    with th.no_grad():
        outputs = model.forward(
            headlines[s].to(device), 
            bodies[s].to(device)
        ).argmax(axis=1)

        predictions += outputs.tolist()

In [43]:
predictions_words = preprocess.transform_back_stances(predictions)

In [44]:
prediction_data = list(zip(
    stances_data['Headline'].values.tolist(), 
    stances_data['Body ID'].values.tolist(), 
    predictions_words
))

In [45]:
with open('predictions.csv', 'w') as pred_file:
    writer = csv.writer(pred_file)
    writer.writerow(['Headline', 'Body ID', 'Stance'])
    writer.writerows(prediction_data)

In [46]:
gold_filename = 'competition_test_stances.csv'

gold_labels = scorer.load_dataset(gold_filename)
test_labels = scorer.load_dataset('predictions.csv')

test_score, cm = scorer.score_submission(gold_labels, test_labels)
null_score, max_score = scorer.score_defaults(gold_labels)
scorer.print_confusion_matrix(cm)
print(scorer.SCORE_REPORT.format(max_score, null_score, test_score))

CONFUSION MATRIX:
-------------------------------------------------------------
|           |   agree   | disagree  |  discuss  | unrelated |
-------------------------------------------------------------
|   agree   |   1104    |    13     |    722    |    64     |
-------------------------------------------------------------
| disagree  |    422    |     8     |    221    |    46     |
-------------------------------------------------------------
|  discuss  |    991    |    19     |   3258    |    196    |
-------------------------------------------------------------
| unrelated |   4893    |    67     |   8688    |   4701    |
-------------------------------------------------------------
ACCURACY: 0.357

MAX  - the best possible score (100% accuracy)
NULL - score as if all predicted stances were unrelated
TEST - score based on the provided predictions

||    MAX    ||    NULL   ||    TEST   ||
|| 11651.25  ||  4587.25  ||  6142.25  ||



In [47]:
len(vocab_dict)

20295