In [1]:
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
import numpy as np
import pandas as pd
import warnings; warnings.filterwarnings('ignore')
from keras.preprocessing.text import Tokenizer
from keras.preprocessing import sequence
import random

Using TensorFlow backend.


In [2]:
from data_loader import MRLoader
batch_size = 50

loader = MRLoader(batch_size)
train_loader, test_loader = loader.get_dataset()
tokenizer = loader.tokenizer

In [3]:
from data_loader import Glove
glove = Glove(300)
vocab_size, embedding_dim = glove.vocab_size, glove.embedding_dim
embedding_matrix = glove.get_embedding(tokenizer)

In [4]:
class CNN(nn.Module):
    def __init__(self, vocab_size, embedding_dim, fixed_length=300, kernel_num=100, kernel_size=[3, 4, 5]):
        super(CNN, self).__init__()
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        
        self.embedding = nn.Embedding(self.vocab_size, self.embedding_dim)
        self.conv = nn.ModuleList([
            nn.Conv2d(1, kernel_num, (i, self.embedding_dim)) 
                for i in kernel_size
        ])
        self.maxpools = [
            nn.MaxPool2d((fixed_length+1-i,1)) for i in kernel_size
        ]
        self.dropout = nn.Dropout(0.5)
        self.linear = nn.Linear(len(kernel_size)*kernel_num, 1)
        
    def set_embedding_weights(self, embedding_matrix):
        self.embedding.weight = nn.Parameter(embedding_matrix)
    
    def forward(self, inp):
        x = self.embedding(inp).unsqueeze(1)
        x = [
            self.maxpools[i](
                torch.tanh(conv(x))
            ).squeeze(3).squeeze(2) 
                for i, conv in enumerate(self.conv)
        ]
        x = torch.cat(x, dim=1)
        x = self.dropout(x)
        x = self.linear(x)
        y = F.sigmoid(x)
        return y

In [16]:
model = CNN(
        vocab_size=vocab_size,
        embedding_dim=embedding_dim,
        fixed_length=300
)
for name, w in model.named_parameters():
    if 'weight' in name:
        nn.init.xavier_normal(w)
model.set_embedding_weights(embedding_matrix)

In [17]:
learning_rate = 0.05
num_epochs = 20

In [18]:
histories = []
def train_model(num_epochs, val_index=0):
    hist = np.zeros(num_epochs*(len(train_loader)))
    j = 0
    for e in range(num_epochs):
        for i, (x, y) in enumerate(train_loader):
            if i != val_index:
                # Clear stored gradient
                model.zero_grad()

                # Forward pass
                y_pred = model(x.long())
                if len(x) < batch_size:
                    y_pred = y_pred[:len(x)]

                loss = loss_fn(y_pred.view(-1, 1), y.view(-1, 1))
                hist[j] = loss.item()
                
                # Zero out gradient, else they will accumulate between epochs
                optimiser.zero_grad()

                # Backward pass
                loss.backward()
                
                val_index += 1
                if val_index == len(train_loader):
                    val_index = 0
                # Update parameters
                optimiser.step()
            j += 1
#         val_set = random.choice([(x, y) for x, y in train_loader])
        val_set = [(x, y) for x, y in train_loader][val_index]
        with torch.no_grad():
            model.eval()
            x, y = val_set
            y_pred = model(x.long())
            if len(y) < batch_size:
                y_pred = y_pred[:len(y)]
            full_num = len(y)
            correct_num = (y_pred.reshape(-1, 1).round() == y).sum().item()
            print("epoch {}, val score: {}".format(e + 1, correct_num / full_num))
#     return hist

In [19]:
def test_score():
    full_num = 0
    correct_num = 0
    with torch.no_grad():
        model.eval()
        losses = []
        for i, (x, y) in enumerate(test_loader):
            # Forward pass
            y_pred = model(x.long())
            if len(y) < batch_size:
                y_pred = y_pred[:len(y)]

            full_num += len(y)
            correct_num += (y_pred.reshape(-1, 1).round() == y).sum().item()
    print(correct_num / full_num)

In [9]:
loss_fn = torch.nn.BCELoss()
optimiser = torch.optim.Adadelta(model .parameters(), lr=learning_rate, weight_decay=.03)
train_model(20)
test_score()

epoch 1, val score: 0.84
epoch 2, val score: 0.94
epoch 3, val score: 0.9
epoch 4, val score: 0.98
epoch 5, val score: 0.94
epoch 6, val score: 0.98
epoch 7, val score: 0.94
epoch 8, val score: 0.96
epoch 9, val score: 0.98
epoch 10, val score: 0.94
epoch 11, val score: 0.94
epoch 12, val score: 0.92
epoch 13, val score: 0.96
epoch 14, val score: 0.9
epoch 15, val score: 0.94
epoch 16, val score: 0.94
epoch 17, val score: 0.92
epoch 18, val score: 0.94
epoch 19, val score: 0.96
epoch 20, val score: 0.94
0.792
