In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
#
from sklearn.cross_validation import train_test_split
#
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data_utils
import torchvision.models as models
#
import time
import matplotlib.pyplot as plt
from keras.utils import np_utils

Using TensorFlow backend.


In [2]:
# Read the pre-computed arrays that live next to the notebook: the embedding
# matrix and the two input arrays (same load order as before).
embedding_matrix, x1, x2 = (
    np.load(path) for path in ("embedding_matrix.npy", "x1.npy", "x2.npy")
)
# Labels are loaded and immediately one-hot encoded with the Keras helper.
y = np_utils.to_categorical(np.load("y.npy"))

In [3]:
# Number of leading samples taken from x1 / x2 / y. 10 is a smoke-test value;
# use len(y) for the full data set (the original note records that 404224 is
# divisible by 128, the intended batch size).
SAMPLES = 10
# When True the model, the embedding weights and every batch are moved to the GPU.
USE_CUDA = False
# Upper bound on the number of epochs run by the training loop.
MAX_EPOCH = 1
# The running loss is printed every this many batches.
PRINT_LOSS_AT_EVERY = 1
# Mini-batch size used by the DataLoaders and by LSTMTagger.init_hidden().
# NOTE(review): the model applies BatchNorm1d, which cannot estimate a variance
# from a single sample; a batch size of 1 is presumably what produced the NaN
# validation loss in the recorded run - confirm and use a larger value.
BATCH_SIZE = 1
# Quantity that decides which checkpoint is kept as "best": "loss" or "accuracy".
EARLY_STOPPING_CRITERIA = "loss"
# Value of the early-stop counter at which training is aborted.
EARLY_STOPPING = 5
# Size of the LSTM hidden state.
HIDDEN_DIM = 200
# Size of each embedding vector fed to the LSTM.
EMBEDDING_DIM = 300
# Number of output units of the classifier.
NUM_CLASS = 2
# Number of stacked LSTM layers.
NLAYERS = 1
# Width of the dense layer between the pooled LSTM features and the output layer.
DENSE_NEURONS = 125

In [56]:
class LSTMTagger(nn.Module):
    """Pair classifier that encodes two sequences with one shared LSTM.

    Both (already embedded) inputs are run through the same ``nn.LSTM``; the
    outputs are mean-pooled over the sequence axis, concatenated, and passed
    through a dense layer, batch normalisation, dropout and a final linear
    layer followed by a sigmoid.

    The layer sizes ``NLAYERS``, ``DENSE_NEURONS`` and ``NUM_CLASS`` and the
    default batch size ``BATCH_SIZE`` are read from module-level constants.

    NOTE(review): ``BatchNorm1d`` cannot estimate a variance from a single
    sample, so training with a batch of one is presumably what produced the
    NaN validation loss in the recorded run - confirm.

    Args:
        embedding_dim: size of each input embedding vector.
        hidden_dim: size of the LSTM hidden state.
        vocab_size: accepted for interface compatibility; not used here because
            the embedding lookup is performed outside the module.
    """

    def __init__(self, embedding_dim, hidden_dim, vocab_size):
        super(LSTMTagger, self).__init__()
        self.hidden_dim = hidden_dim
        # nn.LSTM applies its dropout only *between* stacked layers, so with a
        # single layer the argument has no effect (and newer PyTorch versions
        # warn about it). Only request it when there is more than one layer.
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, NLAYERS, batch_first=True,
                            dropout=0.2 if NLAYERS > 1 else 0.0)
        self.hidden2dense = nn.Linear(hidden_dim * 2, DENSE_NEURONS)
        self.dense2class = nn.Linear(DENSE_NEURONS, NUM_CLASS)
        self.simoidlayer = nn.Sigmoid()
        # batchnorm1 is currently not used in forward(); it is kept so that the
        # set of registered sub-modules (and saved checkpoints) stays unchanged.
        self.batchnorm1 = nn.BatchNorm1d(hidden_dim * 2)
        self.batchnorm2 = nn.BatchNorm1d(DENSE_NEURONS)
        self.drop = nn.Dropout(p=0.2)

    def init_hidden(self, batch_size=None):
        """Return a zero-filled ``(h0, c0)`` pair for the LSTM.

        Args:
            batch_size: number of sequences in the batch the state is meant
                for. Defaults to the module-level ``BATCH_SIZE``; pass the real
                size for a final batch that is smaller than that.

        Returns:
            Tuple of two Variables shaped ``(NLAYERS, batch_size, hidden_dim)``
            created with the same tensor type as the model parameters.
        """
        if batch_size is None:
            batch_size = BATCH_SIZE
        weight = next(self.parameters()).data
        shape = (NLAYERS, batch_size, self.hidden_dim)
        return (Variable(weight.new(*shape).zero_()),
                Variable(weight.new(*shape).zero_()))

    def forward(self, x1, x2, hidden1, hidden2):
        """Score a batch of sequence pairs.

        Args:
            x1, x2: embedded sequences, batch first (the LSTM is built with
                ``batch_first=True``).
            hidden1, hidden2: initial ``(h0, c0)`` states for the two passes.

        Returns:
            Sigmoid activations with ``NUM_CLASS`` values per pair.
        """
        # The same LSTM (shared weights) encodes both inputs. The final states
        # are not handed back to the caller.
        lstm_out1, _ = self.lstm(self.drop(x1), hidden1)
        lstm_out2, _ = self.lstm(self.drop(x2), hidden2)

        # Mean-pool the per-step outputs over the sequence axis.
        x1 = self.drop(torch.mean(lstm_out1, 1))
        x2 = self.drop(torch.mean(lstm_out2, 1))
        x = torch.cat((x1, x2), 1)  # concatenation doubles the feature size
        # Flatten to (batch, 2 * hidden_dim). This also absorbs the singleton
        # axis that torch.mean keeps in the PyTorch version the notebook was
        # recorded with.
        x = x.view(-1, self.hidden_dim * 2)
        x = F.relu(self.hidden2dense(x))
        x = self.batchnorm2(x)
        x = self.drop(x)
        x = self.dense2class(x)
        x = self.simoidlayer(x)
        return x
    
def repackage_hidden(h):
    """Detach hidden states from their autograd history.

    Args:
        h: a single hidden-state Variable/tensor, or an arbitrarily nested
            iterable of them (e.g. the ``(h, c)`` pair of an LSTM).

    Returns:
        A fresh Variable wrapping the same data when ``h`` is a single state,
        otherwise a tuple with every element repackaged recursively.
    """
    # isinstance (rather than an exact type comparison) also recognises plain
    # tensors on PyTorch versions where Variable and Tensor are merged; with
    # the exact comparison such a tensor would be iterated element by element.
    if isinstance(h, Variable):
        return Variable(h.data)
    return tuple(repackage_hidden(v) for v in h)


In [57]:
# Build the model; the vocabulary size is the number of rows of the embedding matrix.
net = LSTMTagger(
    embedding_dim=EMBEDDING_DIM,
    hidden_dim=HIDDEN_DIM,
    vocab_size=len(embedding_matrix),
)

In [58]:
# Split both input arrays and the labels in ONE call so the rows of X1_*, X2_*
# and y_* are guaranteed to describe the same samples.
X1_train, X1_valid, X2_train, X2_valid, y_train, y_valid = train_test_split(
    x1[:SAMPLES], x2[:SAMPLES], y[:SAMPLES], test_size=0.2, random_state=0)
print("Number of training samples =  {}".format(len(y_train)))
print("Number of validation samples =  {}".format(len(y_valid)))

if USE_CUDA:
    torch.backends.cudnn.benchmark = True
    net.cuda()
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(net.parameters())


def _make_loader(features, targets):
    """Wrap numpy features/targets into tensors, a TensorDataset and a DataLoader.

    The loader deliberately does NOT shuffle: the loaders for the two inputs
    are consumed side by side with zip(), and two independently shuffling
    loaders would pair sample i of the first input with sample j of the second
    input (and with j's label). train_test_split has already put the rows in
    random order; the price of keeping the pairs aligned is that this order is
    the same in every epoch.

    Returns:
        (feature_tensor, target_tensor, dataset, loader)
    """
    feature_tensor = torch.from_numpy(features).long()
    target_tensor = torch.from_numpy(targets).float()
    dataset = data_utils.TensorDataset(feature_tensor, target_tensor)
    loader = data_utils.DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=False)
    return feature_tensor, target_tensor, dataset, loader


trainfeatures1, traintargets1, trainset1, trainloader1 = _make_loader(X1_train, y_train)
trainfeatures2, traintargets2, trainset2, trainloader2 = _make_loader(X2_train, y_train)
validfeatures1, validtargets1, validset1, validloader1 = _make_loader(X1_valid, y_valid)
validfeatures2, validtargets2, validset2, validloader2 = _make_loader(X2_valid, y_valid)

# Frozen embedding layer initialised from the pre-computed matrix; its size is
# taken from the matrix itself instead of a hard-coded width.
embedding = nn.Embedding(embedding_matrix.shape[0], embedding_matrix.shape[1])
pretrained_weights = torch.from_numpy(embedding_matrix).float()
if USE_CUDA:
    pretrained_weights = pretrained_weights.cuda()
embedding.weight.data = pretrained_weights
embedding.weight.requires_grad = False

Number of training samples =  8
Number of validation samples =  2


In [59]:
def train():
    """Run one training epoch and return the sum of the per-batch losses.

    Works on module-level state: ``net``, ``embedding``, ``criterion``,
    ``optimizer``, ``trainloader1`` / ``trainloader2`` and, for the progress
    message only, the ``epoch`` variable of the driver loop.

    NOTE(review): the inputs come from two separate loaders and the labels from
    the second one; they only describe the same samples if both loaders iterate
    in the same order - verify the loader setup.

    Returns:
        Summed loss over all batches of the epoch (the caller divides it by
        the number of batches).
    """
    hidden1 = net.init_hidden()
    hidden2 = net.init_hidden()
    net.train()
    running_loss = 0.0
    train_loss = 0.0
    for i, (data1, data2) in enumerate(zip(trainloader1, trainloader2), 0):
        # The model does not hand back its final LSTM state, so these stay the
        # initial zero states; repackaging just drops any autograd history.
        hidden1 = repackage_hidden(hidden1)
        hidden2 = repackage_hidden(hidden2)
        inputs1, _ = data1
        inputs2, labels = data2
        if USE_CUDA:
            inputs1 = inputs1.cuda()
            inputs2 = inputs2.cuda()
            labels = labels.cuda()
        labels = Variable(labels)
        inputs1 = embedding(Variable(inputs1))
        inputs2 = embedding(Variable(inputs2))

        optimizer.zero_grad()
        # Feed BOTH inputs: the previous code passed inputs1 twice, so the
        # second input was embedded but never reached the model.
        outputs = net(inputs1, inputs2, hidden1, hidden2)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        batch_loss = loss.data[0]
        running_loss += batch_loss
        train_loss += batch_loss
        if i % PRINT_LOSS_AT_EVERY == PRINT_LOSS_AT_EVERY - 1:
            print('[%d, %5d] Train loss: %.3f' % (epoch+1, i+1, running_loss / PRINT_LOSS_AT_EVERY))
            running_loss = 0.0
    return train_loss

In [60]:
def valid():
    """Evaluate the model on the validation loaders.

    Works on module-level state: ``net``, ``embedding``, ``criterion``,
    ``validloader1`` / ``validloader2`` and, for the progress message only,
    the ``epoch`` variable of the driver loop.

    NOTE(review): the inputs come from two separate loaders and the labels from
    the second one; they only describe the same samples if both loaders iterate
    in the same order - verify the loader setup.

    Returns:
        ``(correct, valid_loss)``: the number of samples whose arg-max
        prediction matches the arg-max of the one-hot label, and the summed
        loss over all validation batches.
    """
    hidden1 = net.init_hidden()
    hidden2 = net.init_hidden()
    net.eval()
    running_loss = 0.0
    valid_loss = 0.0
    correct = 0
    for i, (data1, data2) in enumerate(zip(validloader1, validloader2), 0):
        hidden1 = repackage_hidden(hidden1)
        hidden2 = repackage_hidden(hidden2)
        inputs1, _ = data1
        inputs2, labels = data2
        if USE_CUDA:
            inputs1 = inputs1.cuda()
            inputs2 = inputs2.cuda()
            labels = labels.cuda()
        labels = Variable(labels)
        # volatile=True: no autograd graph is needed during evaluation.
        inputs1 = embedding(Variable(inputs1, volatile=True))
        inputs2 = embedding(Variable(inputs2, volatile=True))

        # Feed BOTH inputs: the previous code passed inputs1 twice, so the
        # second input was embedded but never reached the model.
        outputs = net(inputs1, inputs2, hidden1, hidden2)
        pred = outputs.data.max(1)[1]
        correct += pred.eq(labels.data.max(1)[1].long()).cpu().sum()
        loss = criterion(outputs, labels)

        batch_loss = loss.data[0]
        running_loss += batch_loss
        valid_loss += batch_loss
        if i % PRINT_LOSS_AT_EVERY == PRINT_LOSS_AT_EVERY - 1:
            print('[%d, %5d] Valid loss: %.3f' % (epoch+1, i+1, running_loss / PRINT_LOSS_AT_EVERY))
            running_loss = 0.0
    return correct, valid_loss

In [61]:
# Driver cell: trains for up to MAX_EPOCH epochs, plots the loss curves after
# every epoch, keeps the best checkpoint and stops early when the selected
# criterion has not improved for EARLY_STOPPING consecutive epochs.
train_loss = []       # mean training loss per epoch
valid_loss = []       # mean validation loss per epoch
accuracy = 0.0        # best validation accuracy seen so far
delta_loss = 1e15     # smallest |train loss - valid loss| seen so far
early_stop = 0        # consecutive epochs without improvement
program_starts = time.time()
print('Training started ....')
for epoch in range(MAX_EPOCH):
    train_loss.append(train() / len(trainloader1))
    # A real name instead of `_`, which IPython reserves for the last output.
    correct, epoch_valid_loss = valid()
    valid_loss.append(epoch_valid_loss / len(validloader1))
    #
    plt.plot(train_loss, label="Train loss")
    plt.plot(valid_loss, label="Valid loss")
    plt.legend()
    plt.savefig("train-valid_loss.eps")
    plt.clf()
    #
    # Count the samples from the dataset itself: len(loader) * BATCH_SIZE
    # over-counts whenever the last batch is only partially filled.
    n_valid = len(validloader1.dataset)
    epoch_accuracy = float(correct) / n_valid
    loss_gap = abs(train_loss[epoch] - valid_loss[epoch])
    print('Validation set accuracy: {}/{} ({:.2f}%)'.format(correct, n_valid,
                                                            100. * correct / n_valid))
    print('Delta between train-loss and valid-loss: {:0.4f}\n'.format(loss_gap))

    improved = False
    if EARLY_STOPPING_CRITERIA == "accuracy":
        if accuracy < epoch_accuracy:
            torch.save(net, "net_best-model.pth")
            accuracy = epoch_accuracy
            print("This is the current best model based on accuracy. Updated!")
            improved = True
    elif EARLY_STOPPING_CRITERIA == "loss":
        if loss_gap < delta_loss:
            torch.save(net, "net_best-model.pth")
            delta_loss = loss_gap
            print("This is the current best model based on loss. Updated!")
            improved = True

    # Only epochs WITHOUT improvement count towards the patience. Previously
    # the counter was reset and then incremented in the same epoch, so an
    # improving epoch already used up one unit (EARLY_STOPPING = 1 stopped
    # after the very first epoch).
    if improved:
        early_stop = 0
    else:
        early_stop += 1
    if early_stop == EARLY_STOPPING:
        if EARLY_STOPPING_CRITERIA == "accuracy":
            print("Validation accuracy is not better than {:.2f}%  over the last {} epochs. Training stopped after epoch {}".format(accuracy*100., EARLY_STOPPING, epoch+1))
            break
        elif EARLY_STOPPING_CRITERIA == "loss":
            print("Delta between train-loss and valid-loss is not less than {:0.4f}  over the last {} epochs. Training stopped after epoch {}".format(delta_loss, EARLY_STOPPING, epoch+1))
            break
now = time.time()
print("It has been {0} seconds since the training loop started".format(now - program_starts))
torch.save(net, "net.pth")
print('Finished Training\n')

Training started ....
[1,     1] Train loss: 0.688
[1,     2] Train loss: 0.700
[1,     3] Train loss: 0.687
[1,     4] Train loss: 0.686
[1,     5] Train loss: 0.701
[1,     6] Train loss: 0.684
[1,     7] Train loss: 0.684
[1,     8] Train loss: 0.683
[1,     1] Valid loss: nan
[1,     2] Valid loss: nan
Validation set accuracy: 2/2 (100.00%)
Delta between train-loss and valid-loss: nan

It has been 3.66819095612 seconds since the training loop started
Finished Training



In [45]:
# Scratch tensor (5 x 10 x 15) used to check how torch.mean reduces one axis.
x = torch.rand(5,10,15)

In [48]:
# Shape check: per the recorded output, reducing axis 1 keeps it as a singleton
# axis in the PyTorch build this was run with.
# NOTE(review): other PyTorch versions may drop the reduced axis - confirm.
torch.mean(x,1).size()

torch.Size([5, 1, 15])