In [2]:
%matplotlib inline
import time
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import torch.optim as optim
import torch.utils.data as data
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
import numpy as np
import random

# Model and batch tensors are moved to the GPU only when this flag is True.
use_cuda = True
# Index of the CUDA device handed to .cuda(device_id) when moving the model and batches.
device_id = 3
from tensorflow.contrib.keras.python.keras.datasets.imdb import load_data, get_word_index

# Passed to load_data(num_words=...) and as the first argument of the classifier
# constructor; presumably the vocabulary size.
max_features = 5000
# Mini-batch size. NOTE(review): confirm the training loop actually reads this constant.
batch_size = 32
# Planned number of passes over the training set; used as the denominator of the
# progress fraction given to timeSince.
epochs = 15
# Optimizer step size. NOTE(review): confirm the optimizer is actually built with it.
learning_rate = 0.001

In [3]:
# Load the IMDB train/test splits with num_words limited to max_features.
# NOTE(review): x_* are presumably arrays of integer word-id lists and y_* 0/1
# labels -- confirm against load_data.
(x_train, y_train), (x_test, y_test) = load_data(num_words=max_features)

In [4]:
def asMinutes(s):
    """Render a duration given in seconds as ``'<minutes>m <seconds>s'``.

    Both fields are formatted with ``%d``, so any fractional part is truncated.
    """
    whole_minutes = math.floor(s / 60)
    leftover_seconds = s - whole_minutes * 60
    return '%dm %ds' % (whole_minutes, leftover_seconds)


def timeSince(since, percent):
    """Report elapsed time and a linear estimate of the time still remaining.

    Args:
        since: start timestamp, as returned by ``time.time()``.
        percent: fraction of the work finished so far (must be non-zero).

    Returns:
        A string ``'<elapsed> (- <remaining>)'`` with both parts formatted by
        ``asMinutes``.
    """
    elapsed = time.time() - since
    # Extrapolate the total duration from the fraction completed so far.
    projected_total = elapsed / percent
    remaining = projected_total - elapsed
    return '%s (- %s)' % (asMinutes(elapsed), asMinutes(remaining))

def showPlot(points):
    """Draw ``points`` as a line chart with a y-axis tick every 0.2.

    Args:
        points: sequence of numbers (e.g. recorded losses), plotted in order.
    """
    # Bound locally: the visible import cell never imports pyplot or ticker, so
    # referring to the bare names `plt` / `ticker` raised NameError on first call.
    import matplotlib.pyplot as plt
    import matplotlib.ticker as ticker

    # plt.subplots() creates the figure itself; calling plt.figure() beforehand
    # would leave a second, empty figure behind on every call.
    fig, ax = plt.subplots()
    # this locator puts ticks at regular intervals
    ax.yaxis.set_major_locator(ticker.MultipleLocator(base=0.2))
    ax.plot(points)
    

def pad(tensor, length):
    """Extend ``tensor`` along dimension 0 to ``length`` rows by appending zeros.

    The appended block is created with ``tensor.new`` so it has the same type as
    ``tensor``, and it keeps every trailing dimension of ``tensor``.
    """
    missing_rows = length - tensor.size(0)
    trailing_dims = tensor.size()[1:]
    filler = tensor.new(missing_rows, *trailing_dims).zero_()
    return torch.cat([tensor, filler])


def sortedText(idx, xs, ys):
    """Select the samples at ``idx`` and order them from longest to shortest.

    Args:
        idx: indices used to index ``xs`` and ``ys`` (numpy-style indexing).
        xs: array of sequences.
        ys: array of labels aligned with ``xs``.

    Returns:
        ``(sequences, lengths, labels)``, all reordered by decreasing length.
    """
    picked_xs, picked_ys = xs[idx], ys[idx]
    lengths = np.array(list(map(len, picked_xs)))
    # argsort is ascending; reversing it puts the longest sequence first.
    longest_first = np.argsort(lengths)[::-1]
    return picked_xs[longest_first], lengths[longest_first], picked_ys[longest_first]


def textTensor(idx, xs, ys):
    """Build a zero-padded, time-major batch from the samples at ``idx``.

    Returns:
        A tuple ``(batch, lengths, labels)`` where
        - ``batch`` is a long tensor of shape (longest sequence, number of samples),
          one column per sample, sorted longest first and padded with zeros;
        - ``lengths`` is the list of sequence lengths in the same order;
        - ``labels`` is a ``torch.FloatTensor`` built from the reordered labels.
    """
    batch_xs, lengths, batch_ys = sortedText(idx, xs, ys)
    longest = lengths[0]

    columns = []
    for sequence in batch_xs:
        padded = pad(torch.Tensor(sequence), longest)
        # Each sample becomes one (longest, 1) column of the batch.
        columns.append(padded.view(longest, 1))

    batch = torch.cat(columns, 1).long()
    return batch, list(lengths), torch.FloatTensor(batch_ys)


def get_last_step_indices(lengths):
    """Locate every sequence's final time step inside packed-sequence data.

    ``pack_padded_sequence`` stores a batch step by step: first the rows of all
    sequences at t=0, then the rows of those still running at t=1, and so on.
    With lengths sorted longest first, sequence ``b`` is the ``b``-th row of every
    step it takes part in, so its last step (t = lengths[b] - 1) sits at

        sum_j min(lengths[j], lengths[b] - 1) + b

    where the sum counts all rows emitted by earlier time steps.

    A shortcut built from a reversed cumulative sum of the lengths is only valid
    when the lengths are strictly decreasing; with equal lengths in the batch it
    points at the wrong rows (e.g. ``[2, 2]`` must give ``[2, 3]``), which is why
    the rows of earlier steps are counted explicitly here.

    Args:
        lengths: sequence lengths in non-increasing order, each at least 1
            (the same list that was given to ``pack_padded_sequence``).

    Returns:
        ``torch.LongTensor`` with one row index per sequence, in batch order.
    """
    # Plain ints keep the arithmetic independent of the element type (e.g. numpy ints).
    lengths = [int(length) for length in lengths]

    indices = []
    for position, length in enumerate(lengths):
        last_step = length - 1
        # Each sequence contributes one row per time step before `last_step`
        # that it is still active in.
        rows_before = sum(min(other, last_step) for other in lengths)
        indices.append(rows_before + position)
    return torch.LongTensor(indices)


def get_last_step_tensor(packed_sequence, lengths):
    """Gather the rows of ``packed_sequence.data`` selected by ``get_last_step_indices``.

    Args:
        packed_sequence: object whose ``.data`` is a Variable holding the packed rows.
        lengths: sequence lengths forwarded to ``get_last_step_indices``.

    Returns:
        The selected rows, one per entry of ``lengths``.
    """
    row_ids = Variable(torch.LongTensor(get_last_step_indices(lengths)))
    packed_rows = packed_sequence.data
    if packed_rows.data.is_cuda:
        # The index must live on the same GPU as the rows it selects from.
        row_ids = row_ids.cuda(packed_rows.data.get_device())
    return packed_rows.index_select(0, row_ids)

In [22]:
class SimpleEncoder(nn.Module):
    """Embedding followed by an LSTM, run over a packed batch of word-id sequences."""

    def __init__(self, input_size, embedding_size, hidden_size, n_layers=1):
        """
        Args:
            input_size: number of rows of the embedding table.
            embedding_size: width of each embedding vector (LSTM input size).
            hidden_size: LSTM hidden state size.
            n_layers: number of stacked LSTM layers.
        """
        super(SimpleEncoder, self).__init__()
        self.n_layers = n_layers
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(input_size, embedding_size)
        # Pass n_layers through to the LSTM; if it is only stored on the module,
        # the network stays single-layer whatever the caller asks for.
        self.lstm = nn.LSTM(embedding_size, hidden_size, num_layers=n_layers)

    def forward(self, word_input, input_length, hidden=None):
        """Encode a padded batch.

        Args:
            word_input: word ids, indexed (time step, batch) as required by
                ``pack_padded_sequence`` with its default layout.
            input_length: length of each sequence in the batch, longest first.
            hidden: optional initial LSTM state; ``None`` lets the LSTM start
                from its default state.

        Returns:
            ``(output, hidden)``: the re-padded LSTM outputs and the final LSTM state.
        """
        embedded = self.embedding(word_input)
        packed = pack_padded_sequence(embedded, input_length)
        packed_output, hidden = self.lstm(packed, hidden)
        # pad_packed_sequence also returns the lengths; they are not needed here.
        output, _ = pad_packed_sequence(packed_output)

        return output, hidden
        


In [23]:
# Smoke test: push the first 32 training samples through a freshly initialised
# SimpleDecoder and display what it returns.
# NOTE(review): SimpleDecoder is defined in a later cell, so this cell fails on a
# fresh kernel unless that cell has been run first.
x, lengths, y = textTensor(range(32), x_train, y_train)
x = Variable(x)
y = Variable(y)
SimpleDecoder(20000, 256, 128)(x, lengths, None)

(Variable containing:
 -5.0188 -5.2886 -5.0984  ...  -5.0488 -4.8925 -4.8270
 -4.7947 -4.6121 -4.9453  ...  -4.8032 -4.7558 -4.8937
 -4.9578 -4.9013 -4.9274  ...  -4.6281 -4.9961 -4.7988
           ...             ⋱             ...          
 -4.8716 -4.7846 -4.6863  ...  -4.7796 -4.6275 -4.9536
 -4.8436 -4.8571 -5.0308  ...  -4.8387 -4.8874 -4.6102
 -5.1017 -4.6260 -4.4362  ...  -4.9277 -4.8420 -4.6734
 [torch.FloatTensor of size 32x128], (Variable containing:
  ( 0 ,.,.) = 
   -0.1453 -0.4150 -0.2249  ...  -0.1753 -0.0190  0.0466
    0.0786  0.2612 -0.0720  ...   0.0701  0.1175 -0.0203
   -0.1127 -0.0562 -0.0823  ...   0.2169 -0.1510  0.0463
             ...             ⋱             ...          
    0.0171  0.1042  0.2024  ...   0.1092  0.2613 -0.0649
   -0.0046 -0.0181 -0.1918  ...   0.0002 -0.0485  0.2287
   -0.2029  0.2728  0.4626  ...  -0.0288  0.0568  0.2255
  [torch.FloatTensor of size 1x32x128], Variable containing:
  ( 0 ,.,.) = 
   -0.5042 -0.6762 -0.3190  ...  -0.3945 -0.

In [21]:
class SimpleDecoder(nn.Module):
    """Embedding + LSTM that returns the log-softmax of the final hidden state."""

    def __init__(self, max_features, embedding_size, hidden_size, n_layers=1):
        """
        Args:
            max_features: number of rows of the embedding table (and output width
                of ``self.dense``).
            embedding_size: width of each embedding vector (LSTM input size).
            hidden_size: LSTM hidden state size.
            n_layers: number of stacked LSTM layers.
        """
        super(SimpleDecoder, self).__init__()
        self.embedding = nn.Embedding(max_features, embedding_size)
        # Pass n_layers through to the LSTM so the argument is not silently ignored.
        self.lstm = nn.LSTM(embedding_size, hidden_size, num_layers=n_layers)
        # NOTE(review): this projection is created but never applied in forward(),
        # so the returned scores have hidden_size columns, not max_features.
        # Confirm whether forward() was meant to call it before the softmax.
        self.dense = nn.Linear(hidden_size, max_features)
        self.softmax = nn.LogSoftmax()

    def forward(self, word_inputs, input_lengths, hidden):
        """Run the batch through the LSTM and normalise its final hidden state.

        Args:
            word_inputs: word ids, indexed (time step, batch).
            input_lengths: length of each sequence in the batch, longest first.
            hidden: initial LSTM state, or ``None`` for the default state.

        Returns:
            ``(output, hidden)``: log-softmax of the top layer's final hidden
            state, and the full final LSTM state.
        """
        embedded = self.embedding(word_inputs)
        packed = pack_padded_sequence(embedded, input_lengths)
        _, hidden = self.lstm(packed, hidden)
        # hidden[0] holds one final hidden state per layer; take the top layer
        # (index -1). For a single layer this is the same slice as index 0.
        output = self.softmax(hidden[0][-1])
        return output, hidden
        
        

In [None]:
def test():
    """Smoke-test one forward pass of IMDB_Classifier and its BCE loss.

    Takes the first batch of indices from ``train_loader``, builds the padded
    batch with ``textTensor`` and prints the input size, the output size and
    the loss value.
    """
    # next(...) works on Python 2 and 3; the iterator's .next() method exists
    # only on Python 2.
    idx, y = next(iter(train_loader))
    x, lengths, y = textTensor(idx, x_train, y_train)
    x, y = Variable(x), Variable(y)
    print('input_batches', x.size())
    model = IMDB_Classifier(max_features, 2, 32)
    if use_cuda:
        # Use the configured device, consistent with the training cell.
        model = model.cuda(device_id)
        x, y = x.cuda(device_id), y.cuda(device_id)
    output = model(x, lengths)
    print('output_batches', output.size())
    criterion = nn.BCELoss()

    print(criterion(output, y))

In [None]:


# Build the classifier, its optimizer and the loss from the notebook-level settings.
clf = IMDB_Classifier(max_features, 128, batch_size)
clf = clf.cuda(device_id) if use_cuda else clf
optimizer = optim.Adam(clf.parameters(), lr=learning_rate)
criterion = nn.BCELoss()

n_train = len(x_train)
# Floor division keeps the batch count an integer on Python 3 as well; a
# trailing partial batch is skipped.
n_train_batches = n_train // batch_size
n_test = len(x_test)
eval_batch_size = 100

start = time.time()
# Train for `epochs` passes so that the ETA printed below (which divides by
# `epochs`) describes the loop that is actually running.
for epoch in range(1, epochs + 1):
    # NOTE(review): assumes IMDB_Classifier is an nn.Module, so train()/eval()
    # switch layers such as dropout between the two phases.
    clf.train()
    losses = 0
    indices = np.random.permutation(n_train)
    for i in range(1, n_train_batches + 1):
        batch_idx = indices[(i - 1) * batch_size:i * batch_size]
        x, lengths, y = textTensor(batch_idx, x_train, y_train)
        x, y = Variable(x), Variable(y)

        (x, y) = (x.cuda(device_id), y.cuda(device_id)) if use_cuda else (x, y)

        output = clf(x, lengths)
        loss = criterion(output, y)
        losses += loss.data[0]
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if i % 10 == 0:
            # Running mean of the loss over the batches seen so far this epoch.
            print("batch: {}, batch_loss: {}".format(i, losses / i))

    print("Epoch: {}, time: {}, loss: {}".format(
        epoch, timeSince(start, float(epoch) / epochs), losses / max(n_train_batches, 1)))

    # Accuracy on the held-out test split: the message below reports the test
    # set size, so the predictions must come from x_test / y_test rather than
    # from the data the model was just trained on.
    clf.eval()
    total = 0
    correct = 0
    for first in range(0, n_test, eval_batch_size):
        eval_idx = np.arange(first, min(first + eval_batch_size, n_test))
        x, lengths, y = textTensor(eval_idx, x_test, y_test)
        x, y = Variable(x, volatile=True), Variable(y)
        (x, y) = (x.cuda(device_id), y.cuda(device_id)) if use_cuda else (x, y)
        output = clf(x, lengths)
        # Threshold the predicted probability at 0.5 to get the class decision.
        output = output > 0.5
        correct += (output.float() == y).sum().data[0]
        total += y.size(0)
    print('Accuracy of the network on the {} texts: {} %'.format(y_test.shape[0], 100. * correct / total))

batch: 10, batch_loss: 0.69545943141
batch: 20, batch_loss: 0.691866585612
batch: 30, batch_loss: 0.690033572912
batch: 40, batch_loss: 0.688602542877
batch: 50, batch_loss: 0.688659090996
batch: 60, batch_loss: 0.687320863207
batch: 70, batch_loss: 0.68498005867
batch: 80, batch_loss: 0.683520531654
batch: 90, batch_loss: 0.68325273527
batch: 100, batch_loss: 0.679330698848
batch: 110, batch_loss: 0.678382878412
batch: 120, batch_loss: 0.672522056599
batch: 130, batch_loss: 0.67177152221
batch: 140, batch_loss: 0.667980640275
batch: 150, batch_loss: 0.66621500055
batch: 160, batch_loss: 0.665801773965
batch: 170, batch_loss: 0.664297933789
batch: 180, batch_loss: 0.662891145547
batch: 190, batch_loss: 0.660225103717
batch: 200, batch_loss: 0.654279608577
batch: 210, batch_loss: 0.651508088481
batch: 220, batch_loss: 0.649304654246
batch: 230, batch_loss: 0.647736446106
batch: 240, batch_loss: 0.645605652158
batch: 250, batch_loss: 0.642742213368
batch: 260, batch_loss: 0.641271293278
