In [1]:
import torch
import torch.nn as nn
from torch import optim
from torch.autograd import Variable
import torch.nn.functional as F
import torch.nn.init as init

import numpy as np

import random
from queue import *

from seq2seqLoader import *

import matplotlib
matplotlib.use("Agg")
import matplotlib.pylab as plt
from matplotlib.pyplot import imshow
%matplotlib inline

In [2]:
train_filename = "/data2/t2t/synth_data/relabel/train.orig"
target_filename = "/data2/t2t/synth_data/relabel/train.interior_relabel"
data_loader = T2TDataLoader(train_filename,target_filename)
training_data = data_loader.get_data()
print(len(training_data))
training_pairs = [(random.choice(training_data))
                  for i in range(len(training_data))]
print(training_pairs[0][0])
print(training_pairs[0][1])

100000
[111, 109, 109, 109, 109, 101, 60, 8, 110, 95, 109, 89, 62, 100, 110, 110, 92, 109, 109, 109, 99, 83, 10, 110, 58, 98, 110, 63, 87, 110, 110, 16, 109, 109, 109, 97, 50, 64, 110, 46, 52, 110, 43, 96, 110, 110, 112]
[115, 113, 113, 113, 113, 101, 112, 8, 114, 111, 113, 89, 112, 100, 114, 114, 111, 113, 113, 113, 99, 111, 10, 114, 112, 98, 114, 112, 87, 114, 114, 109, 113, 113, 113, 97, 112, 64, 114, 110, 52, 114, 110, 96, 114, 114, 116]


In [3]:
class EncoderLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, embedding_size):
        super(EncoderLSTM, self).__init__()
        self.word_embeddings = nn.Embedding(input_size, embedding_size)    
        self.lstm = nn.LSTM(input_size = embedding_size, hidden_size = hidden_size,num_layers = 1)
        

    def forward(self, input, hidden, c):
        embedded = self.word_embeddings(input).view(1, 1, -1)

        output = embedded
        output,(hidden,c) = self.lstm(output, (hidden, c))
        return output,hidden,c

    def initCells(self):
        
        if use_cuda:
            return Variable(torch.zeros(1, 1, hidden_size).cuda(gpu_no))
        else:
            return Variable(torch.zeros(1, 1, hidden_size))
        

In [4]:
class DecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size):
        super(DecoderRNN, self).__init__()
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(output_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        output = self.embedding(input).view(1, 1, -1)
        output = F.relu(output)
        output, hidden = self.gru(output, hidden)
        output = self.softmax(self.out(output[0]))
        return output, hidden

    def initHidden(self):
        result = Variable(torch.zeros(1, 1, self.hidden_size))
        if use_cuda:
            return result.cuda(gpu_no)
        else:
            return result

In [5]:
def train(input_variable, target_variable, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion):
    encoder_hidden = encoder.initCells()
    encoder_c = encoder.initCells()

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = len(input_variable)
    target_length = len(target_variable)

    loss = 0
    
    if use_cuda:
        encoder_hidden = encoder_hidden.cuda()

    for ei in range(input_length):
        encoder_output, encoder_hidden,encoder_c = encoder(input_variable[ei], encoder_hidden,encoder_c)

    decoder_hidden = encoder_hidden

    for di in range(target_length):
        decoder_output, decoder_hidden = decoder(
            target_variable[di], decoder_hidden)
        loss += criterion(decoder_output, target_variable[di])

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.data[0] / target_length

In [6]:
import time
import math


def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (- %s)' % (asMinutes(s), asMinutes(rs))

In [7]:
def trainIters(encoder, decoder, n_iters, print_every=1000, plot_every=100, learning_rate=0.01):
    start = time.time()
    plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    criterion = nn.NLLLoss()

    for iter in range(1, n_iters + 1):
        training_pair = training_pairs[iter - 1]
        input_variable = Variable(torch.LongTensor(training_pair[0]).view(-1, 1))
        target_variable = Variable(torch.LongTensor(training_pair[1]).view(-1, 1))
        
        if use_cuda:
            input_variable = input_variable.cuda(gpu_no)
            target_variable = target_variable.cuda(gpu_no)

        loss = train(input_variable, target_variable, encoder,
                     decoder, encoder_optimizer, decoder_optimizer, criterion)
        print_loss_total += loss
        plot_loss_total += loss

        if iter % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (timeSince(start, iter / n_iters),
                                         iter, iter / n_iters * 100, print_loss_avg))
            
            torch.save(encoder.state_dict(), './pickles/encoder_seq2seq.pth')
            torch.save(decoder.state_dict(), './pickles/decoder_seq2seq.pth')


In [8]:
hidden_size = 256
embedding_size = 256
gpu_no = 1
# 0-99,+,-,*,/,(,)
encoder_vocab_size = 113
decoder_vocab_size = 117
use_cuda = False
encoder1 = EncoderLSTM(encoder_vocab_size, hidden_size,embedding_size)
decoder1 = DecoderRNN(hidden_size, decoder_vocab_size)


if use_cuda:
    encoder1 = encoder1.cuda(gpu_no)
    decoder1 = decoder1.cuda(gpu_no)

trainIters(encoder1,decoder1, 10000, print_every=100)

0m 14s (- 24m 26s) (100 1%) 1.2606
0m 28s (- 22m 55s) (200 2%) 0.5848
0m 43s (- 23m 30s) (300 3%) 0.2365
0m 56s (- 22m 31s) (400 4%) 0.1183
1m 12s (- 22m 56s) (500 5%) 0.0502
1m 25s (- 22m 23s) (600 6%) 0.0256
1m 40s (- 22m 14s) (700 7%) 0.0181
1m 53s (- 21m 43s) (800 8%) 0.0125
2m 7s (- 21m 29s) (900 9%) 0.0123
2m 20s (- 21m 0s) (1000 10%) 0.0091
2m 33s (- 20m 43s) (1100 11%) 0.0075
2m 48s (- 20m 33s) (1200 12%) 0.0068
3m 3s (- 20m 28s) (1300 13%) 0.0064
3m 18s (- 20m 17s) (1400 14%) 0.0055
3m 32s (- 20m 2s) (1500 15%) 0.0046
3m 45s (- 19m 46s) (1600 16%) 0.0043
3m 59s (- 19m 27s) (1700 17%) 0.0041
4m 15s (- 19m 21s) (1800 18%) 0.0035
4m 28s (- 19m 6s) (1900 19%) 0.0033
4m 40s (- 18m 43s) (2000 20%) 0.0034
4m 55s (- 18m 30s) (2100 21%) 0.0031
5m 7s (- 18m 10s) (2200 22%) 0.0028
5m 23s (- 18m 2s) (2300 23%) 0.0025
5m 36s (- 17m 46s) (2400 24%) 0.0026
5m 50s (- 17m 31s) (2500 25%) 0.0027
6m 6s (- 17m 22s) (2600 26%) 0.0021
6m 20s (- 17m 8s) (2700 27%) 0.0024
6m 35s (- 16m 57s) (2800 28%