In [1]:
import torch as th
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
from dataUtils import *
import time
%load_ext autoreload
%autoreload 2

# categories: 18 ['Japanese', 'Czech', 'Greek', 'Irish', 'French', 'English', 'Chinese', 'Polish', 'Portuguese', 'Italian', 'Scottish', 'Vietnamese', 'Dutch', 'Arabic', 'Spanish', 'German', 'Russian', 'Korean']
O'Neal


In [2]:
class GenRNN(nn.Module):
    """Recurrent cell conditioned on a category vector.

    At every step the category vector, the current input vector and the
    previous hidden state are concatenated along dimension 1. Two linear
    layers map that combined vector to a new hidden state and to an
    intermediate "abstraction"; a third linear layer turns their concatenation
    into scores, which pass through dropout and a log-softmax.

    Args:
        dim_inputChar: width of one input vector.
        dim_hiddenState: width of the hidden state.
        dim_output: number of classes scored at each step.
        dim_catagory: width of the category vector (defaults to 18).
    """

    def __init__(self, dim_inputChar, dim_hiddenState, dim_output, dim_catagory = 18):
        super(GenRNN, self).__init__()
        self.hiddenStateDim = dim_hiddenState
        # Both input-side layers consume the same concatenated vector.
        dim_combined = dim_catagory + dim_inputChar + dim_hiddenState
        self.i2h = nn.Linear(dim_combined, dim_hiddenState)
        self.i2a = nn.Linear(dim_combined, dim_output)
        self.r2o = nn.Linear(dim_output + dim_hiddenState, dim_output)
        # Explicit dim: inputs are 2-D (rows x classes), so normalise over
        # dimension 1 instead of relying on the deprecated implicit choice.
        self.softmax = nn.LogSoftmax(dim=1)
        self.dropout = nn.Dropout(0.1)

    def forward(self, catagory_tensor, input_tensor, hidden_tensor):
        """Run one step.

        Args:
            catagory_tensor: 2-D tensor whose dimension 1 has width dim_catagory.
            input_tensor: 2-D tensor whose dimension 1 has width dim_inputChar.
            hidden_tensor: 2-D tensor whose dimension 1 has width dim_hiddenState.

        Returns:
            Tuple ``(logProbs, hiddenState)``: log-probabilities over the
            dim_output classes, and the new hidden state to feed into the
            next step.
        """
        combined = th.cat([catagory_tensor, input_tensor, hidden_tensor], 1)
        hiddenState = self.i2h(combined)
        abstraction = self.i2a(combined)
        # Order [hidden, abstraction] matches the r2o input width
        # (dim_hiddenState + dim_output).
        rep4clf = th.cat([hiddenState, abstraction], 1)
        scores = self.r2o(rep4clf)
        # Dropout is applied to the raw scores before normalisation.
        logProbs = self.softmax(self.dropout(scores))
        return logProbs, hiddenState

    def init_hidden_state(self):
        """Return an all-zero hidden state of shape (1, hiddenStateDim).

        The tensor is allocated on the same device as the module's parameters,
        so it matches the model whether or not it has been moved to a GPU.
        """
        device = next(self.parameters()).device
        return th.zeros(1, self.hiddenStateDim, device=device)

In [5]:
# Negative log-likelihood loss; pairs with the log-probabilities the model emits.
criterion = nn.NLLLoss()

# Step size for the manual gradient-descent update in train().
learning_rate = 0.0005

def train(category_tensor, input_line_tensor, target_line_tensor):
    """Run one manual gradient-descent update of the global ``rnn`` on one sequence.

    Uses the module-level ``rnn``, ``criterion`` and ``learning_rate``. The
    per-step losses are summed over the sequence, back-propagated once, and
    every parameter is moved by ``-learning_rate * grad``.

    Args:
        category_tensor: category input passed unchanged to every step.
        input_line_tensor: sequence of inputs, indexed along dimension 0.
        target_line_tensor: per-step targets, indexed along dimension 0 in
            lockstep with ``input_line_tensor``.

    Returns:
        Tuple ``(output, mean_loss)``: the model output of the final step and
        the summed loss divided by the number of steps, as a Python float.

    Raises:
        ValueError: if ``input_line_tensor`` has no steps.
    """
    n_steps = input_line_tensor.size()[0]
    if n_steps == 0:
        # With zero steps there is nothing to back-propagate and no output.
        raise ValueError("input_line_tensor must contain at least one step")

    # Follow whatever device the model lives on instead of assuming CUDA.
    device = next(rnn.parameters()).device
    category_tensor = category_tensor.to(device)
    hidden = rnn.init_hidden_state().to(device)

    rnn.zero_grad()

    loss = 0

    for i in range(n_steps):
        output, hidden = rnn(category_tensor, input_line_tensor[i].to(device), hidden)
        loss += criterion(output, target_line_tensor[i].to(device))

    loss.backward()

    # Plain SGD step, applied directly to the parameter storage.
    for p in rnn.parameters():
        if p.grad is not None:
            p.data.add_(p.grad.data, alpha=-learning_rate)

    # .item() extracts the scalar; indexing a 0-dim tensor with [0] raises.
    return output, loss.item() / n_steps


In [9]:
# Model with a 128-wide hidden state; input and output widths are both n_letters.
rnn = GenRNN(n_letters, 128, n_letters)
rnn.cuda()  # move the parameters to the GPU

# Iteration budget and reporting cadence.
n_epochs = 100000
print_every = 5000
plot_every = 500
all_losses = []  # one averaged loss per completed plot_every window
total_loss = 0   # running sum for the current window; reset every plot_every epochs

start = time.time()

for epoch in range(1, n_epochs + 1):
    # One update on whatever randomTrainingSet() returns.
    output, loss = train(*randomTrainingSet())
    total_loss += loss

    # Progress line: elapsed time, iteration, percent complete, latest loss.
    if not epoch % print_every:
        print('%s (%d %d%%) %.4f' % (timeSince(start), epoch, epoch / n_epochs * 100, loss))

    # Close the averaging window and start a fresh one.
    if not epoch % plot_every:
        all_losses.append(total_loss / plot_every)
        total_loss = 0

0m 38s (5000 5%) 2.7450
1m 16s (10000 10%) 3.2310
1m 55s (15000 15%) 2.4350
2m 34s (20000 20%) 2.4102
3m 13s (25000 25%) 3.3988
3m 53s (30000 30%) 2.0428
4m 32s (35000 35%) 2.6142
5m 14s (40000 40%) 2.4453
5m 57s (45000 45%) 1.9459
6m 43s (50000 50%) 2.4700
7m 32s (55000 55%) 2.8779
8m 19s (60000 60%) 1.7231
9m 10s (65000 65%) 2.7455
9m 51s (70000 70%) 1.8689
10m 44s (75000 75%) 3.1268
11m 41s (80000 80%) 2.6730
12m 47s (85000 85%) 2.9261
14m 45s (90000 90%) 2.3822
49m 31s (95000 95%) 1.7886
53m 15s (100000 100%) 3.1828


In [11]:
# Persist only the learned parameters (the state_dict) of the model.
# The context manager closes the file even if serialisation raises.
with open("name_gen.pkl", "wb") as f:
    th.save(rnn.state_dict(), f)