In [1]:
import glob
import unicodedata
import string

In [2]:
def find_files(path): return glob.glob(path)
print(find_files('./data/names/*.txt'))

['./data/names/Arabic.txt', './data/names/English.txt', './data/names/German.txt', './data/names/Vietnamese.txt', './data/names/Italian.txt', './data/names/Czech.txt', './data/names/Scottish.txt', './data/names/Dutch.txt', './data/names/Korean.txt', './data/names/Chinese.txt', './data/names/Greek.txt', './data/names/Irish.txt', './data/names/French.txt', './data/names/Portuguese.txt', './data/names/Russian.txt', './data/names/Japanese.txt', './data/names/Polish.txt', './data/names/Spanish.txt']


In [3]:
all_letters = string.ascii_letters + " .,;'"
n_letters = len(all_letters)

# Turn a Unicode string to plain ASCII, thanks to http://stackoverflow.com/a/518232/2809427
def unicode2ascii(s):
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn'
        and c in all_letters
    )
print(unicode2ascii("Ślusàrski"))

Slusarski


In [4]:
# Build the category_lines dictionary, a list of names per language
category_lines = {}
all_categories = []

# Read a file and split into lines
def read_lines(filename):
    lines = open(filename).read().strip().split('\n')
    return [unicode2ascii(line) for line in lines]

for filename in find_files('./data/names/*.txt'):
    category = filename.split('/')[-1].split('.')[0]
    all_categories.append(category)
    lines = read_lines(filename)
    category_lines[category] = lines

n_categories = len(all_categories)

In [5]:
import torch

# Find letter index from all_letters, e.g. "a" = 0
def letter2index(letter):
    return all_letters.find(letter)

# Just for demonstration, turn a letter into a <1 x n_letters> Tensor
def letter2tensor(letter):
    tensor = torch.zeros(1, n_letters)
    tensor[0][letter2index(letter)] = 1
    return tensor

# Turn a line into a <line_length x 1 x n_letters>,
# or an array of one-hot letter vectors
def line2tensor(line):
    tensor = torch.zeros(len(line), 1, n_letters)
    for li, letter in enumerate(line):
        tensor[li][0][letter2index(letter)] = 1
    return tensor

In [6]:
print(line2tensor('keon').size())

torch.Size([4, 1, 57])


In [207]:
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.functional as F

class RWA(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(RWA, self).__init__()
        
        self.max_steps = 1
        self.batch_size = 1
        self.hidden_size = hidden_size
        
        self.n = Variable(torch.Tensor(self.batch_size, hidden_size), requires_grad=True)
        self.d = Variable(torch.Tensor(self.batch_size, hidden_size), requires_grad=True)
        
        self.x2u = nn.Linear(input_size, hidden_size)
        self.c2g = nn.Linear(input_size + hidden_size, hidden_size)
        self.c2q = nn.Linear(input_size + hidden_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)
    
    def forward(self, input, hidden):
        h = F.tanh(hidden)
        
        for i in range(len(input)):
            combined = torch.cat((input[i], h), 1)
            
            
            u = self.x2u(input[i])
            g = self.c2g(combined)
            q = self.c2q(combined)
            q_greater = F.relu(q)
            scale = torch.exp(-q_greater)
            a_scale = torch.exp(q-q_greater)
            self.n = (self.n * scale) + ((u * F.tanh(g)) * a_scale)
            self.d = (self.d * scale) + a_scale
            h = F.tanh(torch.div(self.n, self.d))
        output = self.out(h)
        return output, h

    def init_hidden(self):
        return Variable(torch.randn(1, self.hidden_size))

n_hidden = 128
rwa = RWA(n_letters, n_hidden, n_categories)
print("n_letters:", n_letters, "n_hidden:", n_hidden, "n_categories:", n_categories)
print(rwa)

n_letters: 57 n_hidden: 128 n_categories: 18
RNN (
  (x2u): Linear (57 -> 128)
  (c2g): Linear (185 -> 128)
  (c2q): Linear (185 -> 128)
  (out): Linear (128 -> 18)
)


In [211]:

input = Variable(line2tensor('Keon'))
hidden = rwa.init_hidden()

output, next_hidden = rwa(input, hidden)
print(output, next_hidden)


Variable containing:

Columns 0 to 9 
-0.2181  0.1584  0.0678  0.1462 -0.2016  0.0297 -0.0068  0.1225 -0.1715  0.1818

Columns 10 to 17 
 0.4594  0.0848  0.2152 -0.3734 -0.1102  0.1573 -0.0488  0.0308
[torch.FloatTensor of size 1x18]
 Variable containing:

Columns 0 to 5 
 1.8044e-03  1.9991e-02  3.1786e-03  2.6121e-03  1.6083e-02  1.2365e-03

Columns 6 to 11 
-6.8232e-03 -5.7148e-03 -5.0756e-03  9.6675e-03 -2.3871e-03  3.2704e-03

Columns 12 to 17 
 6.2080e-03 -5.1337e-03 -6.1500e-03  8.9973e-03  4.4000e-03 -6.0094e-03

Columns 18 to 23 
 1.0000e+00  1.0000e+00 -1.5707e-03 -2.6361e-02 -8.2430e-03 -6.7953e-03

Columns 24 to 29 
 1.3388e-02  1.9324e-02  1.0000e+00 -4.1651e-03  6.8685e-03  7.1560e-03

Columns 30 to 35 
-1.0000e+00  5.2513e-03  7.3723e-13  1.1183e-02  7.1851e-03 -2.0261e-09

Columns 36 to 41 
-1.8267e-04  2.3346e-03 -8.1337e-25 -4.4789e-29 -1.7496e-03 -7.9274e-03

Columns 42 to 47 
 8.9760e-03  7.7648e-03  3.7587e-12  1.1966e-02 -6.3236e-22 -5.5934e-32

Columns 48 to 53 


In [212]:
def category_from_output(output):
    top_n, top_i = output.data.topk(1) # Tensor out of Variable with .data
    category_i = top_i[0][0]
    return all_categories[category_i], category_i
print(category_from_output(output))

('Greek', 10)


In [213]:
import random

def choose(l):
    return l[random.randint(0, len(l) - 1)]

def training_pair():                                                                                                               
    category = choose(all_categories)
    line = choose(category_lines[category])
    category_tensor = Variable(torch.LongTensor([all_categories.index(category)]))
    line_tensor = Variable(line2tensor(line))
    return category, line, category_tensor, line_tensor

for i in range(10):
    category, line, category_tensor, line_tensor = training_pair()
    print('category :', category, ' \tname:', line)

category : Czech  	name: MaxaB
category : Polish  	name: Jedynak
category : Scottish  	name: White
category : Japanese  	name: Obata
category : Japanese  	name: Kaza
category : Greek  	name: Anetakis
category : Greek  	name: Spyridis
category : German  	name: Wegner
category : Chinese  	name: Yang
category : Polish  	name: Winogrodzki


In [214]:
learning_rate = 0.005
def train (categroy_tensor, line_tensor):
    hidden = rwa.init_hidden()
    hidden = Variable(hidden.data)
    rwa.zero_grad()
    output, hidden = rwa(line_tensor, hidden)
    loss = criterion(output, category_tensor)
    print("loss:" , loss)
    loss.backward()
    
    for p in rwa.parameters():
        p.data.add_(-learning_rate, p.grad.data)
    return output, loss.data[0]

In [215]:
import time
import math

In [216]:
n_epochs = 100000
print_every = 5000
plot_every = 1000

rwa = RWA(n_letters, n_hidden, n_categories)

# Keep track of losses for plotting
current_loss = 0
all_losses = []

def time_since(since):
    now = time.time()
    s = now - since
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)

start = time.time()

for epoch in range(1, n_epochs + 1):
    category, line, category_tensor, line_tensor = training_pair()
    output, loss = train(category_tensor, line_tensor)
    current_loss += loss

    # Print epoch number, loss, name and guess
    if epoch % print_every == 0:
        guess, guess_i = category_from_output(output)
        correct = '✓' if guess == category else '✗ (%s)' % category
        print('%d %d%% (%s) %.4f %s / %s %s' % (epoch, epoch / n_epochs * 100, time_since(start), loss, line, guess, correct))

    # Add current loss avg to list of losses
    if epoch % plot_every == 0:
        all_losses.append(current_loss / plot_every)
        current_loss = 0

loss: Variable containing:
nan
[torch.FloatTensor of size 1]

loss: Variable containing:
nan
[torch.FloatTensor of size 1]



RuntimeError: Trying to backward through the graph second time, but the buffers have already been freed. Please specify retain_variables=True when calling backward for the first time.