In [1]:
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import unicodedata
import glob
import string 
import os

In [2]:
all_letters = string.ascii_letters + " .,;'-"
n_letters = len(all_letters) + 1 # Plus EOS marker


In [3]:
def findFiles(path): return glob.glob(path)
# Turn a Unicode string to plain ASCII, thanks to http://stackoverflow.com/a/518232/2809427
def unicodeToAscii(s):
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn'
        and c in all_letters
    )

# Read a file and split into lines
def readLines(filename):
    lines = open(filename, encoding='utf-8').read().strip().split('\n')
    return [unicodeToAscii(line) for line in lines]

# Build the category_lines dictionary, a list of lines per category
category_lines = {}
all_categories = []
for filename in findFiles('data/names/*.txt'):
    category = os.path.splitext(os.path.basename(filename))[0]
    all_categories.append(category)
    lines = readLines(filename)
    category_lines[category] = lines

n_categories = len(all_categories)

#print('# categories:', n_categories, all_categories)
#print(unicodeToAscii("O'Néàl"))


In [4]:
import random
import torch
# Random item from a list
def randomChoice(l):
    return l[random.randint(0, len(l) - 1)]

# Get a random category and random line from that category
def randomTrainingPair():
    category = randomChoice(all_categories)
    line = randomChoice(category_lines[category])
    return category, line

In [5]:
# One-hot vector for category
def categoryTensor(category):
    li = all_categories.index(category)
    tensor = torch.zeros(1, n_categories)
    tensor[0][li] = 1
    return tensor

# One-hot matrix of first to last letters (not including EOS) for input
def inputTensor(line):
    tensor = torch.zeros(len(line), 1, n_letters)
    for li in range(len(line)):
        letter = line[li]
        tensor[li][0][all_letters.find(letter)] = 1
    return tensor

# LongTensor of second letter to end (EOS) for target
def targetTensor(line):
    letter_indexes = [all_letters.find(line[li]) for li in range(1, len(line))]
    letter_indexes.append(n_letters - 1) # EOS
    return torch.LongTensor(letter_indexes)

In [6]:
cat,line=randomTrainingPair()
print(line)

Que


In [7]:
def randomTrainingExample():
    category, line = randomTrainingPair()
    category_tensor = categoryTensor(category)
    input_line_tensor = inputTensor(line)
    target_line_tensor = targetTensor(line)
    return category_tensor, input_line_tensor, target_line_tensor


In [8]:
class Name_Gen(nn.Module):
    def __init__(self,n_cat,n_input,n_hidden):
        super(Name_Gen,self).__init__()
        self.rnn_cell=nn.RNNCell(n_cat+n_input+n_hidden,n_hidden,nonlinearity='relu')
        self.drop=nn.Dropout(p=0.2)
        self.fc=nn.Linear(n_hidden,n_letters)
        
    def forward(self,input):
        hidden=self.rnn_cell(input)
        input=self.drop(hidden)
        input=F.softmax(self.fc(input))
        return input , hidden

cat,line,target=randomTrainingExample()
n_input=line[0].size()[1]
epoch=100
from torch.autograd import Variable



In [10]:
model=Name_Gen(n_categories,n_input,128)
criterion=nn.NLLLoss()
optimizer=optim.Adam(model.parameters(),lr=0.0001)
for i in range(epoch):
    init_zero=torch.zeros(1,128)
    cat,line,target=randomTrainingExample()
    #rint(init_zero.shape,line[0].shape,cat.shape)
    loss = 0
    
    for p in range(line.size()[0]):
        out,init_zero=model(torch.cat((line[p],init_zero,cat),1))
        tar=torch.zeros(1,59)
        tar[0][target[p]]=1
        tar=tar.type(torch.LongTensor)
        l=criterion(out,torch.max(tar,1)[1])
        loss+= l
       
    optimizer.zero_grad()
    #rint(loss.data)
    loss.backward()
    
    optimizer.step()
        
        
# Sample from a category and starting letter
def sample(category, start_letter='A'):
    with torch.no_grad():  # no need to track history in sampling
        category_tensor = categoryTensor(category)
        
        input = inputTensor(start_letter).squeeze(0)
        
        print(input)
        hidden = torch.zeros(1,128)
        
        output_name = start_letter
     
        for i in range(8):
            
            p=torch.cat((category_tensor,input,hidden),1)
            output, hidden =model(p)
            topv, topi = output.topk(1)
            topi = topi[0][0]
            if topi == n_letters - 1:
                break
            else:
                letter = all_letters[topi]
                output_name += letter
            input = inputTensor(letter).squeeze(0)

        return output_name
    
print(sample('Russian'))

  # This is added back by InteractiveShellApp.init_path()


tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0.]])
AgPzCQ
