In [None]:
from io import open
import string, os, random, time, math
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
from IPython.display import clear_output
import requests

In [None]:
# Fetch the text file at the URL below over HTTP and keep the response in `r`.
# NOTE(review): `r` is not referenced by any later cell visible here — confirm
# whether this download is still needed.
r = requests.get("https://data.heatonresearch.com/data/t81-558/text/"\
                 "treasure_island.txt")


In [None]:
# Parse 'name2lang.txt'; each line is split on ',' into a name (first field)
# and a language label (second field).
languages = []  # distinct language labels, in order of first appearance
data = []       # (name, language) pairs
x = []          # names only
y = []          # language labels only, aligned index-by-index with x
with open('name2lang.txt', 'r') as f:
  for line in f:
    fields = line.split(',')
    # Skip blank or malformed lines instead of failing with an IndexError.
    if len(fields) < 2:
      continue
    name = fields[0].strip()
    lang = fields[1].strip()
    if lang not in languages:
      languages.append(lang)
    x.append(name)
    y.append(lang)
    data.append((name, lang))

# Summaries are computed once the file has been closed; they do not need the handle.
n_languages = len(languages)
print(languages)


In [None]:
# Hold out 20% of the samples for testing, stratified on the labels and with a
# fixed seed so the split is repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=0,
    stratify=y,
)

In [None]:
# Preview a handful of training names; printing the whole list floods the output.
print(x_train[:10])

In [None]:
# Number of names in the training split.
print(len(x_train))

In [None]:
# Alphabet used for encoding names: ASCII letters followed by a few punctuation marks.
all_letters = "".join([string.ascii_letters, ",.;'"])
# Number of symbols in the alphabet.
n_letters = len(all_letters)

In [None]:
def name_rep(name):
  """One-hot encode a name, one character per row.

  Args:
    name: string to encode.

  Returns:
    Float tensor of shape (len(name), 1, n_letters). Row i holds a single 1 at
    the position of name[i] in all_letters. Characters that do not occur in
    all_letters are left as an all-zero row.
  """
  rep = torch.zeros(len(name), 1, n_letters)
  for index, letter in enumerate(name):
    pos = all_letters.find(letter)
    # str.find returns -1 for a missing character; indexing with -1 would
    # silently mark the last symbol of the alphabet, so leave the row empty.
    if pos < 0:
      continue
    rep[index, 0, pos] = 1
  return rep

In [None]:
def lang_rep(lang):
  """Return the position of lang in languages as a one-element long tensor."""
  label_index = languages.index(lang)
  return torch.tensor([label_index], dtype=torch.long)

In [None]:
# Tally how many (name, language) samples each language has.
count = dict.fromkeys(languages, 0)
for sample in data:
  count[sample[1]] += 1

In [None]:
# Bar chart of the number of samples per language.
# x and y are passed by keyword: current seaborn releases do not accept them
# positionally.
plt_ = sns.barplot(x=list(count.keys()), y=list(count.values()))
# Rotate the language names so the tick labels do not overlap.
plt_.tick_params(axis='x', labelrotation=90)
plt.show()

In [None]:
class RNN(nn.Module):
  """Single-step recurrent cell built from two linear layers.

  Both layers read the concatenation of the current input and the previous
  hidden state: one yields the next hidden state, the other yields
  log-probabilities over the output classes.
  """

  def __init__(self, input_size, hidden_size, output_size):
    super().__init__()
    self.hidden_size = hidden_size
    fan_in = input_size + hidden_size
    self.i2h = nn.Linear(fan_in, hidden_size)
    self.i2o = nn.Linear(fan_in, output_size)
    self.softmax = nn.LogSoftmax(dim=1)

  def forward(self, input_, hidden):
    """Advance one step; returns (log-probabilities, next hidden state)."""
    joined = torch.cat([input_, hidden], dim=1)
    log_probs = self.softmax(self.i2o(joined))
    next_hidden = self.i2h(joined)
    return log_probs, next_hidden

  def init_hidden(self):
    """Return an all-zero hidden state of shape (1, hidden_size)."""
    return torch.zeros(1, self.hidden_size)

In [None]:
# Size of the recurrent hidden state.
n_hidden = 128
# RNN classifier: alphabet-sized inputs, one output score per language.
net = RNN(input_size=n_letters, hidden_size=n_hidden, output_size=n_languages)

In [None]:
def infer(net, name):
  """Feed a name through the network and return the output of the last step.

  Args:
    net: model exposing init_hidden() and callable as
      net(step_input, hidden) -> (output, hidden).
    name: non-empty string to classify.

  Returns:
    The output tensor produced after the final character.

  Raises:
    ValueError: if name is empty, because no step runs and there is no output.
  """
  if len(name) == 0:
    raise ValueError("infer() needs a non-empty name")
  # Switches the module to evaluation mode; it is not switched back here.
  net.eval()
  name_oh = name_rep(name)
  hidden = net.init_hidden()
  # Inference only: skip autograd bookkeeping.
  with torch.no_grad():
    for i in range(name_oh.size()[0]):
      output, hidden = net(name_oh[i], hidden)
  return output
                 



In [None]:
# Quick check of the inference path: run one name through `net` and print the
# index of the highest-scoring output.
output = infer(net , 'alex')
index = torch.argmax(output)
print(index)

In [None]:
def dataloader(n_points, x_, y_):
  """Draw n_points random samples (with replacement) from the paired lists x_ and y_.

  Returns:
    List of (name, lang, name_rep(name), lang_rep(lang)) tuples.
  """
  def draw_one():
    pick = np.random.randint(len(x_))
    name, lang = x_[pick], y_[pick]
    return (name, lang, name_rep(name), lang_rep(lang))

  return [draw_one() for _ in range(n_points)]


In [None]:
def eval(net, n_points, k, x_, y_):
  """Top-k accuracy of net on n_points samples drawn by dataloader from x_, y_.

  A sample counts as correct when its label index is among the k
  highest-scoring outputs. Note that this name shadows the built-in eval.
  """
  samples = dataloader(n_points, x_, y_)
  hits = 0
  for name, _language, _name_oh, target in samples:
    _values, top_indices = infer(net, name).topk(k)
    hits += int(target in top_indices)
  return hits / n_points



In [None]:
def train(net, opt, los, n_points):
  """Run one optimiser step on a random batch of n_points training samples.

  Gradients of every sample in the batch are accumulated before the single
  optimiser step.

  Args:
    net: model exposing init_hidden() and callable as
      net(step_input, hidden) -> (output, hidden).
    opt: optimiser over net's parameters.
    los: loss callable taking (output, target).
    n_points: number of samples drawn from x_train / y_train.

  Returns:
    Sum of the per-sample losses divided by n_points (detached from autograd).
  """
  net.train()  # infer() leaves the module in eval mode
  opt.zero_grad()
  total_loss = 0
  data_ = dataloader(n_points, x_train, y_train)
  for name, language, name_oh, target in data_:
    # A zero-length name produces no output to score; skip it.
    if name_oh.size()[0] == 0:
      continue
    hidden = net.init_hidden()
    for i in range(name_oh.size()[0]):
      output, hidden = net(name_oh[i], hidden)
    # These three lines must stay inside the per-sample loop: otherwise only
    # the last sample of the batch contributes to the gradient and the loss.
    loss = los(output, target)
    # Every sample builds its own graph, so it does not need to be retained.
    loss.backward()
    # Detach so the running total does not keep every graph alive.
    total_loss += loss.detach()
  opt.step()
  return total_loss / n_points


In [None]:
# Module-level negative-log-likelihood loss and SGD optimiser for `net`.
# NOTE(review): train_setup() builds its own criterion and optimiser, so these
# two globals look unused by the visible cells — confirm before removing.
los = nn.NLLLoss()
opt = optim.SGD(net.parameters() , lr =0.01, momentum=0.97)

In [None]:
def train_setup(net, lr=0.01, n_batches=100, batch_size=10, momentum=0.9, display_freq=5):
  """Train net with SGD and periodically report accuracy and the loss curve.

  Args:
    net: model to train (passed through to train() and eval()).
    lr: SGD learning rate.
    n_batches: number of calls to train(), i.e. optimiser steps.
    batch_size: samples per call to train().
    momentum: SGD momentum.
    display_freq: report every display_freq batches.
  """
  crit = nn.NLLLoss()
  opt = optim.SGD(net.parameters(), lr=lr, momentum=momentum)
  # loss_arr[j] is the running mean of the first j batch losses; slot 0 is a
  # zero placeholder that makes the recurrence below work for the first batch.
  loss_arr = np.zeros(n_batches + 1)
  for i in range(n_batches):
    # Convert to a plain float before mixing the value into the NumPy array.
    batch_loss = float(train(net, opt, crit, batch_size))
    loss_arr[i + 1] = (loss_arr[i] * i + batch_loss) / (i + 1)
    if i % display_freq == display_freq - 1:
      clear_output(wait=True)
      top1 = eval(net, len(x_test), 1, x_test, y_test)
      top2 = eval(net, len(x_test), 2, x_test, y_test)
      print('iteration', i, 'top-1', top1, 'top-2', top2)
      plt.figure()
      # Entries 1..i+1 are filled at this point; plot all of them.
      plt.plot(loss_arr[1:i + 2], '-*')
      plt.xlabel('iteration')
      plt.ylabel('loss')
      plt.show()
      print('\n\n')

In [None]:
# Build a fresh RNN classifier and train it with the settings below.
n_hidden = 128
net = RNN(n_letters , n_hidden, n_languages)
train_setup(net , lr=0.005, n_batches=100 , batch_size = 256 , momentum = 0.96, display_freq = 5)


In [None]:
class lstm(nn.Module):
  """Stacked-LSTM classifier that is fed one input vector per call.

  Args:
    input_size: length of each input vector.
    hidden_size: width of every LSTM layer.
    output_size: number of classes.
    num_layers: number of stacked LSTM layers (defaults to 2).
  """

  def __init__(self, input_size, hidden_size, output_size, num_layers=2):
    super(lstm, self).__init__()
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.lstm_cell = nn.LSTM(input_size, hidden_size, num_layers)
    self.h2o = nn.Linear(hidden_size, output_size)
    self.softmax = nn.LogSoftmax(dim=2)

  def forward(self, input, hidden):
    """Advance one step.

    Args:
      input: tensor for the current step; it is reshaped to (1, 1, -1).
      hidden: (h, c) tuple as returned by init_hidden() or a previous call.

    Returns:
      (log-probabilities of shape (1, output_size), updated (h, c) tuple).
    """
    out, hidden = self.lstm_cell(input.view(1, 1, -1), hidden)
    # Classify from `out`, the top layer's output of shape (1, 1, hidden_size).
    # hidden[0] stacks one state per layer, so projecting and flattening it
    # would give num_layers * output_size scores instead of output_size.
    output = self.h2o(out)
    output = self.softmax(output)
    return output.view(1, -1), hidden

  def init_hidden(self):
    """Return zero (h, c) states, each of shape (num_layers, 1, hidden_size)."""
    shape = (self.num_layers, 1, self.hidden_size)
    return (torch.zeros(shape), torch.zeros(shape))


In [None]:
# Build the LSTM classifier and train it with the settings below.
# The hidden size was previously assigned to a misspelled name (`n_hiddeen`),
# so the model silently reused whatever `n_hidden` an earlier cell had set.
n_hidden = 128
net = lstm(n_letters, n_hidden, n_languages)
train_setup(net, lr=0.005, n_batches=100, batch_size=256, momentum=0.96, display_freq=5)