In [1]:
import torch

# Introducción a PyTorch

Contenidos:
* Qué es PyTorch
* Comandos básicos
* Ejemplos de la Primera Tarea
* Ejemplos para la segunda Tarea


## Qué es PyTorch

Objetivo:
* Cálculos con tensores con soporte para GPU
* Sistema auto-diferenciable para DNN

### Tensores
* Trabajar con ndarray (numpy) es trabajar con tensores: son vectores de múltiples dimensiones.
* PyTorch permite que estos tensores puedan vivir tanto en CPU como en GPU, acelerando los cálculos
* Se proveen diferentes funciones, al igual que en numpy

### Grafos dinámicos
* Puedes modificar (dentro de lo posible) el comportamiento de la red sin partir de cero
* Ya son varias las librerías que están adoptando esta misma idea
 * PyTorch no fue el primero
 
 
## Comandos Básicos



In [2]:
# Create a random 5x3 tensor, then show its values and its size.
x = torch.rand(5, 3)
print(x)
print(x.size())

tensor([[ 0.9669,  0.2019,  0.0537],
        [ 0.6339,  0.9273,  0.6596],
        [ 0.1822,  0.1147,  0.1867],
        [ 0.7765,  0.8504,  0.7440],
        [ 0.5203,  0.0558,  0.4122]])
torch.Size([5, 3])


In [3]:
# Build a 1-D integer tensor and show a few element-wise operations on it.
y = torch.tensor([1,2,3,4])
print(y)
print(y*3)  # multiply every element by a scalar
print(y*y)  # element-wise product; y itself is left unchanged
print(y.add_(y))  # trailing underscore = in-place: y is overwritten with y + y
print(y)  # y now holds the doubled values

tensor([ 1,  2,  3,  4])
tensor([  3,   6,   9,  12])
tensor([  1,   4,   9,  16])
tensor([ 2,  4,  6,  8])
tensor([ 2,  4,  6,  8])


In [4]:
# Look at the same 16 values of a 4x4 tensor under different shapes with view().
x = torch.randn(4, 4)
y = x.view(16)  # one dimension holding all 16 elements
z = x.view(-1, 8)  # the size -1 is inferred from other dimensions
print(x.size(), y.size(), z.size())

torch.Size([4, 4]) torch.Size([16]) torch.Size([2, 8])


### CUDA Tensors

In [5]:
# Pick the target device once: the first CUDA GPU when one is available,
# otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Create a tensor of ones with the same shape as x directly on that device.
y = torch.ones_like(x, device=device)
print(y.dtype) # prints the dtype (torch.float32 in the recorded run), not the device

torch.float32


## Ejemplos de la Primera Tarea

In [6]:
import torch.optim as optim
from torchvision import datasets, models, transforms

### Resnet

In [7]:
# Build a ResNet-18 without loading pretrained weights (pretrained=False) and
# print its layer structure.
model_resnet = models.resnet18(pretrained=False)
print(model_resnet)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Co

### AlexNet

In [8]:
import torch.nn as nn
import torch.utils.model_zoo as model_zoo

# Download location of the pretrained weights, keyed by model name.
# Read by alexnet() when it is called with pretrained=True.
model_urls = {
    'alexnet': 'https://download.pytorch.org/models/alexnet-owt-4df8aa71.pth',
}

class AlexNet(nn.Module):
    """AlexNet-style image classifier: a convolutional stack plus a dense head.

    ``features`` holds the convolution / ReLU / max-pool layers and
    ``classifier`` the dropout / linear layers; the last linear layer emits
    ``num_classes`` scores per sample.  The flattening step between the two
    expects exactly 256 * 6 * 6 values per sample.
    """

    def __init__(self, num_classes=1000):
        super(AlexNet, self).__init__()

        # Convolutional feature extractor.  The position of each layer in the
        # Sequential determines its state_dict key, so the order must not change.
        conv_stack = [
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        ]
        self.features = nn.Sequential(*conv_stack)

        # Fully connected head mapping the flattened features to class scores.
        dense_head = [
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        ]
        self.classifier = nn.Sequential(*dense_head)

    def forward(self, x):
        """Apply the feature stack, flatten each sample and classify it."""
        feature_maps = self.features(x)
        batch_size = feature_maps.size(0)
        flattened = feature_maps.view(batch_size, 256 * 6 * 6)
        return self.classifier(flattened)

    def forward2(self, x):
        """Same computation as ``forward``, applying the feature layers one by one."""
        out = x
        for layer in self.features:
            out = layer(out)
        out = out.view(out.size(0), 256 * 6 * 6)
        return self.classifier(out)
    
def alexnet(pretrained=False, model_root=None, **kwargs):
    """Build an ``AlexNet``, optionally initialised from the published weights.

    Extra keyword arguments are forwarded to the ``AlexNet`` constructor.
    When ``pretrained`` is truthy, the checkpoint at ``model_urls['alexnet']``
    is fetched with ``model_zoo.load_url`` (``model_root`` is passed as its
    second positional argument) and loaded into the model.
    """
    net = AlexNet(**kwargs)
    if not pretrained:
        return net
    state = model_zoo.load_url(model_urls['alexnet'], model_root)
    net.load_state_dict(state)
    return net

# Instantiate AlexNet with the pretrained weights (alexnet() fetches them
# through model_zoo when pretrained=True) and print its layer structure.
model_alexnet = alexnet(pretrained=True)
print(model_alexnet)

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Dropout(p=0.5)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
    (2): ReLU(inplace)
    (3): Dropout(p=0.5)
    (4): Linear(in_features=4096, out_feature

Uno puede iterar sobre las capas,

Esto con el fin de realizar operaciones sobre estas o verificar el tamaño de los pesos

In [9]:
# Walk the modules stored in `features`. Despite its name, `param` is a layer
# (module) here, not a parameter tensor.
for param in model_alexnet.features: #model_alexnet.parameters
    if isinstance(param,nn.Conv2d):
        # Printed sizes read as (out_channels, in_channels, kernel_h, kernel_w).
        print(param.weight.size())

torch.Size([64, 3, 11, 11])
torch.Size([192, 64, 5, 5])
torch.Size([384, 192, 3, 3])
torch.Size([256, 384, 3, 3])
torch.Size([256, 256, 3, 3])


#### Ejemplo

In [10]:
# Push one random tensor of shape (1, 3, 224, 224) through the network and
# print the resulting scores.
# NOTE(review): the name `input` shadows the Python builtin of the same name.
# NOTE(review): model_alexnet.eval() is never called in this notebook, so the
# Dropout layers are presumably still active here -- confirm if repeatable
# outputs are expected.
input = torch.randn(1, 3, 224, 224)
print(model_alexnet(input))

tensor([[ 0.7552, -0.1689, -0.9439, -0.5384,  1.1559,  0.8958,  0.5463,
          0.2871, -0.1313, -0.7396, -1.0388,  0.3761,  0.8632,  0.6398,
         -0.1557, -1.0014, -1.0093,  0.0139, -0.0205,  0.0312, -2.7950,
         -1.5705, -2.2777, -0.7017, -0.9930, -1.3842, -0.3272,  0.5274,
         -0.1193,  0.1863, -0.2608,  0.9001, -1.0014, -1.1853,  0.4133,
         -0.6859, -0.1114,  0.4628,  0.9162,  0.1040,  1.5633, -1.1323,
          0.4904, -0.0816,  0.4723, -0.3235,  2.2951,  0.9333,  0.5476,
         -0.1700, -0.9409, -1.4243, -0.0917, -1.2227, -0.9893,  0.6052,
         -1.7847, -0.8357,  0.3175,  0.4079,  0.9060, -0.8474, -1.3600,
         -0.6324,  0.2319, -1.2032, -0.9547, -0.7137, -0.2251, -0.9471,
          1.0247, -0.6131, -0.1351, -0.8512, -1.1499,  0.3398,  0.9506,
          0.7957,  2.4142,  1.1020, -1.5938, -1.1928, -1.0387, -2.0637,
          0.3256, -0.0194, -1.1271, -0.6633,  1.1098, -0.1492,  1.0599,
         -1.1323, -0.4617,  0.5242,  1.6831, -2.0778,  1.7609, -

In [12]:
# One hand-written training step on the dummy input, to show what autograd
# computes and how a plain SGD update uses it.
output = model_alexnet(input)
# Dummy regression target.  It is created as float explicitly: MSELoss needs
# the target to share the floating dtype of the network output, and arange()
# with integer bounds yields an integer tensor on current PyTorch versions.
target = torch.arange(1, 1001, dtype=torch.float)  # a dummy target, for example
target = target.view(1, -1)  # make it the same shape as output
criterion = nn.MSELoss()

loss = criterion(output, target)
print(loss)

model_alexnet.zero_grad()     # zeroes the gradient buffers of all parameters

print('conv1.bias.grad before backward')
print(model_alexnet.features[0].bias.grad)

loss.backward()

print('conv1.bias.grad after backward')
print(model_alexnet.features[0].bias.grad)

# Plain SGD update: weight <- weight - learning_rate * gradient.
# It runs under no_grad() so the update itself is not tracked by autograd.
learning_rate = 0.01
with torch.no_grad():
    for f in model_alexnet.parameters():
        f.sub_(f.grad * learning_rate)

tensor(1.00000e+05 *
       3.3377)
conv1.bias.grad before backward
None
conv1.bias.grad after backward
tensor([-17.9168,  -0.0625, -10.7579,   2.0432, -62.3260, -58.9705,
        -20.9254,  -8.7410,  24.5047,  22.4319,   6.4826,  11.8428,
          0.3501,  -0.5553,  22.4283,   5.5764, -28.9538,  34.4560,
        -11.0774,   3.8307,  -4.5126, -17.0148,  20.0543,  -0.5957,
         -4.9456,   0.7406, -16.4618,  13.4512,  -1.9954,  17.6860,
         11.6637,  -0.8091, -10.9502,  13.3491,  -8.3213,   0.9344,
          1.4198,  15.4449, -22.1429, -15.8686,  13.8913,   1.5511,
          5.0415,  41.4938, -27.3529, -19.8215,  -0.8137, -20.8404,
         -3.9847,  -3.5817, -18.0827,   0.0204, -10.6890,  24.5702,
          1.2333, -40.8597, -54.4634,   6.1019,  22.3676,  31.8456,
        -25.1448,  26.5563,  51.6522, -25.9091])


In [13]:
import torch.optim as optim

# Same update as the hand-written loop in the previous cell, delegated to an
# optimizer object. Reuses `input`, `target` and `criterion` from that cell.
# create your optimizer
optimizer = optim.SGD(model_alexnet.parameters(), lr=0.01)

# in your training loop:
optimizer.zero_grad()   # zero the gradient buffers
output = model_alexnet(input)
loss = criterion(output, target)
loss.backward()
optimizer.step()    # Does the update

### Transfer Learning
Volviendo a la resNet

In [14]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import time
import os
import copy

In [16]:
# Data augmentation and normalization for training
# Just normalization for validation
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

# Dataset root; it must contain one 'train' and one 'val' sub-folder.
# The location can be overridden with the HYMENOPTERA_DATA_DIR environment
# variable so the notebook also runs on other machines; the default is the
# path that used to be hard-coded here.
data_dir = os.environ.get(
    'HYMENOPTERA_DATA_DIR',
    '/media/julio/DATA/Ayudantia/DeepLearning/Tarea2/hymenoptera_data')
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
                                          data_transforms[x])
                  for x in ['train', 'val']}
# Only the training batches are shuffled. Validation metrics are sums over
# the whole split, so a fixed order there changes nothing but keeps the
# evaluation pass repeatable.
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=4,
                                             shuffle=(x == 'train'), num_workers=4)
              for x in ['train', 'val']}
# Number of images per split; train_model divides by these to get averages.
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
class_names = image_datasets['train'].classes
print(class_names)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

['ants', 'bees']


In [17]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    """Train ``model`` and return it loaded with its best validation weights.

    Every epoch runs a 'train' phase followed by a 'val' phase over the
    notebook-level ``dataloaders``; batches are moved to the notebook-level
    ``device`` and per-phase averages are computed with ``dataset_sizes``.

    Args:
        model: network to optimise (modified in place).
        criterion: loss, called as ``criterion(outputs, labels)``.
        optimizer: optimizer that updates the parameters being trained.
        scheduler: learning-rate scheduler, stepped once per epoch.
        num_epochs: number of epochs to run.

    Returns:
        ``model`` after loading the weights of the epoch that reached the
        highest validation accuracy.
    """
    since = time.time()

    # Snapshot of the best weights seen so far (starts as the initial weights).
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics (loss is a batch mean, so weight it by batch size)
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels)

            if phase == 'train':
                # Advance the LR schedule once per epoch, AFTER this epoch's
                # optimizer updates.  Stepping the scheduler before the
                # optimizer skips the first value of the schedule on
                # PyTorch >= 1.1.
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    # '{:.4f}' (precision 4), matching the per-epoch report above; the former
    # '{:4f}' only set a minimum field width.
    print('Best val Acc: {:.4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

In [18]:
# Fine-tuning setup: start from a pretrained ResNet-18 and replace its final
# fully connected layer with a new one that has 2 outputs (one per class in
# class_names).
model_ft = models.resnet18(pretrained=True)
num_ftrs = model_ft.fc.in_features  # input width of the layer being replaced
model_ft.fc = nn.Linear(num_ftrs, 2)

model_ft = model_ft.to(device)

criterion = nn.CrossEntropyLoss()

# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

In [65]:
# Fine-tune every layer of model_ft for 25 epochs. train_model returns the
# model after loading the weights with the best validation accuracy.
model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler,
                       num_epochs=25)

Epoch 0/24
----------
train Loss: 0.5872 Acc: 0.7090
val Loss: 0.3524 Acc: 0.8693

Epoch 1/24
----------
train Loss: 0.6807 Acc: 0.7459
val Loss: 0.3512 Acc: 0.8824

Epoch 2/24
----------
train Loss: 0.5555 Acc: 0.7541
val Loss: 0.7107 Acc: 0.7451

Epoch 3/24
----------
train Loss: 0.4910 Acc: 0.7787
val Loss: 0.2664 Acc: 0.8889

Epoch 4/24
----------
train Loss: 0.5400 Acc: 0.7664
val Loss: 0.3216 Acc: 0.8758

Epoch 5/24
----------
train Loss: 0.5663 Acc: 0.7910
val Loss: 0.3162 Acc: 0.8824

Epoch 6/24
----------
train Loss: 0.5255 Acc: 0.7582
val Loss: 0.2417 Acc: 0.9150

Epoch 7/24
----------
train Loss: 0.2440 Acc: 0.8975
val Loss: 0.1852 Acc: 0.9346

Epoch 8/24
----------
train Loss: 0.2969 Acc: 0.8607
val Loss: 0.2276 Acc: 0.8954

Epoch 9/24
----------
train Loss: 0.3404 Acc: 0.8484
val Loss: 0.2049 Acc: 0.9216

Epoch 10/24
----------
train Loss: 0.2720 Acc: 0.9016
val Loss: 0.1950 Acc: 0.9346

Epoch 11/24
----------
train Loss: 0.2215 Acc: 0.9016
val Loss: 0.1884 Acc: 0.9281

Ep

Process Process-195:
Process Process-193:
Process Process-196:
Process Process-194:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/usr/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/media/julio/DATA/Ayudantia/DeepLearning/Tarea2/env/lib/python3.5/site-packages/torch/utils/data/dataloader.py", line 52, in _worker_loop
    r = index_queue.get()
  File "/usr/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **se

KeyboardInterrupt: 

In [None]:
# Fixed feature extractor: load a pretrained ResNet-18 and turn off gradient
# tracking for every existing parameter, so only the new final layer created
# below is trained.
model_conv = torchvision.models.resnet18(pretrained=True)
for param in model_conv.parameters():
    param.requires_grad = False

# Parameters of newly constructed modules have requires_grad=True by default
num_ftrs = model_conv.fc.in_features
model_conv.fc = nn.Linear(num_ftrs, 2)

model_conv = model_conv.to(device)

criterion = nn.CrossEntropyLoss()

# Observe that only parameters of final layer are being optimized as
# opposed to before.
optimizer_conv = optim.SGD(model_conv.fc.parameters(), lr=0.001, momentum=0.9)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_conv, step_size=7, gamma=0.1)

# Train with the same loop used for full fine-tuning.
model_conv = train_model(model_conv, criterion, optimizer_conv,
                         exp_lr_scheduler, num_epochs=25)

## Ejemplos para la segunda Tarea

La tarea consiste en identificar el país de origen de nombres, considerando que existen 18 países diferentes.

Esto lo vamos a hacer a nivel de caracteres

In [19]:
from __future__ import unicode_literals, print_function, division
from io import open
import glob

def findFiles(path):
    """Return the list of paths that match the glob pattern ``path``."""
    matches = glob.glob(path)
    return matches

# Show which name files are available.
print(findFiles('data/names/*.txt'))

import unicodedata
import string

# Alphabet known to the model: the ASCII letters plus a few punctuation marks.
all_letters = string.ascii_letters + " .,;'"
n_letters = len(all_letters)

# Turn a Unicode string to plain ASCII, thanks to http://stackoverflow.com/a/518232/2809427
def unicodeToAscii(s):
    """Strip accents from ``s`` and drop every character not in ``all_letters``."""
    kept = []
    for ch in unicodedata.normalize('NFD', s):
        if unicodedata.category(ch) == 'Mn':
            # Combining mark split off by the NFD decomposition: discard it.
            continue
        if ch in all_letters:
            kept.append(ch)
    return ''.join(kept)

print(unicodeToAscii('Ślusàrski'))

# Build the category_lines dictionary, a list of names per language
# (category name -> list of names), plus the ordered list of category names.
# Both are filled by the loop further down in this cell.
category_lines = {}
all_categories = []

# Read a file and split into lines
def readLines(filename):
    """Return the lines of the UTF-8 text file ``filename`` converted to ASCII.

    Leading and trailing whitespace of the whole file is stripped before it is
    split on newlines; every line then goes through ``unicodeToAscii``.
    """
    # The context manager closes the file handle even if reading fails.
    with open(filename, encoding='utf-8') as names_file:
        lines = names_file.read().strip().split('\n')
    return [unicodeToAscii(line) for line in lines]

# Load one category per file.  Files are visited in sorted order because glob
# gives no ordering guarantee and the position of a category in
# all_categories is the class index used by the model and the training code.
for filename in sorted(findFiles('data/names/*.txt')):
    # Category = file name without directory or extension.  Backslashes are
    # normalised first so Windows-style paths are split correctly as well.
    category = filename.replace('\\', '/').split('/')[-1].split('.')[0]
    all_categories.append(category)
    lines = readLines(filename)
    category_lines[category] = lines

n_categories = len(all_categories)

['data/names/Arabic.txt', 'data/names/Chinese.txt', 'data/names/Czech.txt', 'data/names/Dutch.txt', 'data/names/English.txt', 'data/names/French.txt', 'data/names/German.txt', 'data/names/Greek.txt', 'data/names/Irish.txt', 'data/names/Italian.txt', 'data/names/Japanese.txt', 'data/names/Korean.txt', 'data/names/Polish.txt', 'data/names/Portuguese.txt', 'data/names/Russian.txt', 'data/names/Scottish.txt', 'data/names/Spanish.txt', 'data/names/Vietnamese.txt']
Slusarski


Ahora transformamos los nombres en tensores

In [20]:
import torch

# Find letter index from all_letters, e.g. "a" = 0
def letterToIndex(letter):
    """Return the position of ``letter`` in ``all_letters`` (-1 when absent)."""
    position = all_letters.find(letter)
    return position

# Just for demonstration, turn a letter into a <1 x n_letters> Tensor
def letterToTensor(letter):
    """One-hot encode ``letter`` as a tensor of shape (1, n_letters)."""
    one_hot = torch.zeros(1, n_letters)
    column = letterToIndex(letter)
    one_hot[0][column] = 1
    return one_hot

# Turn a line into a <line_length x 1 x n_letters>,
# or an array of one-hot letter vectors
def lineToTensor(line):
    """One-hot encode every letter of ``line``; shape (len(line), 1, n_letters)."""
    encoded = torch.zeros(len(line), 1, n_letters)
    for position in range(len(line)):
        column = letterToIndex(line[position])
        encoded[position][0][column] = 1
    return encoded

# Quick check of both encoders.
print(letterToTensor('J'))

print(lineToTensor('Jones').size())

tensor([[ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,
          0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.]])
torch.Size([5, 1, 57])


In [21]:
import torch.nn as nn

class RNN(nn.Module):
    """Single-step recurrent cell used to classify a sequence fed item by item.

    Both linear layers read the concatenation of the current input and the
    previous hidden state: ``i2h`` produces the next hidden state and ``i2o``
    the class scores, which are returned as log-probabilities.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super(RNN, self).__init__()

        self.hidden_size = hidden_size

        # Width of the concatenated [input, hidden] vector fed to both layers.
        joint_size = input_size + hidden_size
        self.i2h = nn.Linear(joint_size, hidden_size)
        self.i2o = nn.Linear(joint_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        """Process one step; return ``(log_probabilities, next_hidden)``."""
        joint = torch.cat((input, hidden), 1)
        next_hidden = self.i2h(joint)
        scores = self.i2o(joint)
        log_probs = self.softmax(scores)
        return log_probs, next_hidden

    def initHidden(self):
        """Return an all-zero hidden state of shape (1, hidden_size)."""
        return torch.zeros(1, self.hidden_size)

# Instantiate the network: one input per letter of the alphabet, a hidden
# state of n_hidden values and one output per category.
n_hidden = 128
rnn = RNN(n_letters, n_hidden, n_categories)

In [22]:
# Run a single step of the network on one letter, starting from a zero
# hidden state.
# NOTE(review): the name `input` shadows the Python builtin of the same name.
input = letterToTensor('A')
hidden =torch.zeros(1, n_hidden)

output, next_hidden = rnn(input, hidden)

# Same idea, encoding the whole name once and feeding only its first letter.
input = lineToTensor('Albert')
hidden = torch.zeros(1, n_hidden)

output, next_hidden = rnn(input[0], hidden)
print(output)  # one log-probability per category

tensor([[-2.8960, -2.9029, -2.8845, -2.8476, -2.9815, -2.7949, -2.8907,
         -2.8361, -2.8249, -2.8032, -2.9560, -2.9359, -2.8181, -2.9227,
         -2.9040, -2.9587, -2.9078, -2.9924]])


In [23]:
def categoryFromOutput(output):
    """Return ``(name, index)`` of the highest-scoring category in ``output``."""
    _, best = output.topk(1)
    category_i = best[0].item()
    name = all_categories[category_i]
    return name, category_i

print(categoryFromOutput(output))

('French', 5)


In [24]:
import random

def randomChoice(l):
    """Return one element of the sequence ``l`` picked with ``random.randint``."""
    last_index = len(l) - 1
    picked = random.randint(0, last_index)
    return l[picked]

def randomTrainingExample():
    """Draw a random category and a random name from it, with their tensors.

    Returns ``(category, line, category_tensor, line_tensor)``, where
    ``category_tensor`` holds the index of the category in ``all_categories``
    as a long tensor and ``line_tensor`` is the one-hot encoding of ``line``.
    """
    category = randomChoice(all_categories)
    line = randomChoice(category_lines[category])
    category_index = all_categories.index(category)
    category_tensor = torch.tensor([category_index], dtype=torch.long)
    return category, line, category_tensor, lineToTensor(line)

# Show a few random samples.
for i in range(10):
    category, line, category_tensor, line_tensor = randomTrainingExample()
    print('category =', category, '/ line =', line)

category = Greek / line = Stavropoulos
category = Irish / line = William
category = French / line = Levesque
category = Dutch / line = Otten
category = French / line = Samson
category = German / line = Vonnegut
category = Scottish / line = Williamson
category = French / line = Forest
category = Korean / line = Noh
category = Russian / line = Shirinyants


In [25]:
# The network outputs log-probabilities (LogSoftmax), which is the input
# NLLLoss expects.
criterion = nn.NLLLoss()
learning_rate = 0.005 # If you set this too high, it might explode. If too low, it might not learn

def train(category_tensor, line_tensor):
    """Run one SGD step of the global ``rnn`` on a single name.

    Args:
        category_tensor: long tensor holding the index of the true category.
        line_tensor: one-hot encoded name; its first dimension indexes letters.

    Returns:
        ``(output, loss)``: the network output after the last letter and the
        loss as a Python float.

    Raises:
        ValueError: if ``line_tensor`` contains no letters.
    """
    n_steps = line_tensor.size(0)
    if n_steps == 0:
        # Without at least one step there would be no output to score.
        raise ValueError('line_tensor must contain at least one letter')

    hidden = rnn.initHidden()

    rnn.zero_grad()

    # Feed the name letter by letter; only the last output is scored.
    for i in range(n_steps):
        output, hidden = rnn(line_tensor[i], hidden)

    loss = criterion(output, category_tensor)
    loss.backward()

    # Plain SGD update: p <- p - learning_rate * grad.  It runs under
    # no_grad() so the update itself is not tracked by autograd, and it avoids
    # the deprecated add_(number, tensor) call form.
    with torch.no_grad():
        for p in rnn.parameters():
            p.sub_(learning_rate * p.grad)

    return output, loss.item()

In [26]:
import time
import math

n_iters = 100000  # total number of training steps (one random name each)
print_every = 5000  # report progress every this many steps
plot_every = 1000  # record one averaged loss value every this many steps

# Keep track of losses for plotting
current_loss = 0  # loss summed since the last recorded average
all_losses = []

def timeSince(since):
    """Format the time elapsed since ``since`` (a ``time.time()`` value) as 'Xm Ys'."""
    elapsed = time.time() - since
    minutes = math.floor(elapsed / 60)
    seconds = elapsed - minutes * 60
    return '%dm %ds' % (minutes, seconds)

# Main training loop: one randomly drawn name per iteration.
# NOTE(review): the loop variable `iter` shadows the Python builtin.
# NOTE(review): no random seed is set in this notebook, so the sampled names
# and the printed numbers will presumably differ between runs.
start = time.time()

for iter in range(1, n_iters + 1):
    category, line, category_tensor, line_tensor = randomTrainingExample()
    output, loss = train(category_tensor, line_tensor)
    current_loss += loss

    # Print iter number, loss, name and guess
    if iter % print_every == 0:
        guess, guess_i = categoryFromOutput(output)
        correct = '✓' if guess == category else '✗ (%s)' % category
        print('%d %d%% (%s) %.4f %s / %s %s' % (iter, iter / n_iters * 100, timeSince(start), loss, line, guess, correct))

    # Add current loss avg to list of losses
    if iter % plot_every == 0:
        all_losses.append(current_loss / plot_every)
        current_loss = 0  # start accumulating the next window

5000 5% (0m 4s) 1.9574 Long / Korean ✗ (Chinese)
10000 10% (0m 8s) 3.0889 Cruz / Korean ✗ (Portuguese)
15000 15% (0m 12s) 1.3388 Aswad / Arabic ✓
20000 20% (0m 16s) 0.7766 Voltolini / Italian ✓
25000 25% (0m 20s) 2.9906 Poulin / Irish ✗ (French)
30000 30% (0m 24s) 1.6639 Anopriev / French ✗ (Russian)
35000 35% (0m 29s) 1.5852 Cucinotta / Spanish ✗ (Italian)
40000 40% (0m 33s) 2.5500 Kazmier / German ✗ (Czech)
45000 45% (0m 37s) 1.1467 Offermans / Dutch ✓
50000 50% (0m 41s) 0.3192 Karahalios / Greek ✓
55000 55% (0m 45s) 2.0760 Neumann / French ✗ (German)
60000 60% (0m 49s) 3.0403 Openshaw / Irish ✗ (English)
65000 65% (0m 53s) 5.3111 Ruzzier / Czech ✗ (Italian)
70000 70% (0m 57s) 1.4699 Geroux / Arabic ✗ (French)
75000 75% (1m 1s) 2.2716 Bosch / Scottish ✗ (German)
80000 80% (1m 4s) 3.2323 Smit / German ✗ (Dutch)
85000 85% (1m 8s) 0.3407 D'cruze / Portuguese ✓
90000 90% (1m 12s) 0.7144 Srour / Arabic ✓
95000 95% (1m 15s) 0.0213 Kawasaki / Japanese ✓
100000 100% (1m 19s) 0.6712 Stewart /

In [27]:
# Just return an output given a line
def evaluate(line_tensor):
    """Feed ``line_tensor`` letter by letter to the global ``rnn``.

    Args:
        line_tensor: one-hot encoded name; its first dimension indexes letters.

    Returns:
        The network output produced after the last letter.

    Raises:
        ValueError: if ``line_tensor`` contains no letters.
    """
    n_steps = line_tensor.size(0)
    if n_steps == 0:
        # An empty name would otherwise fail below with an obscure
        # "referenced before assignment" error on ``output``.
        raise ValueError('line_tensor must contain at least one letter')

    hidden = rnn.initHidden()

    for i in range(n_steps):
        output, hidden = rnn(line_tensor[i], hidden)

    return output

def predict(input_line, n_predictions=3):
    """Print and return the ``n_predictions`` most likely categories of a name.

    Args:
        input_line: the name to classify.
        n_predictions: how many of the top categories to report.

    Returns:
        A list of ``[value, category_name]`` pairs, best first, where
        ``value`` is the network output (a log-probability) for that category.
    """
    print('\n> %s' % input_line)
    # Inference only: no gradients are needed.
    with torch.no_grad():
        output = evaluate(lineToTensor(input_line))

        # Get top N categories
        topv, topi = output.topk(n_predictions, 1, True)
        predictions = []

        for i in range(n_predictions):
            value = topv[0][i].item()
            category_index = topi[0][i].item()
            print('(%.2f) %s' % (value, all_categories[category_index]))
            predictions.append([value, all_categories[category_index]])

    # The list used to be built and then discarded; hand it back to the caller.
    return predictions

predict('Dovesky')
predict('Jackson')
# Trailing semicolon: keeps the notebook from echoing the returned list.
predict('Alvaro');


> Dovesky
(-0.75) Czech
(-0.82) Russian
(-2.89) English

> Jackson
(-0.27) Scottish
(-2.00) English
(-3.47) Czech

> Alvaro
(-0.66) Italian
(-1.14) Spanish
(-2.37) Portuguese
