In [1]:
import torch
from torch import nn
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np

In [2]:
class myLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, outputs):
        super(myLSTM, self).__init__()

        self.model = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )

        self.out = nn.Linear(hidden_size, outputs)

    def forward(self, x):
        # x shape (batch, time_step, input_size)
        # r_out shape (batch, time_step, output_size)
        # h_n shape (n_layers, batch, hidden_size)
        # h_c shape (n_layers, batch, hidden_size)
        r_out, (h_n, h_c) = self.model(x, None)   # None represents zero initial hidden state

        # choose r_out at the last time step
        out = self.out(r_out[:, -1, :])
        return out

# Test Dataset (Numbers)

In [38]:
# Device configuration
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Hyper parameters
num_epochs = 2000
num_classes = 10
batch_size = 10
learning_rate = 0.001

In [39]:
# Data
numbers_train_forwards = [0,1,2,3,4,5,6,7,8,9,0]
numbers_train_backwards = [0,9,8,7,6,5,4,3,2,1,0]
numbers_train_i = [0,1,2,3,4,5,6,7,8,9]

In [44]:
# Train the model
import random

# Create model
net = myLSTM(5, 8, 1, 10)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
    
    for i in numbers_train_i[0:5]:
        ## Forward numbers
        input_tensor = torch.Tensor([numbers_train_forwards[i:i+5]]).unsqueeze(0)

        # Forward pass
        outputs = net(input_tensor)

        label = torch.LongTensor([numbers_train_forwards[i+5]])
        loss = criterion(outputs, label)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step() 
        
        ## Backward numbers
        input_tensor = torch.Tensor([numbers_train_backwards[i:i+5]]).unsqueeze(0)

        # Forward pass
        outputs = net(input_tensor)

        label = torch.LongTensor([numbers_train_backwards[i+5]])
        loss = criterion(outputs, label)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    if (epoch+1) % 100 == 0:
        print ('Epoch [{}/{}], Loss: {:.4f}' 
                   .format(epoch+1, num_epochs, loss.item()))

Epoch [100/2000], Loss: 1.4593
Epoch [200/2000], Loss: 0.9514
Epoch [300/2000], Loss: 0.7341
Epoch [400/2000], Loss: 0.6174
Epoch [500/2000], Loss: 0.5493
Epoch [600/2000], Loss: 0.5062
Epoch [700/2000], Loss: 0.4744
Epoch [800/2000], Loss: 0.4406
Epoch [900/2000], Loss: 0.3719
Epoch [1000/2000], Loss: 0.2360
Epoch [1100/2000], Loss: 0.1783
Epoch [1200/2000], Loss: 0.1487
Epoch [1300/2000], Loss: 0.1240
Epoch [1400/2000], Loss: 0.1023
Epoch [1500/2000], Loss: 0.0837
Epoch [1600/2000], Loss: 0.0682
Epoch [1700/2000], Loss: 0.0554
Epoch [1800/2000], Loss: 0.0448
Epoch [1900/2000], Loss: 0.0360
Epoch [2000/2000], Loss: 0.0287


In [45]:
# test_set = [[0],[9],[8],[7],[6]]

# Test the model
net.eval()  # eval mode (batchnorm uses moving mean/variance instead of mini-batch mean/variance)
with torch.no_grad():
    print('FORWARDS')
    for i in numbers_train_i[0:5]:
        ## Forward numbers
        input_tensor = torch.Tensor([numbers_train_forwards[i:i+5]]).unsqueeze(0)

        outputs = net(input_tensor)
        _, predicted = torch.max(outputs.data, 1)
        print('input:', np.array(input_tensor), 'output:', predicted)
    
    print('BACKWARDS')
    for i in numbers_train_i[0:5]:
        ## Forward numbers
        input_tensor = torch.Tensor([numbers_train_backwards[i:i+5]]).unsqueeze(0)

        outputs = net(input_tensor)
        _, predicted = torch.max(outputs.data, 1)
        print('input:', np.array(input_tensor), 'output:', predicted)

FORWARDS
input: [[[0. 1. 2. 3. 4.]]] output: tensor([5])
input: [[[1. 2. 3. 4. 5.]]] output: tensor([6])
input: [[[2. 3. 4. 5. 6.]]] output: tensor([7])
input: [[[3. 4. 5. 6. 7.]]] output: tensor([8])
input: [[[4. 5. 6. 7. 8.]]] output: tensor([9])
BACKWARDS
input: [[[0. 9. 8. 7. 6.]]] output: tensor([5])
input: [[[9. 8. 7. 6. 5.]]] output: tensor([4])
input: [[[8. 7. 6. 5. 4.]]] output: tensor([3])
input: [[[7. 6. 5. 4. 3.]]] output: tensor([2])
input: [[[6. 5. 4. 3. 2.]]] output: tensor([1])


In [16]:
# test_set = [0,1,2,3,4,5,6,7,8,9]
test_set = [0,9,8,7,6,5,4,3,2,1]

# Test the model
net.eval()  # eval mode (batchnorm uses moving mean/variance instead of mini-batch mean/variance)
with torch.no_grad():
    for test_num in test_set:
        outputs = net(torch.Tensor([test_num]).unsqueeze(0).unsqueeze(0))
        _, predicted = torch.max(outputs.data, 1)
        print('input:', test_num, 'output:', predicted)

input: 0 output: tensor([1])
input: 9 output: tensor([0])
input: 8 output: tensor([8])
input: 7 output: tensor([7])
input: 6 output: tensor([6])
input: 5 output: tensor([5])
input: 4 output: tensor([3])
input: 3 output: tensor([3])
input: 2 output: tensor([2])
input: 1 output: tensor([0])


In [9]:
# Test the model
net.eval()  # eval mode (batchnorm uses moving mean/variance instead of mini-batch mean/variance)
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total))

NameError: name 'test_loader' is not defined

# MNIST

In [35]:
# Device configuration
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Hyper parameters
num_epochs = 2
num_classes = 10
batch_size = 100
learning_rate = 0.001

In [36]:
# MNIST dataset
# new_mirror = 'https://ossci-datasets.s3.amazonaws.com/mnist'
# torchvision.datasets.MNIST.resources = [
#    ('/'.join([new_mirror, url.split('/')[-1]]), md5)
#    for url, md5 in torchvision.datasets.MNIST.resources
# ]

train_dataset = torchvision.datasets.MNIST(root='data',
                                           train=True, 
                                           transform=transforms.ToTensor())

test_dataset = torchvision.datasets.MNIST(root='data',
                                          train=False, 
                                          transform=transforms.ToTensor())

In [37]:
# Data loader
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size, 
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size, 
                                          shuffle=False)

In [39]:
# Create model
net = myLSTM(28, 64, 1, 10)
net = net.to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)

# Train the model
total_step = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)
        
        # Forward pass
        outputs = net(images.squeeze(1))
        loss = criterion(outputs, labels)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if (i+1) % 100 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                   .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

Epoch [1/2], Step [100/600], Loss: 1.1874
Epoch [1/2], Step [200/600], Loss: 0.6303
Epoch [1/2], Step [300/600], Loss: 0.5380
Epoch [1/2], Step [400/600], Loss: 0.3587
Epoch [1/2], Step [500/600], Loss: 0.3171
Epoch [1/2], Step [600/600], Loss: 0.1807
Epoch [2/2], Step [100/600], Loss: 0.2073
Epoch [2/2], Step [200/600], Loss: 0.3109
Epoch [2/2], Step [300/600], Loss: 0.3576
Epoch [2/2], Step [400/600], Loss: 0.2271
Epoch [2/2], Step [500/600], Loss: 0.2429
Epoch [2/2], Step [600/600], Loss: 0.1052


In [43]:
# Test the model
net.eval()  # eval mode (batchnorm uses moving mean/variance instead of mini-batch mean/variance)
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = net(images.squeeze())
        _, predicted = torch.max(outputs.data, 1)
        print(predicted)
        input()
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total))

tensor([7, 2, 1, 0, 4, 1, 4, 9, 5, 9, 0, 6, 9, 0, 1, 5, 9, 7, 3, 4, 9, 6, 6, 5,
        4, 0, 7, 4, 0, 1, 3, 1, 3, 6, 7, 2, 7, 1, 2, 1, 1, 7, 4, 6, 3, 5, 1, 2,
        4, 4, 6, 3, 5, 5, 6, 0, 4, 1, 9, 5, 7, 8, 9, 3, 7, 4, 6, 4, 3, 0, 7, 0,
        2, 9, 1, 7, 3, 2, 9, 7, 7, 6, 2, 7, 8, 4, 7, 3, 6, 1, 3, 6, 9, 3, 1, 4,
        1, 7, 6, 9], device='cuda:0')


TypeError: 'int' object is not callable

In [50]:
# Test the model
net.eval()  # eval mode (batchnorm uses moving mean/variance instead of mini-batch mean/variance)
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = net(images.squeeze())
        _, predicted = torch.max(outputs.data, 1)
        print(images.shape, predicted.shape)
        break

torch.Size([100, 1, 28, 28]) torch.Size([100])


In [None]:
# 