In [5]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.optim
import torch.utils.data as Data

from torchvision import datasets, transforms

import matplotlib.pyplot as plt


In [9]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(28*28, 200)
        self.fc2 = nn.Linear(200, 200)
        self.fc3 = nn.Linear(200,10)

    def forward(self, x):
        x = torch.nn.functional.relu(self.fc1(x))
        x = torch.nn.functional.relu(self.fc2(x))
        x = self.fc3(x)
        return torch.nn.functional.log_softmax(x, dim=1)


In [4]:
# set hyperparameters
batch_size = 200
learning_rate = 0.01
epochs = 10
log_interval = 100

In [7]:
# Load data

train_loader = torch.utils.data.DataLoader(
              datasets.MNIST('../data', train=True, download=True, transform= transforms.Compose(
                  [transforms.ToTensor(),
                   transforms.Normalize((0.1307,), (0.3081,))]
              )),
              batch_size = batch_size, shuffle=True)

test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, download = True, transform= transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1207,), (0.3081,))
    ])),
    batch_size = batch_size, shuffle=True)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ../data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 104482062.43it/s]


Extracting ../data/MNIST/raw/train-images-idx3-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ../data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 18556325.65it/s]


Extracting ../data/MNIST/raw/train-labels-idx1-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 24683920.21it/s]


Extracting ../data/MNIST/raw/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 7927810.56it/s]


Extracting ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw



In [11]:
net = Net()
print(net)

Net(
  (fc1): Linear(in_features=784, out_features=200, bias=True)
  (fc2): Linear(in_features=200, out_features=200, bias=True)
  (fc3): Linear(in_features=200, out_features=10, bias=True)
)


In [12]:
# Setup loss function and optimizer

optimizer = torch.optim.SGD(net.parameters(), lr= learning_rate, momentum=0.9)
criterion = nn.NLLLoss()

In [21]:
# Training

for epoch in range(epochs):
    total_loss = 0.0
    for batch_idx, (data, target) in enumerate(train_loader):
        #data, target = Variable(data), Variable(target)

        data = data.view(-1, 28*28) # resize from (batchsize, 1, 28,28)

        optimizer.zero_grad()
        net_out = net(data)
        loss = criterion(net_out, target)
        total_loss += loss.data
        loss.backward()
        optimizer.step()

        if batch_idx and batch_idx % log_interval == 0:
            print(f'Train Epoch: {epoch+1} | Loss: {loss.data} | Average Training Loss: {total_loss/len(train_loader.dataset)}')



Train Epoch: 1 | Loss: 0.050167761743068695 | Average Training Loss: 4.523328607319854e-05
Train Epoch: 1 | Loss: 0.06190430745482445 | Average Training Loss: 9.795054938877001e-05
Train Epoch: 2 | Loss: 0.027787471190094948 | Average Training Loss: 4.414796785567887e-05
Train Epoch: 2 | Loss: 0.00941787101328373 | Average Training Loss: 8.74034158186987e-05
Train Epoch: 3 | Loss: 0.034844495356082916 | Average Training Loss: 3.403639857424423e-05
Train Epoch: 3 | Loss: 0.02588096633553505 | Average Training Loss: 7.244427251862362e-05
Train Epoch: 4 | Loss: 0.013644549995660782 | Average Training Loss: 2.925907028838992e-05
Train Epoch: 4 | Loss: 0.02555500715970993 | Average Training Loss: 5.628174767480232e-05
Train Epoch: 5 | Loss: 0.01905379258096218 | Average Training Loss: 2.2748397896066308e-05
Train Epoch: 5 | Loss: 0.02495364099740982 | Average Training Loss: 5.171689917915501e-05
Train Epoch: 6 | Loss: 0.010910824872553349 | Average Training Loss: 2.0712863260996528e-05
Trai

In [24]:
# Testing

test_loss = 0.0
correct = 0.0

for data, target in test_loader:
    data = data.view(-1, 28*28)

    net_out = net(data)
    l = criterion(net_out, target)
    test_loss += l.data

    pred = net_out.data.max(1)[1] # get the index of maximum log- probability

    correct += pred.eq(target.data).sum()

test_loss /= len(test_loader.dataset)

print(f'Test: Average loss: {test_loss} | Accuracy: {correct / len(test_loader.dataset)}')


Test: Average loss: 0.00034587705158628523 | Accuracy: 0.9801999926567078


# Save Model

In [15]:
for param_tensor in net.state_dict():
    print(param_tensor, net.state_dict()[param_tensor].size())

for var_name in optimizer.state_dict():
    print(var_name, optimizer.state_dict()[var_name])


fc1.weight torch.Size([200, 784])
fc1.bias torch.Size([200])
fc2.weight torch.Size([200, 200])
fc2.bias torch.Size([200])
fc3.weight torch.Size([10, 200])
fc3.bias torch.Size([10])
state {0: {'momentum_buffer': tensor([[ 0.0015,  0.0015,  0.0015,  ...,  0.0015,  0.0015,  0.0015],
        [ 0.0001,  0.0001,  0.0001,  ...,  0.0001,  0.0001,  0.0001],
        [ 0.0026,  0.0026,  0.0026,  ...,  0.0026,  0.0026,  0.0026],
        ...,
        [ 0.0011,  0.0011,  0.0011,  ...,  0.0011,  0.0011,  0.0011],
        [ 0.0005,  0.0005,  0.0005,  ...,  0.0005,  0.0005,  0.0005],
        [-0.0008, -0.0008, -0.0008,  ..., -0.0008, -0.0008, -0.0008]])}, 1: {'momentum_buffer': tensor([-3.5123e-03, -3.0774e-04, -6.2156e-03,  3.8736e-03, -5.1608e-04,
         3.5619e-03, -2.2606e-03,  5.1044e-04, -5.1991e-04, -1.2514e-03,
        -3.4784e-04,  1.5793e-03,  1.7618e-03, -2.6711e-04, -2.0369e-03,
        -1.3213e-03, -4.3378e-03, -3.6281e-03,  5.1811e-03, -3.3090e-03,
        -2.5672e-03,  1.9122e-03,  2.2

In [16]:
torch.save(net.state_dict(), '../mnist_demo.pt')

In [18]:
# check
!ls -l ../mnist_demo.pt

-rw-r--r-- 1 root root 798895 Oct 11 13:43 ../mnist_demo.pt


In [19]:
# Load saved model

model = Net()
model.load_state_dict(torch.load('../mnist_demo.pt'))
model.eval()

Net(
  (fc1): Linear(in_features=784, out_features=200, bias=True)
  (fc2): Linear(in_features=200, out_features=200, bias=True)
  (fc3): Linear(in_features=200, out_features=10, bias=True)
)

In [20]:
# Testing

test_loss = 0.0
correct = 0.0

for data, target in test_loader:
    data = data.view(-1, 28*28)

    net_out = model(data)
    l = criterion(net_out, target)
    test_loss += l.data

    pred = net_out.data.max(1)[1] # get the index of maximum log- probability

    correct += pred.eq(target.data).sum()

test_loss /= len(test_loader.dataset)

print(f'Test: Average loss: {test_loss} | Accuracy: {correct / len(test_loader.dataset)}')


Test: Average loss: 0.00035307055804878473 | Accuracy: 0.9778000116348267
