In [1]:
import torch
import torch.nn as nn
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Load the MNIST train and test splits; ToTensor() turns each image into a tensor.
# Only the training call asks for a download (the download log below this cell
# shows the t10k test files being fetched by it as well).
train_dataset = datasets.MNIST(
    root='./MNISTdata',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)

test_dataset = datasets.MNIST(
    root='./MNISTdata',
    train=False,
    transform=transforms.ToTensor(),
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./MNISTdata/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./MNISTdata/MNIST/raw/train-images-idx3-ubyte.gz to ./MNISTdata/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./MNISTdata/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./MNISTdata/MNIST/raw/train-labels-idx1-ubyte.gz to ./MNISTdata/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./MNISTdata/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./MNISTdata/MNIST/raw/t10k-images-idx3-ubyte.gz to ./MNISTdata/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./MNISTdata/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./MNISTdata/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./MNISTdata/MNIST/raw



In [3]:
# Mini-batch iterators over both splits (64 samples per batch).
# Training batches are reshuffled on every pass; test batches keep dataset order.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=64)

test_loader = DataLoader(test_dataset, shuffle=False, batch_size=64)


In [4]:
# Fully connected neural network with one hidden layer
class NeuralNet(nn.Module):
    """Fully connected classifier: Linear -> ReLU -> Linear.

    Args:
        input_size: Number of features per flattened sample
            (default 784, i.e. a 28*28 image).
        hidden_size: Width of the single hidden layer (default 500).
        num_classes: Number of output logits (default 10).

    Calling ``NeuralNet()`` with no arguments builds the same 784-500-10
    network as before the sizes were made configurable.
    """

    def __init__(self, input_size=784, hidden_size=500, num_classes=10):
        super().__init__()
        # Attribute names are kept (l1 / relu / l2) so state_dict keys stay stable.
        self.l1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.l2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Map a batch of flattened samples to raw class logits.

        Args:
            x: Tensor whose last dimension equals ``input_size``.

        Returns:
            Tensor with last dimension ``num_classes``. No activation or
            softmax is applied to the output layer.
        """
        out = self.l1(x)
        out = self.relu(out)
        # no activation and no softmax at the end: raw logits are returned
        return self.l2(out)


model = NeuralNet()

In [5]:
# Optimizer and loss: Adam over all model parameters (learning rate 1e-3),
# scored with cross-entropy on the network outputs.
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

In [6]:
# Training schedule: number of passes over the training set, and the number of
# mini-batches in one pass (only used for the progress messages).
num_epochs = 3
n_total_steps = len(train_loader)

In [7]:
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Flatten every image in the batch into one row: [batch, 784]
        images = images.reshape(-1, 28 * 28)

        # Drop the gradients accumulated by the previous step
        optimizer.zero_grad()

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and parameter update
        loss.backward()
        optimizer.step()

        # Report progress on every 100th mini-batch only
        if (i + 1) % 100 != 0:
            continue
        print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')

Epoch [1/3], Step [100/938], Loss: 0.2986
Epoch [1/3], Step [200/938], Loss: 0.2575
Epoch [1/3], Step [300/938], Loss: 0.3076
Epoch [1/3], Step [400/938], Loss: 0.2111
Epoch [1/3], Step [500/938], Loss: 0.1209
Epoch [1/3], Step [600/938], Loss: 0.2645
Epoch [1/3], Step [700/938], Loss: 0.2233
Epoch [1/3], Step [800/938], Loss: 0.0964
Epoch [1/3], Step [900/938], Loss: 0.0416
Epoch [2/3], Step [100/938], Loss: 0.1517
Epoch [2/3], Step [200/938], Loss: 0.1134
Epoch [2/3], Step [300/938], Loss: 0.1101
Epoch [2/3], Step [400/938], Loss: 0.0338
Epoch [2/3], Step [500/938], Loss: 0.1029
Epoch [2/3], Step [600/938], Loss: 0.0250
Epoch [2/3], Step [700/938], Loss: 0.0425
Epoch [2/3], Step [800/938], Loss: 0.0460
Epoch [2/3], Step [900/938], Loss: 0.0793
Epoch [3/3], Step [100/938], Loss: 0.0554
Epoch [3/3], Step [200/938], Loss: 0.0264
Epoch [3/3], Step [300/938], Loss: 0.0312
Epoch [3/3], Step [400/938], Loss: 0.0336
Epoch [3/3], Step [500/938], Loss: 0.0262
Epoch [3/3], Step [600/938], Loss:

In [8]:
# Test the model: classification accuracy over the whole test loader.
# Put the model in eval mode for the measurement and restore whatever mode it
# was in afterwards, so re-running the training cell is unaffected.
was_training = model.training
model.eval()
try:
    # Gradients are not needed for evaluation
    with torch.no_grad():
        n_correct = 0
        n_samples = 0
        for images, labels in test_loader:
            # Flatten every image in the batch into one row: [batch, 784]
            images = images.reshape(-1, 28 * 28)

            outputs = model(images)

            # Predicted class = index of the largest output per row.
            # (No `.data` needed under no_grad, and no throwaway `_`, which
            # would shadow IPython's last-output variable.)
            predicted = outputs.argmax(dim=1)
            n_samples += labels.size(0)
            n_correct += (predicted == labels).sum().item()
finally:
    model.train(was_training)

acc = 100.0 * n_correct / n_samples
# Report the number of samples actually evaluated instead of a hard-coded count
print(f'Accuracy of the network on the {n_samples} test images: {acc} %')

Accuracy of the network on the 10000 test images: 97.46 %
