In [1]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np

In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [4]:
train_dataset = torchvision.datasets.MNIST(root='./', train=True, transform=transforms.ToTensor(), download=True)

0.3%

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./MNIST/raw/train-images-idx3-ubyte.gz


100.0%


Extracting ./MNIST/raw/train-images-idx3-ubyte.gz to ./MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz


100.0%

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./MNIST/raw/train-labels-idx1-ubyte.gz
Extracting ./MNIST/raw/train-labels-idx1-ubyte.gz to ./MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./MNIST/raw/t10k-images-idx3-ubyte.gz



100.0%
100.0%


Extracting ./MNIST/raw/t10k-images-idx3-ubyte.gz to ./MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./MNIST/raw/t10k-labels-idx1-ubyte.gz
Extracting ./MNIST/raw/t10k-labels-idx1-ubyte.gz to ./MNIST/raw



In [5]:
test_dataset = torchvision.datasets.MNIST(root='./', train=False, transform=transforms.ToTensor())

In [14]:
# Hyper-parameters 
input_size = 784 # 28x28
hidden_size = 256 
num_classes = 10
EPOCHS = 5
batch_size = 100
learning_rate = 0.001

In [15]:
train_ds = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

In [16]:
test_ds = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [24]:
# Fully connected neural network with one hidden layer
class Feed_Forward_Net(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(Feed_Forward_Net, self).__init__()
        self.input_size = input_size
        self.layer_1 = nn.Linear(input_size, hidden_size) 
        self.relu = nn.ReLU()
        self.dropout1 = nn.Dropout(0.2)
        self.layer_2 = nn.Linear(hidden_size, num_classes)  

    def forward(self, x):
        x   = self.layer_1(x)
        x   = self.relu(x)
        x   = self.dropout1(x)
        out = self.layer_2(x)
        # no activation and no softmax at the end
        return out

In [25]:
model = Feed_Forward_Net(input_size=input_size, hidden_size=hidden_size, num_classes=num_classes).to(device)

In [26]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [27]:
def train_step(model, images, labels):
    model.train()
    
    images = images.reshape(-1, 28*28).to(device)
    labels = labels.to(device)
    
    outputs = model(images)
    loss = criterion(outputs, labels)
    loss_val = loss.item()
    
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    _, predicted = torch.max(outputs.data, 1)
    n_samples = labels.size(0)
    n_correct = (predicted == labels).sum().item()
    acc = 100.0 * n_correct / n_samples
    return loss_val, acc

In [30]:
def test_step(model, images, labels):
    model.eval()
    # origin shape: [100, 1, 28, 28]
    # resized: [100, 784]
    images = images.reshape(-1, 28*28).to(device)
    labels = labels.to(device)

    # Forward pass
    outputs = model(images)
    loss = criterion(outputs, labels)
    loss_val = loss.item()

    # Pytorch need a manual coding for accuracy
    # max returns (value ,index)
    _, predicted = torch.max(outputs.data, 1)           
    n_samples = labels.size(0)
    n_correct = (predicted == labels).sum().item()
    acc = 100.0 * n_correct / n_samples

    return loss_val, acc

In [37]:
from tqdm import tqdm, tqdm_notebook, trange

In [38]:
for epoch in range(EPOCHS):
    with tqdm_notebook(total=len(train_ds), desc=f"Train Epoch {epoch+1}") as pbar:
        train_losses = []
        train_accuracies = []
        
        for i, (images, labels) in enumerate(train_ds):
            loss_val, acc = train_step(model, images, labels)
            
            train_losses.append(loss_val)
            train_accuracies.append(acc)
            
            pbar.update(1)
            pbar.set_postfix_str(f"Loss: {loss_val:.4f} ({np.mean(train_losses):.4f}) Acc: {acc:.3f} ({np.mean(train_accuracies):.3f})")
            
            
    with torch.no_grad():
        with tqdm_notebook(total=len(test_ds), desc=f"Test Epoch {epoch+1}") as pbar:
            test_losses = []
            test_accuracies = []
            
            for images, labels in test_ds:
                loss_val, acc = test_step(model, images, labels)
                
                test_losses.append(loss_val)
                test_accuracies.append(acc)
                
                pbar.update(1)
                pbar.set_postfix_str(f"Loss: {loss_val:.4f} ({np.mean(test_losses):.4f}) Acc: {acc:.3f} ({np.mean(test_accuracies):.3f})")

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  with tqdm_notebook(total=len(train_ds), desc=f"Train Epoch {epoch+1}") as pbar:


Train Epoch 1:   0%|          | 0/600 [00:00<?, ?it/s]

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  with tqdm_notebook(total=len(test_ds), desc=f"Test Epoch {epoch+1}") as pbar:


Test Epoch 1:   0%|          | 0/100 [00:00<?, ?it/s]

Train Epoch 2:   0%|          | 0/600 [00:00<?, ?it/s]

Test Epoch 2:   0%|          | 0/100 [00:00<?, ?it/s]

Train Epoch 3:   0%|          | 0/600 [00:00<?, ?it/s]

Test Epoch 3:   0%|          | 0/100 [00:00<?, ?it/s]

Train Epoch 4:   0%|          | 0/600 [00:00<?, ?it/s]

Test Epoch 4:   0%|          | 0/100 [00:00<?, ?it/s]

Train Epoch 5:   0%|          | 0/600 [00:00<?, ?it/s]

Test Epoch 5:   0%|          | 0/100 [00:00<?, ?it/s]