# Imports

In [1]:
import torch
import torch.nn as nn # all nn modules
import torch.optim as optim # optimization algorithms
import torch.nn.functional as F # activation functions like relu, tanh (all functions with no parameters)
from torch.utils.data import DataLoader # helps with daata
import torchvision.datasets as datasets # has many data sets
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
from torch.utils.tensorboard import SummaryWriter

# Hyperparameters

In [2]:
input_size = 784
num_classes = 10
# learning_rate = 0.001
learning_rates = [0.1,0.01,0.001,0.0001]
batch_sizes = [64, 512]
# how many data examples we pass in one iteration
# batch_size = 64
epochs = 3

# Load dataset

In [3]:
train_data = datasets.MNIST(root = "data/", train=True, transform=transforms.ToTensor())
# We pass the Dataset as an argument to DataLoader
# This wraps an iterable over our dataset, and supports automatic batching, sampling, shuffling and multiprocess data loading
# Here we define a batch size of 64, i.e. each element in the dataloader iterable will return a batch of 64 features and labels


In [4]:
test_data = datasets.MNIST(root = "data/", train=False, transform=transforms.ToTensor())


# Create a fully connected NN

In [5]:
# inherits from the nn module
# Our first linear layer take input_size, in this case 784 nodes to 512
# and our second linear layer takes 512 to the num_classes we have, in this case 10.
class NN(nn.Module):
    def __init__(self, input_size, num_classes):
        super(NN, self).__init__()
        # input layer
        # fc1 = fully connected layer 1
        self.fc1 = nn.Linear(input_size, 512)
        # self.fc2 = nn.Linear(512, 512)
        self.fc3 = nn.Linear(512, num_classes)
        
        self.initialize_weights()
    
    # create a forward function
    def forward(self, x):
        x = F.relu(self.fc1(x))
        # x = F.relu(self.fc2(x))
        x = self.fc3(x)
        
        return F.log_softmax(x, dim=1)

    def initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.xavier_normal_(m.weight, gain=1.0)
                nn.init.constant_(m.bias, 0)
                

In [6]:
# set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [7]:
# 28*28 images passes as 784
# 10 for no of digits
model = NN(784, 10)
# 64 = no of examples (images) mini batch size
x = torch.rand((64, 784))
model(x).shape

torch.Size([64, 10])

# Train the network

In [8]:
def train(dataloader, model, loss_fn, optimizer, step):
    size = len(dataloader.dataset)
    losses = []
    accuracies = []
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)
        
        # X has shape (64, 1, 28, 28)
        # 64 training examples
        # 1 as we are using gray scale images
        # 28 * 28 height, width        
        # we need to reshape this to (64, 784)
        
        X = X.reshape(X.shape[0],-1)

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)
        losses.append(loss.item())

        # Backpropagation
        # set gradients to zero for each batch, so it does not store back prop calculation from previous forward props
        optimizer.zero_grad()
        loss.backward()
        
        # Gradient step
        optimizer.step()
        
        # calculate running training accuracy
        _, predictions = pred.max(1)
        num_correct = (predictions==y).sum()
        running_train_accuracy = float(num_correct)/float(X.shape[0])
        accuracies.append(running_train_accuracy)
        
        writer.add_scalar('Training loss', loss, global_step=step)
        writer.add_scalar('Training accuracy',running_train_accuracy, global_step=step )
        step+=1

        

        if batch % 100 == 0:
            # print(f"batch: {batch}, len:{len(X)}, current: {batch*len(X)}")
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")
            
    return losses, accuracies

In [9]:
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    
    num_batches = len(dataloader)
    
    model.eval()
    test_loss, correct = 0, 0
    
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            X = X.reshape(X.shape[0],-1)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
            
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [10]:
loss_fn = nn.CrossEntropyLoss()

In [11]:
for batch in batch_sizes:
    for learning_rate in learning_rates:
        model = NN(input_size = input_size, num_classes = num_classes).to(device)
        train_loader = DataLoader(dataset=train_data, batch_size = batch, shuffle=True)
        test_loader = DataLoader(dataset=test_data, batch_size = batch, shuffle=True)
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)
        scheduler =optim.lr_scheduler.ReduceLROnPlateau(optimizer,patience=5, verbose=True)
        writer = SummaryWriter(f'runs/MNIST/Minibatch size {batch} LR {learning_rate}')
        for epoch in range(epochs):
            print(f"Epoch {epoch+1}\n-------------------------------")
            step = 0
            loss = train(train_loader, model, loss_fn, optimizer, step)
            test(test_loader, model, loss_fn)
            mean_loss = sum(loss)/len(loss)
            scheduler.step(mean_loss)
        print("Done!")
        

Epoch 1
-------------------------------
loss: 2.536458  [    0/60000]
loss: 0.710207  [ 6400/60000]
loss: 0.708102  [12800/60000]
loss: 0.714168  [19200/60000]
loss: 0.820589  [25600/60000]
loss: 0.998126  [32000/60000]
loss: 0.840742  [38400/60000]
loss: 0.730818  [44800/60000]
loss: 0.638966  [51200/60000]
loss: 0.854813  [57600/60000]
Test Error: 
 Accuracy: 77.8%, Avg loss: 0.756733 



TypeError: unsupported operand type(s) for +: 'int' and 'list'

# Check some predictions from testset

In [None]:
for data in test_loader:
    X, y = data
    break

In [None]:
plt.figure(figsize=(10, 10))
for i in range(25):
    ax = plt.subplot(5, 5, i + 1)
    plt.imshow(X[i].view(28,28),cmap='Greys_r')
    plt.title(y[i])
    plt.axis("off")