Structure:

1. Imports

2. Create Fully Connected Network

3. Set Device

4. Hyperparameters

5. Load Data (Simple MNIST)

6. Initialize network

7. Loss and Optimizer

8. Train Network

9. Check accuracy on training and test to see how good is our model (Eval)


In [48]:
# 1. Imports
import torch
import math
import torch.nn as nn # all neural network modules, nn.Linear, nn.Conv2d, BatchNorm, loss functions
import torch.optim as optim # all optimization algorithms, SGD, Adam, etc.
import torch.nn.functional as F # all functions that dont have any parameters eg: activations like relu
from torch.utils.data import DataLoader # gives easier dataset management and creates mini batches
import torchvision.datasets as datasets # standard public datasets 
import torchvision.transforms as transforms # transforms on dataset


In [49]:
# 2. Create Fully Connected Network
class FCN(nn.Module):
    def __init__(self, input_size, num_classes):
        super().__init__()
        self.fc1 = nn.Linear(in_features=input_size, 
                             out_features=50) #hidden layer = 50 nodes
        self.fc2 = nn.Linear(in_features=self.fc1.out_features,
                             out_features=num_classes)
        return
    
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x


In [50]:

# 3. Set Device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


In [51]:

# 4. Hyperparameters
input_size = 28 * 28
num_classes = 10
lr = 0.001
batch_size=64
num_epochs=10
n_iterations = math.ceil(len(train_dataset)/batch_size)


In [52]:

# 5. Load Data (Simple MNIST)
train_dataset=datasets.MNIST(root='dataset/', train=True, 
                             transform=transforms.ToTensor(),
                             download=True)
train_loader = DataLoader(train_dataset, batch_size=batch_size,
                          shuffle=True)
test_dataset=datasets.MNIST(root='dataset/', train=False, 
                             transform=transforms.ToTensor(),
                             download=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size,
                          shuffle=False)


In [53]:


# 6. Initialize network
model = FCN(input_size=input_size, num_classes=num_classes).to(device)


In [54]:

# 7. Loss and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=lr)


In [58]:

# 8. Train Network
model.train()
for epoch in range(num_epochs):
    for batch_idx, (data, targets) in enumerate(train_loader):
        data, targets = data.to(device), targets.to(device) # torch.Size([64, 1, 28, 28]), torch.Size([64])
        data=data.view(data.shape[0], -1) # remove the one channel NxHxW torch.Size([64, 784])

        # clean past gradients collected during backprop
        optimizer.zero_grad()

        # Forward Pass - compute predictions
        logits = model(data)
        train_loss = criterion(logits, targets)
        
        if epoch % 2 == 0 and batch_idx % 300 == 0:
            print(f"Epoch : {epoch} Batch_size/Iterations: {batch_idx}/{n_iterations}, Train Loss: {train_loss}")

        # Backward Pass - get the gradients
        train_loss.backward()

        # Update our weights
        optimizer.step()


Epoch : 0 Batch_size/Iterations: 0/938, Train Loss: 0.04015912115573883
Epoch : 0 Batch_size/Iterations: 300/938, Train Loss: 0.051120657473802567
Epoch : 0 Batch_size/Iterations: 600/938, Train Loss: 0.04207490384578705
Epoch : 0 Batch_size/Iterations: 900/938, Train Loss: 0.16693073511123657
Epoch : 2 Batch_size/Iterations: 0/938, Train Loss: 0.04357149451971054
Epoch : 2 Batch_size/Iterations: 300/938, Train Loss: 0.059984687715768814
Epoch : 2 Batch_size/Iterations: 600/938, Train Loss: 0.018003828823566437
Epoch : 2 Batch_size/Iterations: 900/938, Train Loss: 0.03837967664003372
Epoch : 4 Batch_size/Iterations: 0/938, Train Loss: 0.012316723354160786
Epoch : 4 Batch_size/Iterations: 300/938, Train Loss: 0.006459308788180351
Epoch : 4 Batch_size/Iterations: 600/938, Train Loss: 0.0842670127749443
Epoch : 4 Batch_size/Iterations: 900/938, Train Loss: 0.03239819407463074
Epoch : 6 Batch_size/Iterations: 0/938, Train Loss: 0.008761021308600903
Epoch : 6 Batch_size/Iterations: 300/938,

In [62]:

# 9. Check accuracy on training and test to see how good is our model (Eval)
def check_accuracy(loader, model):
    if loader.dataset.train:
        print("Checking accuracy on train data")
    else:
        print("Checking accuracy on test data")
    num_correct=num_samples=0
    val_loss = []
    model.eval()
    with torch.no_grad():
        for batch_idx, (data, targets) in enumerate(loader):
            data, targets = data.to(device), targets.to(device)
            data=data.view(data.shape[0], -1)

            logits=model(data) # (batch_size, num_classes)
            
            # we need max index in dim=1 which holds num_classes values
            prediction_index = torch.argmax(torch.softmax(logits, dim=1), dim=1) # torch.size([64])
            num_correct += (prediction_index == targets).sum()
            num_samples += prediction_index.shape[0]

            val_loss.append(criterion(logits, targets).item()) # add losses for each batch
    
    acc = (num_correct / num_samples) * 100
    print(f"Accuracy achieved : {acc:.2f} on dataset: {len(loader)*batch_size} and mean loss : {sum(val_loss)/len(val_loss):.3f}")

check_accuracy(train_loader, model)
check_accuracy(test_loader, model)




Checking accuracy on train data
Accuracy achieved : 99.48 on dataset: 60032 and mean loss : 0.020
Checking accuracy on test data
Accuracy achieved : 97.18 on dataset: 10048 and mean loss : 0.109
