Structure:

1. Imports

2. Create Fully Connected Network

3. Set Device

4. Hyperparameters

5. Load Data (Simple MNIST)

6. Initialize network

7. Loss and Optimizer

8. Train Network

9. Check accuracy on the training and test sets to see how good our model is (Eval)


In [1]:
# 1. Imports
import torch
import math
import torch.nn as nn # all neural network modules, nn.Linear, nn.Conv2d, BatchNorm, loss functions
import torch.optim as optim # all optimization algorithms, SGD, Adam, etc.
import torch.nn.functional as F # all functions that dont have any parameters eg: activations like relu
from torch.utils.data import DataLoader # gives easier dataset management and creates mini batches
import torchvision.datasets as datasets # standard public datasets 
import torchvision.transforms as transforms # transforms on dataset


In [2]:
# 2. Create Fully Connected Network
class FCN(nn.Module):
    """Two-layer fully connected classifier: Linear -> ReLU -> Linear.

    Args:
        input_size: number of features in each (already flattened) input sample.
        num_classes: number of output scores produced per sample.
        hidden_size: width of the hidden layer. Defaults to 50, the value that
            was previously hard-coded, so existing two-argument calls behave
            exactly as before.
    """

    def __init__(self, input_size, num_classes, hidden_size=50):
        super().__init__()
        # Weight shapes: fc1 is (hidden_size, input_size), fc2 is (num_classes, hidden_size).
        self.fc1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=num_classes)

    def forward(self, x):
        """Map inputs of shape (..., input_size) to raw logits of shape (..., num_classes).

        No softmax is applied here; the caller is expected to use a loss (or
        argmax) that works directly on logits.
        """
        hidden = F.relu(self.fc1(x))
        return self.fc2(hidden)


In [3]:

# 3. Set Device
# Use the GPU when a CUDA runtime is usable, otherwise stay on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')


In [4]:

# 4. Hyperparameters
# Seed PyTorch's global RNG so weight initialisation and the shuffled batch
# order are repeatable across "Restart Kernel -> Run All".
# NOTE: some CUDA kernels may still be non-deterministic.
seed = 42
torch.manual_seed(seed)

input_size = 28 * 28  # each 28x28 image is flattened into one 784-long vector
num_classes = 10      # one output score per class
lr = 0.001            # learning rate handed to the optimizer
batch_size = 64       # samples per mini-batch
num_epochs = 10       # full passes over the training set

In [5]:

# 5. Load Data (Simple MNIST)
# Training split: fetched into dataset/ when not already present; every image
# is converted to a tensor by ToTensor().
train_dataset = datasets.MNIST(
    root='dataset/',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
# Reshuffle the training samples every epoch.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Test split: same storage location and transform, served in a fixed order.
test_dataset = datasets.MNIST(
    root='dataset/',
    train=False,
    transform=transforms.ToTensor(),
    download=True,
)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Mini-batches per epoch, rounded up because the last batch may be partial.
n_iterations = math.ceil(len(train_dataset) / batch_size)


In [6]:


# 6. Initialize network
# Build the classifier from the hyperparameters, then move its parameters to
# the selected device (Module.to returns the same module object).
model = FCN(input_size=input_size, num_classes=num_classes)
model = model.to(device)


In [7]:

# 7. Loss and Optimizer
# Cross-entropy consumes the raw logits produced by the model together with
# integer class targets.
criterion = nn.CrossEntropyLoss()
# Adam updates every parameter of the model using the configured learning rate.
optimizer = optim.Adam(model.parameters(), lr=lr)


In [9]:

# 8. Train Network
# Switch to training mode once up front; nothing inside the loop changes it.
model.train()
for epoch in range(num_epochs):
    for batch_idx, (data, targets) in enumerate(train_loader):
        # Move the mini-batch to the same device as the model.
        data = data.to(device)
        targets = targets.to(device)
        # Keep the batch dimension and flatten everything else so every
        # sample becomes a single feature vector.
        data = data.view(data.shape[0], -1)

        # Forward pass: logits, then the batch loss.
        logits = model(data)
        train_loss = criterion(logits, targets)

        # Progress line on even epochs only, every 300th mini-batch.
        if epoch % 2 == 0 and batch_idx % 300 == 0:
            print(f"Epoch : {epoch} Batch/Iterations: {batch_idx}/{n_iterations}, Avg Train Loss: {train_loss:.4f}")

        # Discard gradients left over from the previous step, backpropagate,
        # then apply the parameter update.
        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()


Epoch : 0 Batch/Iterations: 0/938, Avg Train Loss: 0.2325
Epoch : 0 Batch/Iterations: 300/938, Avg Train Loss: 0.2067
Epoch : 0 Batch/Iterations: 600/938, Avg Train Loss: 0.2328
Epoch : 0 Batch/Iterations: 900/938, Avg Train Loss: 0.2294
Epoch : 2 Batch/Iterations: 0/938, Avg Train Loss: 0.1510
Epoch : 2 Batch/Iterations: 300/938, Avg Train Loss: 0.1173
Epoch : 2 Batch/Iterations: 600/938, Avg Train Loss: 0.1867
Epoch : 2 Batch/Iterations: 900/938, Avg Train Loss: 0.1210
Epoch : 4 Batch/Iterations: 0/938, Avg Train Loss: 0.0247
Epoch : 4 Batch/Iterations: 300/938, Avg Train Loss: 0.1144
Epoch : 4 Batch/Iterations: 600/938, Avg Train Loss: 0.1970
Epoch : 4 Batch/Iterations: 900/938, Avg Train Loss: 0.1119
Epoch : 6 Batch/Iterations: 0/938, Avg Train Loss: 0.0365
Epoch : 6 Batch/Iterations: 300/938, Avg Train Loss: 0.0844
Epoch : 6 Batch/Iterations: 600/938, Avg Train Loss: 0.0339
Epoch : 6 Batch/Iterations: 900/938, Avg Train Loss: 0.0868
Epoch : 8 Batch/Iterations: 0/938, Avg Train Los

In [10]:

# 9. Check accuracy on the training and test sets to see how good our model is (Eval)
def check_accuracy(loader, model, loss_fn=None, device=None):
    """Print the classification accuracy and mean loss of `model` over `loader`.

    Args:
        loader: iterable of (inputs, targets) batches that also exposes a
            `.dataset` attribute supporting `len()`. If the dataset has a
            `train` attribute it is used to label the report as train or test.
        model: module mapping flattened inputs of shape (batch, features) to
            logits of shape (batch, num_classes).
        loss_fn: callable `(logits, targets) -> scalar tensor` that returns the
            mean loss over the batch. Defaults to the notebook-level
            `criterion`, which is what this function always used before.
        device: device the batches are moved to. Defaults to the device of the
            model's first parameter (CPU for a model without parameters).

    Returns:
        None. Results are reported with `print`.

    Side effects:
        Puts `model` in eval mode and leaves it there.
    """
    if loss_fn is None:
        # Fall back to the loss defined in the "Loss and Optimizer" cell.
        loss_fn = criterion
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    dataset = loader.dataset
    if not hasattr(dataset, "train"):
        # Datasets such as TensorDataset or Subset carry no train/test flag.
        print("Checking accuracy on data")
    elif dataset.train:
        print("Checking accuracy on train data")
    else:
        print("Checking accuracy on test data")

    num_correct = 0
    num_samples = 0
    loss_sum = 0.0
    model.eval()
    with torch.no_grad():
        for data, targets in loader:
            data, targets = data.to(device), targets.to(device)
            data = data.view(data.shape[0], -1)  # flatten all non-batch dimensions

            logits = model(data)  # (batch_size, num_classes)

            # Softmax is monotonic, so the argmax of the raw logits already
            # identifies the predicted class.
            predictions = logits.argmax(dim=1)
            num_correct += (predictions == targets).sum().item()
            num_samples += predictions.shape[0]

            # loss_fn yields the batch mean; weight it by the batch size so a
            # smaller final batch does not skew the overall mean.
            loss_sum += loss_fn(logits, targets).item() * data.shape[0]

    if num_samples == 0:
        # Avoid dividing by zero when the loader yields no batches.
        print("No samples to evaluate.")
        return

    accuracy = 100.0 * num_correct / num_samples
    # Normalise by the samples actually seen, which equals len(dataset) for a
    # loader that covers the whole dataset.
    mean_loss = loss_sum / num_samples
    # Adjacent f-strings instead of a backslash continuation, which leaked the
    # source indentation into the printed message.
    print(
        f"Accuracy achieved : {accuracy:.2f} on dataset: {len(dataset)} "
        f"and mean loss : {mean_loss:.3f}"
    )

# Report on the training split first, then on the held-out test split.
for eval_loader in (train_loader, test_loader):
    check_accuracy(eval_loader, model)




Checking accuracy on train data
Accuracy achieved : 98.64 on dataset: 60000           and mean loss : 0.049
Checking accuracy on test data
Accuracy achieved : 97.10 on dataset: 10000           and mean loss : 0.094
