In [None]:
# importing the required libraries
import torch
from torchvision import datasets, transforms
import torch.optim as optim
from model import Net
import utils
from utils import train, test, model_summary

In [None]:
# Probe once for a usable CUDA runtime and report what was found
cuda = torch.cuda.is_available()
print("CUDA Available?", cuda)
# Device used for the model and the batches: the GPU when present, otherwise the CPU
device = torch.device("cuda") if cuda else torch.device("cpu")

In [None]:
# Normalisation statistics shared by both pipelines
# (these look like the commonly quoted MNIST mean/std - re-check if the dataset changes)
NORM_MEAN = (0.1307,)
NORM_STD = (0.3081,)

# Augmentation step: with probability 0.1, centre-crop the image to 22x22
occasional_center_crop = transforms.RandomApply([transforms.CenterCrop(22)], p=0.1)

# Training pipeline: optional crop, resize back to 28x28, random rotation within
# +/-15 degrees (exposed corners filled with 0), tensor conversion, normalisation
train_transforms = transforms.Compose([
    occasional_center_crop,
    transforms.Resize((28, 28)),
    transforms.RandomRotation((-15., 15.), fill=0),
    transforms.ToTensor(),
    transforms.Normalize(NORM_MEAN, NORM_STD),
])

# Test pipeline: no augmentation, only tensor conversion and the same normalisation
test_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(NORM_MEAN, NORM_STD),
])

In [None]:
# Build the MNIST train and test splits under a shared root directory (downloading is
# requested for both), attaching the matching transform pipeline to each split
data_root = '../data'
train_data = datasets.MNIST(root=data_root, train=True, transform=train_transforms, download=True)
test_data = datasets.MNIST(root=data_root, train=False, transform=test_transforms, download=True)

In [None]:
# Number of samples in each batch produced by the loaders
batch_size = 512

# Options for the training loader. shuffle=True re-orders the samples at the start of
# every epoch. pin_memory is only useful for host-to-GPU copies, so it follows the
# `cuda` flag instead of being hard-coded (avoids a warning on CPU-only machines).
kwargs = {'batch_size': batch_size, 'shuffle': True, 'num_workers': 2, 'pin_memory': cuda}

# Evaluation gains nothing from shuffling; a fixed order keeps test batches reproducible
test_kwargs = {**kwargs, 'shuffle': False}

# Iterators that yield (images, labels) batches of up to batch_size samples
train_loader = torch.utils.data.DataLoader(train_data, **kwargs)
test_loader = torch.utils.data.DataLoader(test_data, **test_kwargs)


In [None]:
import matplotlib.pyplot as plt

# Preview the first 12 samples of one training batch (after the train transforms)
batch_data, batch_label = next(iter(train_loader))

# Explicit 4x3 grid of axes instead of the implicit pyplot "current axes" state
fig, axes = plt.subplots(4, 3)

for ax, image, label in zip(axes.flat, batch_data, batch_label):
  # squeeze(0) removes the leading dimension so imshow receives a 2-D image
  ax.imshow(image.squeeze(0), cmap='gray')
  ax.set_title(label.item())
  ax.set_xticks([])
  ax.set_yticks([])

# Lay out once, after every title exists, rather than on every loop iteration
fig.tight_layout()

In [None]:
# Model and optimizer: SGD with momentum, starting from a learning rate of 0.01
model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

# StepLR multiplies the learning rate by `gamma` every `step_size` calls to
# scheduler.step(); step() is called once per epoch, after that epoch's train/test pass.
# The deprecated `verbose=True` flag is replaced by the explicit LR print in the loop.
# NOTE(review): step_size=1 with gamma=0.1 divides the LR by 10 after every epoch, so it
# is ~1e-7 after five epochs and later epochs barely update the weights - confirm intended.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.1)
num_epochs = 20

for epoch in range(1, num_epochs+1):
  print(f'Epoch {epoch}')
  # Report the learning rate this epoch trains with
  print(f'Learning rate: {scheduler.get_last_lr()[0]:.4e}')
  train(model, device, train_loader, optimizer)
  test(model, device, test_loader)
  scheduler.step()

In [None]:
# Plot the loss/accuracy histories exposed by the utils module, one panel per metric
fig, axs = plt.subplots(2, 2, figsize=(15, 10))

# (axes, history, title) triples: left column is training, right column is test
panels = (
    (axs[0, 0], utils.train_losses, "Training Loss"),
    (axs[1, 0], utils.train_acc, "Training Accuracy"),
    (axs[0, 1], utils.test_losses, "Test Loss"),
    (axs[1, 1], utils.test_acc, "Test Accuracy"),
)
for panel, history, heading in panels:
  panel.plot(history)
  panel.set_title(heading)

In [None]:
# Pass the model built and trained above to the model_summary helper imported from utils
model_summary(model)