<a href="https://colab.research.google.com/github/nitin-vig/ERAv4S4/blob/main/MNIST_Training_in_one_epoch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# MNIST Training in one epoch

In [280]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

In [281]:
# CUDA?
cuda = torch.cuda.is_available()
print("CUDA Available?", cuda)

CUDA Available? True


In [282]:
import torch.nn as nn
import torch.nn.functional as F

class Network(nn.Module):
  def __init__(self):
    super().__init__()
    self.conv1 = nn.Conv2d(in_channels=1, out_channels=4, kernel_size=3)
    self.conv2 = nn.Conv2d(in_channels=4, out_channels=8, kernel_size=3)

    self.conv3 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3)
    self.conv4 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3)
    self.fc1 = nn.Linear(in_features= 512, out_features=30)
    self.out = nn.Linear(in_features=30, out_features=10)

  def forward(self, t):
    # input layer
    x = t

    ##### BLOCK 1 -Edges and Gradients#####
    # conv1 layer
    x = self.conv1(x)
    x = F.relu(x)
    # conv2 layer
    x = self.conv2(x)
    x = F.relu(x)
    x = F.max_pool2d(x, kernel_size=2, stride=2)

    ##### BLOCK 2 - Shapes and Patterns#####
    # conv3 layer
    x = self.conv3(x)
    x = F.relu(x)
    x = self.conv4(x)
    x = F.relu(x)
    x = F.max_pool2d(x, kernel_size=2, stride=2)


    # reshape
    x = x.reshape(-1,  32 * 4 * 4)

    # fc1 layer
    x = self.fc1(x)
    x = F.relu(x)


    # output layer to 10 outputs
    x = self.out(x)
    # x = F.softmax(x, dim=1) NO softmax as loss function is CrossEntropy
    return x

In [283]:
network = Network()
print(network)
total_params = sum(p.numel() for p in network.parameters())
print(f"Total parameters: {total_params}")

# Count only trainable parameters
trainable_params = sum(p.numel() for p in network.parameters() if p.requires_grad)
print(f"Trainable parameters: {trainable_params}")

Network(
  (conv1): Conv2d(1, 4, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(4, 8, kernel_size=(3, 3), stride=(1, 1))
  (conv3): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1))
  (conv4): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=512, out_features=30, bias=True)
  (out): Linear(in_features=30, out_features=10, bias=True)
)
Total parameters: 21844
Trainable parameters: 21844


In [284]:
# Train data transformations
train_transforms = transforms.Compose([
    # transforms.RandomApply([transforms.CenterCrop(22), ], p=0.1),
    # transforms.Resize((28, 28)),
    # transforms.RandomRotation((-15., 15.), fill=0),
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
    ])

# Test data transformations
test_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1407,), (0.4081,))
    ])

In [285]:
train_data = datasets.MNIST('../data', train=True, download=True, transform=train_transforms)
test_data = datasets.MNIST('../data', train=False, download=True, transform=train_transforms)

In [286]:
batch_size = 512

kwargs = {'batch_size': batch_size, 'shuffle': False, 'num_workers': 2, 'pin_memory': True}

test_loader = torch.utils.data.DataLoader(test_data, **kwargs)
train_loader = torch.utils.data.DataLoader(train_data, **kwargs)

In [287]:
# Data to plot accuracy and loss graphs
train_losses = []
test_losses = []
train_acc = []
test_acc = []

test_incorrect_pred = {'images': [], 'ground_truths': [], 'predicted_vals': []}

In [288]:

from tqdm import tqdm

def GetCorrectPredCount(pPrediction, pLabels):
  return pPrediction.argmax(dim=1).eq(pLabels).sum().item()

def train(model, device, train_loader, optimizer, criterion):
  model.train()
  pbar = tqdm(train_loader)

  train_loss = 0
  correct = 0
  processed = 0

  for batch_idx, (data, target) in enumerate(pbar):
    data, target = data.to(device), target.to(device)
    optimizer.zero_grad()

    # Predict
    pred = model(data)

    # Calculate loss
    loss = criterion(pred, target)
    train_loss+=loss.item()

    # Backpropagation
    loss.backward()
    optimizer.step()

    correct += GetCorrectPredCount(pred, target)
    processed += len(data)

    pbar.set_description(desc= f'Train: Loss={loss.item():0.4f} Batch_id={batch_idx} Accuracy={100*correct/processed:0.2f}')

  train_acc.append(100*correct/processed)
  train_losses.append(train_loss/len(train_loader))

def test(model, device, test_loader, criterion):
    model.eval()

    test_loss = 0
    correct = 0

    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(test_loader):
            data, target = data.to(device), target.to(device)

            output = model(data)
            test_loss += criterion(output, target).item()  # sum up batch loss

            correct += GetCorrectPredCount(output, target)


    test_loss /= len(test_loader.dataset)
    test_acc.append(100. * correct / len(test_loader.dataset))
    test_losses.append(test_loss)

    print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))


In [289]:
!pip install torchsummary
from torchsummary import summary
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")



In [290]:
model = Network().to(device)
# optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
# scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=15, gamma=0.1)
# scheduler = torch.optim.lr_scheduler.OneCycleLR(
#     optimizer, max_lr=0.1, steps_per_epoch=117, epochs=1)

# Switched to AdamW Optimizer with High learning rate of 0.1
optimizer = optim.AdamW(model.parameters(), lr=0.1, weight_decay=0.01)
scheduler = torch.optim.lr_scheduler.OneCycleLR(
    optimizer, max_lr=0.1, epochs=1, steps_per_epoch=len(train_loader),
    pct_start=0.1, anneal_strategy="cos"
)
# optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)

criterion = nn.CrossEntropyLoss(reduction="mean")
num_epochs = 1

for epoch in range(1, num_epochs+1):
  print(f'Epoch {epoch}')
  train(model, device, train_loader, optimizer, criterion)
  test(model, device, train_loader, criterion)
  scheduler.step()

Epoch 1


Train: Loss=0.3315 Batch_id=117 Accuracy=80.17: 100%|██████████| 118/118 [00:18<00:00,  6.52it/s]


Test set: Average loss: 0.0003, Accuracy: 57191/60000 (95.32%)



In [291]:
!pip install torchsummary
from torchsummary import summary
summary(model, input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 4, 26, 26]              40
            Conv2d-2            [-1, 8, 24, 24]             296
            Conv2d-3           [-1, 16, 10, 10]           1,168
            Conv2d-4             [-1, 32, 8, 8]           4,640
            Linear-5                   [-1, 30]          15,390
            Linear-6                   [-1, 10]             310
Total params: 21,844
Trainable params: 21,844
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.08
Params size (MB): 0.08
Estimated Total Size (MB): 0.17
----------------------------------------------------------------
