<a href="https://colab.research.google.com/github/kaustubh41096/Neural-Networks/blob/main/CNN_MNIST.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [60]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

In [61]:
# Device configuration: use the GPU when CUDA is available, otherwise the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Reproducibility: seed PyTorch's random number generator so that weight
# initialisation and batch shuffling repeat across "Restart & Run All".
# (Some GPU kernels may still introduce small run-to-run differences.)
seed = 42
torch.manual_seed(seed)

# Hyper-parameters
conv1_dimension = 3   # Number of output channels (feature maps) of convolutional layer 1
conv2_dimension = 6   # Number of output channels (feature maps) of convolutional layer 2
pool_kernel_size = 2  # Pooling window is 2x2
stride_pool = 2       # Stride of the pooling window
stride_kernel = 1     # Stride of the convolution kernel
kernel_size = 5       # Convolution kernel is 5x5
hidden_size1 = 250    # Number of neurons in hidden layer 1
hidden_size2 = 100    # Number of neurons in hidden layer 2
num_classes = 10      # Number of target labels (0 - 9)
num_epochs = 3        # Number of full passes over the training set
batch_size = 100      # Number of samples per batch
learning_rate = 0.001 # Step size the optimizer uses when updating the weights

In [62]:
# MNIST dataset
# Both splits are read from the same root folder and every image is converted
# to a tensor on access. Only the training split asks torchvision to download.
to_tensor = transforms.ToTensor()

train_dataset = torchvision.datasets.MNIST('./data', train=True, transform=to_tensor, download=True)
test_dataset = torchvision.datasets.MNIST('./data', train=False, transform=to_tensor)

# Data loaders
# Serve each split in batches of `batch_size`. Training batches are shuffled;
# the test split keeps its stored order because ordering does not matter for
# evaluation.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [63]:
# Peek at a single training batch to read the image side length. Dimension 2
# of the batch tensor is used as that length, which assumes square images in
# (batch, channel, height, width) layout -- TODO confirm before swapping datasets.
images, labels = next(iter(train_loader))
image_shape = images.shape[2]


def _output_size(input_size, window, stride):
    """Return the side length left by an unpadded conv/pool window.

    Uses floor((input_size - window) / stride) + 1, the output-size formula
    for unpadded convolution and pooling layers, in integer arithmetic so
    every intermediate value is a real feature-map size.
    """
    return (input_size - window) // stride + 1


# Follow the side length through conv1 -> pool -> conv2 -> pool.
layer1_size = _output_size(image_shape, kernel_size, stride_kernel)
layer2_size = _output_size(layer1_size, pool_kernel_size, stride_pool)
layer3_size = _output_size(layer2_size, kernel_size, stride_kernel)
layer4_size = _output_size(layer3_size, pool_kernel_size, stride_pool)

# Kept as an explicit int: it is used to size the first linear layer.
layer4_size = int(layer4_size)

In [64]:
# Convolutional neural network: two conv + max-pool stages followed by three fully connected layers
class NeuralNet(nn.Module):
    """Small CNN classifier for single-channel images.

    Architecture: conv1 -> ReLU -> max-pool -> conv2 -> ReLU -> max-pool ->
    flatten -> l1 -> ReLU -> l2 -> ReLU -> l3.

    All layer sizes are read from the notebook-level hyper-parameters
    (`conv1_dimension`, `conv2_dimension`, `kernel_size`, `stride_kernel`,
    `pool_kernel_size`, `stride_pool`, `layer4_size`, `hidden_size1`,
    `hidden_size2`, `num_classes`) at construction time.
    """

    def __init__(self):
        """Build the layers from the notebook-level hyper-parameters."""
        super().__init__()
        # Pass the configured stride explicitly so the layers agree with the
        # feature-map size computation that produced `layer4_size`.
        self.conv1 = nn.Conv2d(1, conv1_dimension, kernel_size, stride=stride_kernel)
        self.pool = nn.MaxPool2d(pool_kernel_size, stride_pool)
        self.conv2 = nn.Conv2d(conv1_dimension, conv2_dimension, kernel_size, stride=stride_kernel)
        # Number of values per sample after the second pooling stage.
        self.flat_features = conv2_dimension * layer4_size * layer4_size
        self.l1 = nn.Linear(self.flat_features, hidden_size1)
        self.relu = nn.ReLU()
        self.l2 = nn.Linear(hidden_size1, hidden_size2)
        self.l3 = nn.Linear(hidden_size2, num_classes)

    def forward(self, x):
        """Return the raw class scores (logits) for a batch of images.

        Args:
            x: batch tensor accepted by `conv1` (one input channel).

        Returns:
            Tensor with one row per input sample and `num_classes` columns.

        Raises:
            RuntimeError: if the flattened feature count per sample does not
                match what `l1` was built for (e.g. unexpected image size).
        """
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        # Flatten per sample while keeping the batch dimension fixed. Using
        # view(-1, features) here would silently merge samples whenever the
        # total element count happens to divide evenly.
        x = x.view(x.size(0), -1)
        x = self.relu(self.l1(x))
        x = self.relu(self.l2(x))
        # No activation and no softmax at the end:
        # softmax is included in the CrossEntropyLoss function.
        return self.l3(x)

In [65]:
# Instantiate the network, then move it onto the selected device
model = NeuralNet()
model = model.to(device)

# Loss and optimizer: cross-entropy over the raw class scores, Adam for the weight updates
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

In [66]:
# Train the model
n_total_steps = len(train_loader)
for epoch in range(num_epochs):  # Each epoch is one full pass over the training batches
    for i, (images, labels) in enumerate(train_loader):  # Each batch gets one forward/backward/update cycle
        # Move the batch to the configured device
        images, labels = images.to(device), labels.to(device)

        # Clear the gradients left over from the previous batch before new ones are computed
        optimizer.zero_grad()

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass (backpropagation), then one optimizer step scaled by the learning rate
        loss.backward()
        optimizer.step()

        # Only report the loss on every 100th step
        if (i+1) % 100 != 0:
            continue
        print (f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')

Epoch [1/3], Step [100/600], Loss: 0.7080
Epoch [1/3], Step [200/600], Loss: 0.3039
Epoch [1/3], Step [300/600], Loss: 0.2494
Epoch [1/3], Step [400/600], Loss: 0.1170
Epoch [1/3], Step [500/600], Loss: 0.4086
Epoch [1/3], Step [600/600], Loss: 0.1356
Epoch [2/3], Step [100/600], Loss: 0.1561
Epoch [2/3], Step [200/600], Loss: 0.0671
Epoch [2/3], Step [300/600], Loss: 0.0672
Epoch [2/3], Step [400/600], Loss: 0.1377
Epoch [2/3], Step [500/600], Loss: 0.0679
Epoch [2/3], Step [600/600], Loss: 0.1196
Epoch [3/3], Step [100/600], Loss: 0.0283
Epoch [3/3], Step [200/600], Loss: 0.1368
Epoch [3/3], Step [300/600], Loss: 0.0932
Epoch [3/3], Step [400/600], Loss: 0.1123
Epoch [3/3], Step [500/600], Loss: 0.0444
Epoch [3/3], Step [600/600], Loss: 0.1215


In [67]:
# Test the model
# Put the model in evaluation mode for inference and remember the previous
# mode so it can be restored once the evaluation is done.
was_training = model.training
model.eval()

# In test phase, we don't need to compute gradients (for memory efficiency)
with torch.no_grad():
    n_correct = 0
    n_samples = 0
    for images, labels in test_loader:  # Test loader yields one batch of images at a time
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)   # Class scores for every sample in the batch
        # max returns (value, index); the index of the largest score is the predicted class
        _, predicted = torch.max(outputs, 1)
        n_samples += labels.size(0)                       # Number of samples seen so far
        n_correct += (predicted == labels).sum().item()   # Number of correct predictions so far

    acc = 100.0 * n_correct / n_samples                   # Computed once all batches have been evaluated
    # Report the number of images actually evaluated instead of a hard-coded count
    print(f'Accuracy of the network on the {n_samples} test images: {acc} %')

# Restore whichever mode the model was in before evaluation
model.train(was_training)

Accuracy of the network on the 10000 test images: 97.72 %
