In [395]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import torch.optim as optim
import matplotlib as plt

1. Implement convolution operation for a sample image of shape (H=6, W=6, C=1) with a random kernel of size (3,3) using torch.nn.functional.conv2d. What is the dimension of the output image? Apply, various values for parameter stride=1 and note the change in the dimension of the output image. Arrive at an equation for the
output image size with respect to the kernel size and stride and verify your answer with code. Now, repeat the exercise by changing padding parameter. Obtain a formula using kernel, stride, and padding to get the output image size. What is the total number of parameters in your network? Verify with code.

In [384]:
image = torch.rand(6,6)
print("image =", image)

image = image.unsqueeze(dim=0)
image = image.unsqueeze(dim=0)
print("image.shape =", image.shape)

image = tensor([[0.6682, 0.9679, 0.8501, 0.1326, 0.8432, 0.7092],
        [0.4298, 0.1764, 0.7986, 0.4790, 0.1699, 0.3899],
        [0.4052, 0.8402, 0.8786, 0.1813, 0.2266, 0.5991],
        [0.4666, 0.2996, 0.6693, 0.2625, 0.9928, 0.6107],
        [0.0127, 0.3418, 0.1829, 0.4901, 0.0444, 0.4862],
        [0.8515, 0.6398, 0.9478, 0.8282, 0.1990, 0.7235]])
image.shape = torch.Size([1, 1, 6, 6])


In [385]:
kernel = torch.ones(3,3)
print("kernel =", kernel)

kernel = kernel.unsqueeze(dim=0)
kernel = kernel.unsqueeze(dim=0)
print("kernel.shape =", kernel.shape)

kernel = tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])
kernel.shape = torch.Size([1, 1, 3, 3])


In [386]:
outimage = F.conv2d(input=image, weight=kernel, stride=1,padding=0)
print("outimage =", outimage)
print("outimage.shape =", outimage.shape)

outimage = tensor([[[[6.0153, 5.3050, 4.5601, 3.7308],
          [4.9645, 4.5857, 4.6587, 3.9118],
          [4.0970, 4.1465, 3.9286, 3.8937],
          [4.4120, 4.6621, 4.6170, 4.6374]]]])
outimage.shape = torch.Size([1, 1, 4, 4])


In [387]:
def params(image,kernel):
    prod=1
    for i in kernel.shape:
        prod*=i
    return prod

parameters=params(image,kernel)
print("Parameters = ",parameters)   

Parameters =  9


2. Apply torch.nn.Conv2d to the input image of Qn 1 with out-channel=3 and observe the
output. Implement the equivalent of torch.nn.Conv2d using the torch.nn.functional.conv2D
to get the same output. You may ignore bias.

In [388]:
conv_layer = nn.Conv2d(in_channels=1, out_channels=3, kernel_size=3, stride=1, padding=0, bias=False)
outimage = conv_layer(image)
print("outimage =", outimage)
print("outimage.shape =", outimage.shape)
        

outimage = tensor([[[[-0.2562, -0.2969, -0.2779, -0.1980],
          [-0.2255, -0.0072, -0.2318, -0.0067],
          [-0.1231, -0.3065,  0.0674, -0.2673],
          [-0.3954, -0.1669, -0.3183, -0.4268]],

         [[-0.3485, -0.2380, -0.3286, -0.3164],
          [-0.0843, -0.3306,  0.0012, -0.0596],
          [-0.4455, -0.0499, -0.1697, -0.0499],
          [-0.0805, -0.1769, -0.4195, -0.4036]],

         [[ 0.2007, -0.0661, -0.2742,  0.1649],
          [-0.0172,  0.1161, -0.3460,  0.1679],
          [ 0.1466, -0.1470, -0.3274, -0.0131],
          [ 0.1892,  0.1594,  0.1954,  0.0140]]]],
       grad_fn=<ConvolutionBackward0>)
outimage.shape = torch.Size([1, 3, 4, 4])


In [389]:
outimage = F.conv2d(input=image, weight=kernel, stride=1,padding=0)
print("outimage =", outimage)
print("outimage.shape =", outimage.shape)

outimage = tensor([[[[6.0153, 5.3050, 4.5601, 3.7308],
          [4.9645, 4.5857, 4.6587, 3.9118],
          [4.0970, 4.1465, 3.9286, 3.8937],
          [4.4120, 4.6621, 4.6170, 4.6374]]]])
outimage.shape = torch.Size([1, 1, 4, 4])


3. Implement CNN for classifying digits in MNIST dataset using PyTorch. Display the
classification accuracy in the form of a Confusion matrix. Verify the number of learnable
parameters in the model.

    Training a CNN on an image dataset is similar to training a basic multi-layer feed-forward
    network on numerical data as outlined below.

        Define model architecture
        Load dataset from disk
        Loop over epochs and batches
        Make predictions and compute loss
        Properly zero our gradient, perform backpropagation, and update model parameters

In [396]:
model=nn.Sequential(
    nn.Conv2d(1,64,kernel_size=3),
    nn.ReLU(),
    nn.MaxPool2d((2,2),stride=2),
    nn.Conv2d(64,128,kernel_size=3),
    nn.ReLU(),
    nn.MaxPool2d((2,2),stride=2),
    nn.Conv2d(128,64,kernel_size=3),
    nn.ReLU(),
    nn.MaxPool2d((2,2),stride=2),
    nn.Linear(64,20,bias=True),
    nn.ReLU(),
    nn.Linear(20,10,bias=True)
)   


In [397]:
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])

train_data = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_data = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)

In [398]:
train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
test_loader = DataLoader(test_data, batch_size=1000, shuffle=False)

In [393]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(mo   del.parameters(), lr=0.001)


In [394]:
epochs = 10
learning_rate = 0.001

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()

    print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss/len(train_loader)}")


RuntimeError: mat1 and mat2 shapes cannot be multiplied (4096x1 and 64x20)

In [None]:
# Evaluate the model
model.eval()  # Set model to evaluation mode

true_labels = []
pred_labels = []

with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)
        
        true_labels.extend(labels.numpy())
        pred_labels.extend(predicted.numpy())

# Confusion Matrix
cm = confusion_matrix(true_labels, pred_labels)

# Display the Confusion Matrix
plt.figure(figsize=(10, 7))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=list(range(10)), yticklabels=list(range(10)))
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()


In [None]:
model.eval()
true_labels = []
pred_labels = []

with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)
        true_labels.extend(labels.numpy())
        pred_labels.extend(predicted.numpy())