<a href="https://colab.research.google.com/github/swapnilxi/A-computer-vision/blob/main/CNN_U2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [None]:
#Define CNN Architecture

class CNN(nn.Module):
    def __init__(self):
        # Initialize the class as a subclass of nn.Module
        # First convolutional layera
        self.conv1 = nn.Conv2d(3, 6, 5)  # 3 input channels, 6 output channels, 5x5 kernel

        # First max pooling layer
        self.pool1 = nn.MaxPool2d(2, 2)  # 2x2 kernel, stride of 2

        # Second convolutional layer
        self.conv2 = nn.Conv2d(6, 16, 5)  # 6 input channels, 16 output channels, 5x5 kernel

        # Second max pooling layer
        self.pool2 = nn.MaxPool2d(2, 2)  # 2x2 kernel, stride of 2

        # 3 Fully connected layers
        # Linear transformation to 120-dimensional space
        self.fc1 = nn.Linear(16 * 5 * 5, 120)  # Flattening the input, 16*5*5 input features, 120 output features

        # Linear transformation to 84-dimensional space
        self.fc2 = nn.Linear(120, 84)  # 120 input features, 84 output features

        # Linear transformation to 10-dimensional space (output classes)
        self.fc3 = nn.Linear(84, 10)  # 84 input features, 10 output features (number of classes)

    def forward(self, x):
        # Operation 1: First convolutional layer with ReLU activation and max pooling
        x = self.conv1(x)
        x = F.relu(x)
        x = self.pool1(x)

        # Operation 2: Second convolutional layer with ReLU activation and max pooling
        x = self.conv2(x)
        x = F.relu(x)
        x = self.pool2(x)

        # Operation 3: Flattened Layer: Reshape for fully connected layer
        x = x.view(-1, 16 * 5 * 5)

        # Operation 4: First fully connected layer with ReLU activation
        x = self.fc1(x)
        x = F.relu(x)

        # Operation 5: Second fully connected layer with ReLU activation
        x = self.fc2(x)
        x = F.relu(x)

        # Operation 6: Output layer (fully connected) with raw scores for each class
        x = self.fc3(x)

        return x



In [None]:
model = CNN()
print(model)

CNN(
  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [None]:
from torchsummary import summary
summary(model, (3, 32, 32))  # Input shape: (channels, height, width)


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 6, 28, 28]             456
         MaxPool2d-2            [-1, 6, 14, 14]               0
            Conv2d-3           [-1, 16, 10, 10]           2,416
         MaxPool2d-4             [-1, 16, 5, 5]               0
            Linear-5                  [-1, 120]          48,120
            Linear-6                   [-1, 84]          10,164
            Linear-7                   [-1, 10]             850
Total params: 62,006
Trainable params: 62,006
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 0.06
Params size (MB): 0.24
Estimated Total Size (MB): 0.31
----------------------------------------------------------------


# Incorporating CIFR Dataset

In [None]:
from torch.utils.data import DataLoader
import torchvision
from torchvision import datasets, transforms

# Define the data transform for Data Augmentation
transform = transforms.Compose([transforms.RandomHorizontalFlip(),
                                transforms.RandomRotation(10),
                                transforms.RandomResizedCrop(32, scale=(0.8, 1.0), ratio=(1.0,1.0)),
                                transforms.ToTensor(),
                                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

In [None]:
# Load the data
#Training data
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=5, shuffle=True, num_workers=2)

#Test Data
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=4, shuffle=False, num_workers=2)

#CIFAR-10 CLASSES
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

100%|██████████| 170M/170M [00:03<00:00, 48.9MB/s]


In [None]:
# Print the lengths of the trainset and testset
print("Trainset length:", len(trainset))
print("Testset length:", len(testset))

Trainset length: 50000
Testset length: 10000


# Deep CNN From Scratch


In [3]:
#Define CNN Architecture

class CNN(nn.Module):
    def __init__(self):
        # Initialize the class as a subclass of nn.Module
        # First convolutional layera
        self.conv1 = nn.Conv2d(3, 6, 5)  # 3 input channels, 6 output channels, 5x5 kernel

        # First max pooling layer
        self.pool1 = nn.MaxPool2d(2, 2)  # 2x2 kernel, stride of 2

        # Second convolutional layer
        self.conv2 = nn.Conv2d(6, 16, 5)  # 6 input channels, 16 output channels, 5x5 kernel

        # Second max pooling layer
        self.pool2 = nn.MaxPool2d(2, 2)  # 2x2 kernel, stride of 2

        # 3 Fully connected layers
        # Linear transformation to 120-dimensional space
        self.fc1 = nn.Linear(16 * 5 * 5, 120)  # Flattening the input, 16*5*5 input features, 120 output features

        # Linear transformation to 84-dimensional space
        self.fc2 = nn.Linear(120, 84)  # 120 input features, 84 output features

        # Linear transformation to 10-dimensional space (output classes)
        self.fc3 = nn.Linear(84, 10)  # 84 input features, 10 output features (number of classes)

    def forward(self, x):
        # Operation 1: First convolutional layer with ReLU activation and max pooling
        x = self.conv1(x)
        x = F.relu(x)
        x = self.pool1(x)

        # Operation 2: Second convolutional layer with ReLU activation and max pooling
        x = self.conv2(x)
        x = F.relu(x)
        x = self.pool2(x)

        # Operation 3: Flattened Layer: Reshape for fully connected layer
        x = x.view(-1, 16 * 5 * 5)

        # Operation 4: First fully connected layer with ReLU activation
        x = self.fc1(x)
        x = F.relu(x)

        # Operation 5: Second fully connected layer with ReLU activation
        x = self.fc2(x)
        x = F.relu(x)

        # Operation 6: Output layer (fully connected) with raw scores for each class
        x = self.fc3(x)

        return x



In [4]:
#Define DEEP CNN Architecture
class DeepCNN(nn.Module):
    def __init__(self):
        super(DeepCNN, self).__init__()

        # Original Convolutional Layers
        self.conv1_in_channels = 3
        self.conv1_out_channels = 8
        self.conv1_kernel_size = 3
        self.conv1 = nn.Conv2d(self.conv1_in_channels, self.conv1_out_channels, kernel_size=self.conv1_kernel_size)


        self.conv2_in_channels = self.conv1_out_channels
        self.conv2_out_channels = 16
        self.conv2_kernel_size = 2
        self.conv2 = nn.Conv2d(self.conv2_in_channels, self.conv2_out_channels, kernel_size=self.conv2_kernel_size)

        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        # 2 Additional Convolutional Layers
        self.conv3_in_channels = self.conv2_out_channels
        self.conv3_out_channels = 32
        self.conv3_kernel_size = 2
        self.conv3 = nn.Conv2d(self.conv3_in_channels, self.conv3_out_channels, kernel_size=self.conv3_kernel_size)

        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv4_in_channels = self.conv3_out_channels
        self.conv4_out_channels = 64
        self.conv4_kernel_size = 1
        self.conv4 = nn.Conv2d(self.conv4_in_channels, self.conv4_out_channels, kernel_size=self.conv4_kernel_size)

        # Fully Connected Layers
        self.fc1_in_features = self.conv4_out_channels * 6 * 6
        self.fc1_out_features = 100
        self.fc1 = nn.Linear(self.fc1_in_features, self.fc1_out_features)

        self.fc2_in_features = self.fc1_out_features
        self.fc2_out_features = 10
        self.fc2 = nn.Linear(self.fc2_in_features, self.fc2_out_features)

    def forward(self, x):
        # Original Operations
        x = self.conv1(x)
        x = F.relu(x)

        x = self.conv2(x)
        x = F.relu(x)
        x = self.pool1(x)

        # Additional Convolutional Layer Operation
        x = self.conv3(x)
        x = F.relu(x)
        x = self.pool2(x)

        x = self.conv4(x)
        x = F.relu(x)

        # Faltten layer: Reshape for Fully Connected Layer
        x = x.view(-1, self.fc1_in_features)

        # Fully Connected Layers
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)

        return x

In [5]:
model = DeepCNN()
print(model)

DeepCNN(
  (conv1): Conv2d(3, 8, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(8, 16, kernel_size=(2, 2), stride=(1, 1))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(16, 32, kernel_size=(2, 2), stride=(1, 1))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv4): Conv2d(32, 64, kernel_size=(1, 1), stride=(1, 1))
  (fc1): Linear(in_features=2304, out_features=100, bias=True)
  (fc2): Linear(in_features=100, out_features=10, bias=True)
)


#using the gpu

In [6]:
device=torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [7]:
from torchsummary import summary
summary(model, (3, 32, 32))  # Input shape: (channels, height, width)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 30, 30]             224
            Conv2d-2           [-1, 16, 29, 29]             528
         MaxPool2d-3           [-1, 16, 14, 14]               0
            Conv2d-4           [-1, 32, 13, 13]           2,080
         MaxPool2d-5             [-1, 32, 6, 6]               0
            Conv2d-6             [-1, 64, 6, 6]           2,112
            Linear-7                  [-1, 100]         230,500
            Linear-8                   [-1, 10]           1,010
Total params: 236,454
Trainable params: 236,454
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 0.25
Params size (MB): 0.90
Estimated Total Size (MB): 1.16
----------------------------------------------------------------


#Data loding and transform from torchvision

In [8]:
from torch.utils.data import DataLoader
import torchvision
from torchvision import datasets, transforms

# Define the data transform for Data Augmentation
transform = transforms.Compose([transforms.RandomHorizontalFlip(),
                                transforms.RandomRotation(10),
                                transforms.RandomResizedCrop(32, scale=(0.8, 1.0), ratio=(1.0,1.0)),
                                transforms.ToTensor(),
                                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

In [9]:
# Load the data
#Training data
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=16, shuffle=True, num_workers=2)

#Test Data
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=16, shuffle=False, num_workers=2)

#CIFAR-10 CLASSES
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

100%|██████████| 170M/170M [00:04<00:00, 42.1MB/s]


In [13]:
# Print the lengths of the trainset and testset
print("Trainset length:", len(trainset))
print("Testset length:", len(testset))

Trainset length: 50000
Testset length: 10000


In [12]:
trainset.data[:3]

array([[[[ 59,  62,  63],
         [ 43,  46,  45],
         [ 50,  48,  43],
         ...,
         [158, 132, 108],
         [152, 125, 102],
         [148, 124, 103]],

        [[ 16,  20,  20],
         [  0,   0,   0],
         [ 18,   8,   0],
         ...,
         [123,  88,  55],
         [119,  83,  50],
         [122,  87,  57]],

        [[ 25,  24,  21],
         [ 16,   7,   0],
         [ 49,  27,   8],
         ...,
         [118,  84,  50],
         [120,  84,  50],
         [109,  73,  42]],

        ...,

        [[208, 170,  96],
         [201, 153,  34],
         [198, 161,  26],
         ...,
         [160, 133,  70],
         [ 56,  31,   7],
         [ 53,  34,  20]],

        [[180, 139,  96],
         [173, 123,  42],
         [186, 144,  30],
         ...,
         [184, 148,  94],
         [ 97,  62,  34],
         [ 83,  53,  34]],

        [[177, 144, 116],
         [168, 129,  94],
         [179, 142,  87],
         ...,
         [216, 184, 140],
        

# Choosing loss function and optimizer

In [14]:
import torch.optim as optim

# Define the loss function
criterion = nn.CrossEntropyLoss()

# Define the  optimizer
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Define the learning rate scheduler
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

## Training the Data

In [15]:
# Training loop
for epoch in range(30):
    train_loss = 0  # Initialize training loss accumulator for the epoch
    train_acc = 0   # Initialize training accuracy accumulator for the epoch

    model.train()   # Set the model to training mode

    # Iterate over the training data loader
    for i, (inputs, labels) in enumerate(trainloader):
        inputs=inputs.to(device)
        labels=labels.to(device)
        optimizer.zero_grad()   # Clear previously calculated gradients

        outputs = model(inputs)   # Forward pass: compute model predictions
        loss = criterion(outputs, labels)   # Compute the loss between model predictions and ground truth labels
        loss.backward()   # Backward pass: compute gradients of loss w.r.t. model parameters
        optimizer.step()   # Update model parameters using the optimizer

        train_loss += loss.item()   # Accumulate the training loss for the current batch

        # Calculate the training accuracy for the current batch
        _, preds = torch.max(outputs, 1)   # Get the predicted class labels
        train_acc += (preds == labels).float().mean()   # Compute accuracy by comparing predictions with true labels

    train_loss /= i + 1   # Calculate average training loss for the epoch
    train_acc /= i + 1   # Calculate average training accuracy for the epoch

    scheduler.step()   # Update the learning rate scheduler

    # Print epoch-wise training loss and accuracy
    print('Epoch {}: Train Loss: {:.4f}, Train Acc: {:.4f}'.format(epoch + 1, train_loss, train_acc))

Epoch 1: Train Loss: 1.6250, Train Acc: 0.4062
Epoch 2: Train Loss: 1.3696, Train Acc: 0.5067
Epoch 3: Train Loss: 1.2549, Train Acc: 0.5528
Epoch 4: Train Loss: 1.1758, Train Acc: 0.5805
Epoch 5: Train Loss: 1.1151, Train Acc: 0.6063
Epoch 6: Train Loss: 1.0666, Train Acc: 0.6231
Epoch 7: Train Loss: 1.0329, Train Acc: 0.6385
Epoch 8: Train Loss: 0.9436, Train Acc: 0.6694
Epoch 9: Train Loss: 0.9252, Train Acc: 0.6737
Epoch 10: Train Loss: 0.9162, Train Acc: 0.6796
Epoch 11: Train Loss: 0.9150, Train Acc: 0.6815
Epoch 12: Train Loss: 0.9063, Train Acc: 0.6844
Epoch 13: Train Loss: 0.9023, Train Acc: 0.6851
Epoch 14: Train Loss: 0.8981, Train Acc: 0.6851
Epoch 15: Train Loss: 0.8842, Train Acc: 0.6922
Epoch 16: Train Loss: 0.8828, Train Acc: 0.6916
Epoch 17: Train Loss: 0.8816, Train Acc: 0.6930
Epoch 18: Train Loss: 0.8784, Train Acc: 0.6941
Epoch 19: Train Loss: 0.8793, Train Acc: 0.6925
Epoch 20: Train Loss: 0.8799, Train Acc: 0.6927
Epoch 21: Train Loss: 0.8816, Train Acc: 0.6916
E

## Testing and evaluating

In [16]:
# Validate the model on the Test Set
with torch.no_grad():   # Context manager to disable gradient calculation
    correct = 0   # Initialize variable to count correct predictions
    total = 0   # Initialize variable to count total number of samples
    for images, labels in testloader:   # Iterate over the test loader
        images=images.to(device)
        labels=labels.to(device)
        outputs = model(images)   # Forward pass: compute model predictions
        _, predicted = torch.max(outputs.data, 1)   # Get the index of the class with the highest probability
        total += labels.size(0)   # Increment the total count by the batch size
        correct += (predicted == labels).sum().item()   # Count the number of correct predictions in the batch

    # Print the accuracy of the model on the Test Images
    print(f'Accuracy of the network on the validation images: {100 * correct / total} %')

Accuracy of the network on the validation images: 67.51 %
