# Convolutional Neural Networks - Image Classification [using PyTorch]

### 1. INITIALIZATION

In [10]:
# Import necessary libraries and modules
import matplotlib.pyplot as plt
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

In [2]:
# Check the installed PyTorch version
torch.__version__

'2.2.2'

### 2. LOADING & TRANSFORMING DATASET

The dataset used is [CIFAR-10](https://www.cs.toronto.edu/~kriz/cifar.html), which is a widely used benchmark dataset in the field of computer vision and machine learning. It consists of 60,000 32x32 color images in 10 classes, with 6,000 images per class. It serves as a standard dataset for training and evaluating machine learning algorithms, particularly for image classification tasks.

The classes are: airplane, automobile, bird, cat, deer, dog, frog, horse, ship, and truck.

Before loading the dataset, we need to define the data augmentation and image transformations that help to avoid overfitting. Augmentation increases the diversity of the training data and improves the generalization ability of the CNN model.

Transformations of training images are:
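- Randomly rotate the images by up to ±10 degrees
- Center-crop the images to 24x24 pixels
- Randomly flip the images horizontally
- Randomly change the brightness, contrast, saturation, and hue of the images (color jitter)
- Scale the pixel values of the images to the range [0, 1] (done automatically when converting to a tensor)
- Normalize each channel with mean 0.5 and standard deviation 0.5, which maps the pixel values to the range [-1, 1]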

In [3]:
# Training-time pipeline: random augmentation first, then tensor conversion
# and per-channel normalisation. The steps run in the order listed.
transform_train = transforms.Compose([
    # Geometric augmentation
    transforms.RandomRotation(degrees=10),
    transforms.CenterCrop(size=24),
    transforms.RandomHorizontalFlip(p=0.5),
    # Photometric augmentation
    transforms.ColorJitter(
        brightness=0.2,
        contrast=0.2,
        saturation=0.2,
        hue=0.1,
    ),
    # PIL image -> float tensor in [0, 1], then (x - 0.5) / 0.5 per channel
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

In [4]:
# Creating transformer for testing data as a series of steps - without random augmentation.
# The deterministic center crop is kept so that test images have the same 24x24
# spatial size as the training images; the network's first fully connected layer
# is sized for 24x24 inputs (64 * 6 * 6 features) and cannot consume 32x32 images.
transform_test = transforms.Compose([
    transforms.CenterCrop(24),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

By separating the transformation pipelines for training and testing, we ensure that the model is trained with random data augmentations that help it generalize better, while the test data goes through deterministic preprocessing only, so that the model's performance is evaluated consistently.

In [5]:
# Fetch both CIFAR-10 splits through torchvision (downloaded on first use)
# and attach the matching transformation pipeline to each split.
cifar_root = "../data/raw"
data_train = torchvision.datasets.CIFAR10(
    root=cifar_root,
    train=True,
    download=True,
    transform=transform_train,
)
data_test = torchvision.datasets.CIFAR10(
    root=cifar_root,
    train=False,
    download=True,
    transform=transform_test,
)

Files already downloaded and verified
Files already downloaded and verified


In [6]:
# Wrap each split in a mini-batch loader; only the training split is shuffled
batch_size = 32
train_loader = torch.utils.data.DataLoader(
    data_train,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    data_test,
    batch_size=batch_size,
    shuffle=False,
)

### 4. BUILDING CNN MODEL

In [7]:
class CNN(nn.Module):
    """Small convolutional classifier for 3-channel 24x24 images with 10 classes.

    Architecture: two (3x3 conv -> ReLU -> 2x2 max-pool) stages followed by
    three fully connected layers. Each pooling step halves the spatial size
    (24 -> 12 -> 6), so the flattened feature vector has 64 * 6 * 6 elements.
    """

    def __init__(self):
        super().__init__()

        # Define the convolutional layers (padding=1 keeps the spatial size unchanged)
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)

        # Define the pooling layer (shared by both stages; halves height and width)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)

        # Define the fully connected layers
        self.fc1 = nn.Linear(64 * 6 * 6, 128)  # 64 * 6 * 6 is the flattened dimension after pooling
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 10)  # 10 output units for 10 classes

    def forward(self, x):
        """Compute per-class log-probabilities for a batch of images.

        Args:
            x: Tensor of shape (batch, 3, 24, 24).

        Returns:
            Tensor of shape (batch, 10) holding log-probabilities
            (log-softmax over the class dimension).

        Raises:
            RuntimeError: If the spatial size of ``x`` does not produce
                64 * 6 * 6 features per sample.
        """
        # Apply convolutional layers with ReLU activation and pooling
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))

        # Flatten everything except the batch dimension. Flattening per sample
        # (instead of view(-1, 64 * 6 * 6)) guarantees that a wrongly sized input
        # fails loudly in fc1 rather than being silently folded into extra rows.
        x = torch.flatten(x, start_dim=1)

        # Apply fully connected layers with ReLU activation
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))

        # Apply the output layer followed by log-softmax over the classes
        x = self.fc3(x)
        return F.log_softmax(x, dim=1)

In [8]:
# Initialize the model
cnn = CNN()

# Define the loss function. The model's forward pass already ends with
# log_softmax, so the matching criterion is the negative log-likelihood loss.
# (nn.CrossEntropyLoss expects raw logits and would apply log_softmax a second time.)
criterion = nn.NLLLoss()

# Define the optimizer (Adam optimizer)
optimizer = torch.optim.Adam(cnn.parameters(), lr=0.001)

### 5. TRAINING CNN MODEL

In [None]:
# Initialize metrics function
def compute_metrics(outputs, labels):
    """Compute classification metrics for one batch of model outputs.

    Args:
        outputs: Tensor of per-class scores; the predicted class of each row
            is the index of its largest value along dimension 1.
        labels: Tensor of ground-truth class indices, one per row of ``outputs``.

    Returns:
        Tuple ``(accuracy, f1, precision, recall)``. F1, precision and recall
        are weighted averages over the classes present in the batch.
    """
    _, preds = torch.max(outputs, 1)

    # scikit-learn works on NumPy arrays, so bring both tensors back to the CPU
    labels = labels.cpu().numpy()
    preds = preds.cpu().numpy()

    accuracy = accuracy_score(labels, preds)
    # zero_division=0: in a small batch some classes have no predicted (or no true)
    # samples; score them as 0 explicitly instead of raising UndefinedMetricWarning.
    f1 = f1_score(labels, preds, average='weighted', zero_division=0)
    precision = precision_score(labels, preds, average='weighted', zero_division=0)
    recall = recall_score(labels, preds, average='weighted', zero_division=0)
    return accuracy, f1, precision, recall

In [9]:
# How many full passes over the training set to run
num_epochs = 25

# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
cnn.to(device)

# Put the model into training mode
cnn.train()

# One iteration of the outer loop is one epoch
for epoch in range(num_epochs):

    # Sum of the per-batch losses seen in this epoch
    running_loss = 0.0

    for inputs, labels in train_loader:
        # Move the batch to the same device as the model
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Clear gradients left over from the previous step
        optimizer.zero_grad()

        # Forward pass and loss
        outputs = cnn(inputs)
        loss = criterion(outputs, labels)

        # Backward pass and parameter update
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Report the mean per-batch loss of the epoch
    mean_loss = running_loss / len(train_loader)
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {mean_loss:.4f}')


Epoch [1/25], Loss: 1.5307


KeyboardInterrupt: 

### 6. PERFORMANCE ANALYSIS