# 1. Installing Required Libraries
Installing the required Python libraries: numpy (numerical computations), pillow (image handling), torch (PyTorch library), and torchvision (tools for handling vision datasets and models).

In [25]:
! python -m pip install numpy pillow torch torchvision





# Overview
"In this project, I implemented an image classification pipeline using Convolutional Neural Networks (CNNs) in PyTorch. The goal was to classify images into predefined categories, leveraging the power of deep learning to work with unstructured image data."

# Key Highlights
Deep Learning Expertise:

I designed and trained a CNN from scratch, incorporating multiple convolutional layers, max-pooling, and fully connected layers.
The model achieved strong performance metrics such as accuracy on the test dataset.
Data Preprocessing:

I used PyTorch’s torchvision.transforms for image augmentation (e.g., resizing, normalization) to improve generalization and robustness.
Model Training:

I optimized the model using techniques like stochastic gradient descent (SGD) with learning rate scheduling and momentum.
I monitored the training process using training loss and validation accuracy to prevent overfitting.
Transfer Learning (Optional if Implemented):

I utilized pre-trained models (like ResNet or VGG) for transfer learning to accelerate training and achieve higher accuracy.
Model Evaluation:

I evaluated the model's performance using accuracy, confusion matrices, and other metrics.
Additionally, I implemented visualization techniques like Grad-CAM to interpret which regions of an image influenced the model’s predictions, providing explainability.
Deployment:

The trained model was saved using torch.save() and tested on new images for real-world applicability.


# 2. Importing Libraries
Imports the libraries necessary for neural network creation, data preprocessing, optimization, and visualization.

In [26]:
import numpy as np
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

# 3. Data Preprocessing
Converts images to tensors and normalizes them to have a mean of 0.5 and a standard deviation of 0.5 for each channel.

In [27]:

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# 4. Loading the CIFAR-10 Dataset
- Downloads and loads the CIFAR-10 dataset, applying the defined preprocessing.
- Creates data loaders for batch processing during training and testing.

In [28]:
train_data = torchvision.datasets.CIFAR10(root='./data', train=True, transform=transform, download=True)
test_data = torchvision.datasets.CIFAR10(root='./data', train=False, transform=transform, download=True)

train_loader = torch.utils.data.DataLoader(train_data, batch_size=32, shuffle=True, num_workers=2)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=32, shuffle=True, num_workers=2)


# 5. Exploring the Dataset and Defining Class Names
- Retrieves the first image and its label from the training dataset to inspect its dimensions.
- Maps the numeric labels in the CIFAR-10 dataset to human-readable class names.

In [29]:
image, label = train_data[0]

image.size()

torch.Size([3, 32, 32])

In [30]:
class_names = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']


# 6. Creating the CNN Model and Setting up Loss Function and Optimizer
- Defines a CNN with two convolutional layers followed by pooling and fully connected layers for classification
- Initializes the CNN, sets the loss function (cross-entropy for multi-class classification), and defines the SGD optimizer with learning rate and momentum.

In [31]:
class NeuralNet(nn.Module):

    def __init__(self):
        super().__init__()

        self.conv1 = nn.Conv2d(3, 12, 5)  # (12, 28, 28)
        self.pool = nn.MaxPool2d(2, 2)   # (12, 14, 14)
        self.conv2 = nn.Conv2d(12, 24, 5)  # (24, 10, 10) -> (24, 5, 5) -> Flatten (24 * 5 * 5)
        self.fc1 = nn.Linear(24 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x



In [32]:
net = NeuralNet()
loss_function = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

# 7. Training the Model and Model Saving 
- Trains the CNN for 30 epochs by updating model weights to minimize the loss
- Saves the trained model and reloads it for evaluation or further use.


In [33]:
for epoch in range(30):
    print(f'Training epoch {epoch}...')
    running_loss = 0.0

    for i, data in enumerate(train_loader):
        inputs, labels = data

        optimizer.zero_grad()
        outputs = net(inputs)

        loss = loss_function(outputs, labels)
        loss.backward()

        optimizer.step()

        running_loss += loss.item()

    print(f'Loss: {running_loss / len(train_loader):.4f}')

Training epoch 0...
Loss: 2.1893
Training epoch 1...
Loss: 1.7323
Training epoch 2...
Loss: 1.5243
Training epoch 3...
Loss: 1.3960
Training epoch 4...
Loss: 1.3016
Training epoch 5...
Loss: 1.2198
Training epoch 6...
Loss: 1.1493
Training epoch 7...
Loss: 1.0877
Training epoch 8...
Loss: 1.0347
Training epoch 9...
Loss: 0.9924
Training epoch 10...
Loss: 0.9506
Training epoch 11...
Loss: 0.9141
Training epoch 12...
Loss: 0.8781
Training epoch 13...
Loss: 0.8467
Training epoch 14...
Loss: 0.8161
Training epoch 15...
Loss: 0.7863
Training epoch 16...
Loss: 0.7561
Training epoch 17...
Loss: 0.7309
Training epoch 18...
Loss: 0.7045
Training epoch 19...
Loss: 0.6805
Training epoch 20...
Loss: 0.6543
Training epoch 21...
Loss: 0.6327
Training epoch 22...
Loss: 0.6098
Training epoch 23...
Loss: 0.5888
Training epoch 24...
Loss: 0.5655
Training epoch 25...
Loss: 0.5447
Training epoch 26...
Loss: 0.5277
Training epoch 27...
Loss: 0.5057
Training epoch 28...
Loss: 0.4872
Training epoch 29...
Los

In [34]:
# Save the trained model
torch.save(net.state_dict(), 'trained_net.pth')

In [35]:
net = NeuralNet()
net.load_state_dict(torch.load('trained_net.pth'))
net.eval()

NeuralNet(
  (conv1): Conv2d(3, 12, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(12, 24, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=600, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

# 8. Evaluating the Model
Evaluates the model's performance on the test dataset and calculates accuracy.

In [36]:
correct = 0
total = 0

net.eval()

with torch.no_grad():
    for data in test_loader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f'Accuracy: {accuracy:.2f}%')


Accuracy: 68.28%


# 9. Preprocessing New Images and Making Predictions
- Preprocesses new images for prediction using the trained model.
- Uses the trained model to predict the class of new images and outputs the corresponding class names.

In [37]:
new_transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

def load_image(image_path):
    image = Image.open(image_path)
    image = new_transform(image)
    image = image.unsqueeze(0)
    return image

image_paths = ['example 1.jpg', 'example 2.jpg', 'example 3.jpg']
images = [load_image(img) for img in image_paths]


In [38]:
net.eval()
with torch.no_grad():
    for image in images:
        output = net(image)
        _, predicted = torch.max(output, 1)
        print(f'Prediction: {class_names[predicted.item()]}')


Prediction: dog
Prediction: cat
Prediction: deer
