## 1. Establish a CNN architecture (based on the PyTorch library) to classify the MNIST dataset by defining the layers (convolution, pooling, fully connected), the hyper-parameters (kernels, padding, stride, optimizers, regularization, etc.), and running the model in GPU mode.

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

# Select the compute device: CUDA when PyTorch reports it usable, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Define the CNN architecture
class CNN(nn.Module):
    """Small convolutional classifier producing 10 logits per 1-channel image.

    Layer stack:
        conv 1->16 (5x5, stride 1, pad 2) -> ReLU -> 2x2 max-pool
        conv 16->32 (5x5, stride 1, pad 2) -> ReLU -> 2x2 max-pool
        flatten -> FC 1568->120 -> ReLU -> FC 120->84 -> ReLU -> FC 84->10

    The 5x5 / pad-2 convolutions keep the spatial size and each pool halves
    it, so a 28x28 input reaches ``fc1`` as 32 maps of 7x7, which is what
    ``fc1`` is sized for.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=5, stride=1, padding=2)
        # One parameter-free pooling layer, reused after both convolutions.
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5, stride=1, padding=2)
        self.fc1 = nn.Linear(in_features=32*7*7, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Map a batch of images to class scores.

        Args:
            x: tensor of shape (batch, 1, H, W); H and W must reduce to 7x7
                after the two pooling stages (28x28 does).

        Returns:
            Tensor of shape (batch, 10) holding raw scores; no softmax is
            applied here.

        Raises:
            RuntimeError: if the flattened feature size does not match
                ``fc1`` (i.e. the input has the wrong spatial size).
        """
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        # Flatten per sample. The previous view(-1, 32*7*7) silently pushed
        # any surplus elements of a mis-sized input into the batch dimension,
        # returning more rows than images instead of failing.
        x = torch.flatten(x, start_dim=1)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

## 2. Do the same thing with Faster R-CNN.

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Define the architecture for the modified Faster R-CNN
class FasterRCNN(nn.Module):
    """Two-convolution image classifier producing 10 logits per image.

    NOTE: despite its name, this class contains no region-proposal network,
    RoI pooling or box-regression head; it is a plain convolutional
    classifier:

        conv 1->32 (3x3, pad 1) -> ReLU -> 2x2 max-pool
        conv 32->64 (3x3, pad 1) -> ReLU -> 2x2 max-pool
        flatten -> FC 3136->128 -> ReLU -> FC 128->10

    The 3x3 / pad-1 convolutions keep the spatial size and each pool halves
    it, so a 28x28 input reaches ``fc1`` as 64 maps of 7x7.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        # One parameter-free pooling layer, reused after both convolutions.
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, 10)  # 10 classes for MNIST

    def forward(self, x):
        """Map a batch of images to class scores.

        Args:
            x: tensor of shape (batch, 1, H, W); H and W must reduce to 7x7
                after the two pooling stages (28x28 does).

        Returns:
            Tensor of shape (batch, 10) holding raw scores; no softmax is
            applied here.

        Raises:
            RuntimeError: if the flattened feature size does not match
                ``fc1`` (i.e. the input has the wrong spatial size).
        """
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        # Flatten per sample. The previous view(-1, 64 * 7 * 7) silently
        # pushed surplus elements of a mis-sized input into the batch
        # dimension instead of failing.
        x = torch.flatten(x, start_dim=1)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

## 3. Compare the two models using several metrics (accuracy, F1 score, loss, training time).



In [15]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score, f1_score
import time

# Compute device: CUDA when available, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Training hyperparameters shared by both networks.
learning_rate, num_epochs, batch_size = 0.001, 10, 64

# Preprocessing: convert to a tensor, then standardise the single channel
# with the mean/std constants below.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
    ]
)

# MNIST splits; only the training split requests a download.
train_dataset = datasets.MNIST('./data', train=True, transform=transform, download=True)
test_dataset = datasets.MNIST('./data', train=False, transform=transform)

# Shuffle the training batches each epoch; keep the test order fixed.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Function to calculate F1 score
def calculate_f1_score(y_true, y_pred):
    """Return the F1 score of ``y_pred`` against ``y_true``.

    Delegates to ``f1_score`` with ``average='weighted'``.

    Args:
        y_true: sequence of ground-truth labels.
        y_pred: sequence of predicted labels, aligned with ``y_true``.
    """
    weighted_f1 = f1_score(y_true, y_pred, average='weighted')
    return weighted_f1

# Training function
def train_model(model, criterion, optimizer, train_loader, device):
    """Run one training pass over ``train_loader`` and time it.

    Args:
        model: network to train; switched to training mode here.
        criterion: callable ``criterion(outputs, labels)`` returning a scalar
            loss tensor.
        optimizer: optimizer that updates ``model``'s parameters.
        train_loader: iterable yielding ``(images, labels)`` tensor pairs.
        device: device the batches are moved to before the forward pass.

    Returns:
        ``(mean_loss, elapsed_seconds)``. ``mean_loss`` is the unweighted
        mean of the per-batch loss values, or NaN when the loader yields no
        batches. ``elapsed_seconds`` is the duration of the pass.
    """
    model.train()
    running_loss = 0.0
    # Count batches while iterating rather than calling len(train_loader):
    # this works for iterables without __len__ and lets the empty case be
    # handled explicitly instead of dividing by zero.
    num_batches = 0
    # perf_counter is a monotonic interval timer; time.time() can jump when
    # the system clock is adjusted.
    start_time = time.perf_counter()
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        num_batches += 1
    elapsed = time.perf_counter() - start_time
    mean_loss = running_loss / num_batches if num_batches else float("nan")
    return mean_loss, elapsed

# Testing function
def test_model(model, test_loader, device):
    """Score ``model`` on ``test_loader`` and return ``(accuracy, f1)``.

    The model is put in evaluation mode and run without gradient tracking.
    For each batch the index of the largest output along dimension 1 is taken
    as the prediction. All labels and predictions are gathered on the CPU and
    passed to ``accuracy_score`` and ``calculate_f1_score``.

    Args:
        model: network to evaluate.
        test_loader: iterable yielding ``(images, labels)`` tensor pairs.
        device: device the batches are moved to before the forward pass.
    """
    model.eval()
    targets, predictions = [], []
    with torch.no_grad():
        for batch_images, batch_labels in test_loader:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)
            logits = model(batch_images)
            # torch.max over dim 1 returns (values, indices); keep the indices.
            predicted = torch.max(logits, 1)[1]
            targets.extend(batch_labels.cpu().numpy())
            predictions.extend(predicted.cpu().numpy())
    accuracy = accuracy_score(targets, predictions)
    f1 = calculate_f1_score(targets, predictions)
    return accuracy, f1

def evaluate(model, test_loader, device=None):
    """Return the fraction of samples in ``test_loader`` classified correctly.

    Args:
        model: network to evaluate; switched to evaluation mode here.
        test_loader: iterable yielding ``(inputs, labels)`` tensor pairs.
        device: device the batches are moved to before the forward pass.
            When omitted, the device of the model's first parameter is used
            (CPU if the model has no parameters).

    Returns:
        ``correct / total`` as a float in [0, 1], or ``0.0`` when the loader
        yields no samples.
    """
    if device is None:
        # Follow the model so inputs and weights end up on the same device.
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            # Predicted class = index of the largest score along dimension 1.
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    # An empty loader previously raised ZeroDivisionError.
    if total == 0:
        return 0.0
    return correct / total

# Build both networks on the selected device. Creation order is kept so that
# seeded runs initialise the weights in the same sequence.
cnn_model = CNN().to(device)
faster_rcnn_model = FasterRCNN().to(device)

# One shared loss; a separate Adam optimizer per network, same learning rate.
criterion = nn.CrossEntropyLoss()
cnn_optimizer = optim.Adam(cnn_model.parameters(), lr=learning_rate)
faster_rcnn_optimizer = optim.Adam(faster_rcnn_model.parameters(), lr=learning_rate)

# Per epoch: train both networks, score both on the test split, then report.
for epoch in range(num_epochs):
    cnn_loss, cnn_time = train_model(
        cnn_model, criterion, cnn_optimizer, train_loader, device
    )
    faster_rcnn_loss, faster_rcnn_time = train_model(
        faster_rcnn_model, criterion, faster_rcnn_optimizer, train_loader, device
    )
    cnn_accuracy, cnn_f1_score = test_model(cnn_model, test_loader, device)
    faster_rcnn_accuracy, faster_rcnn_f1_score = test_model(
        faster_rcnn_model, test_loader, device
    )

    print(f"Epoch [{epoch+1}/{num_epochs}], CNN Loss: {cnn_loss:.4f}, Faster R-CNN Loss: {faster_rcnn_loss:.4f}")
    print(f"Training Time - CNN: {cnn_time:.2f} seconds, Faster R-CNN: {faster_rcnn_time:.2f} seconds")
    print(f"CNN Accuracy: {cnn_accuracy*100:.2f}%, CNN F1 Score: {cnn_f1_score:.4f}")
    print(f"Faster R-CNN Accuracy: {faster_rcnn_accuracy*100:.2f}%, Faster R-CNN F1 Score: {faster_rcnn_f1_score:.4f}")

# Final accuracy of each trained network, computed with evaluate().
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("-----------")
for label, net in (
    ("CNN Accuracy:", cnn_model),
    ("Faster RCNN Accuracy:", faster_rcnn_model),
):
    print(label, evaluate(net.to(device), test_loader, device))


Epoch [1/10], CNN Loss: 0.1673, Faster R-CNN Loss: 0.1383
Training Time - CNN: 15.98 seconds, Faster R-CNN: 15.33 seconds
CNN Accuracy: 98.33%, CNN F1 Score: 0.9833
Faster R-CNN Accuracy: 98.48%, Faster R-CNN F1 Score: 0.9848
Epoch [2/10], CNN Loss: 0.0492, Faster R-CNN Loss: 0.0418
Training Time - CNN: 15.76 seconds, Faster R-CNN: 15.60 seconds
CNN Accuracy: 98.58%, CNN F1 Score: 0.9859
Faster R-CNN Accuracy: 98.88%, Faster R-CNN F1 Score: 0.9888
Epoch [3/10], CNN Loss: 0.0350, Faster R-CNN Loss: 0.0283
Training Time - CNN: 15.58 seconds, Faster R-CNN: 15.38 seconds
CNN Accuracy: 99.01%, CNN F1 Score: 0.9901
Faster R-CNN Accuracy: 98.97%, Faster R-CNN F1 Score: 0.9897
Epoch [4/10], CNN Loss: 0.0269, Faster R-CNN Loss: 0.0209
Training Time - CNN: 15.49 seconds, Faster R-CNN: 15.39 seconds
CNN Accuracy: 99.09%, CNN F1 Score: 0.9909
Faster R-CNN Accuracy: 99.06%, Faster R-CNN F1 Score: 0.9906
Epoch [5/10], CNN Loss: 0.0218, Faster R-CNN Loss: 0.0151
Training Time - CNN: 15.50 seconds, Fa

## 4. Using pretrained models (VGG16 and AlexNet), fine-tune them on the new dataset, then compare the results with those of the CNN and Faster R-CNN. What is your conclusion?

In [1]:
import torch
import torch.nn as nn
import torchvision
from torchvision import transforms
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST

# NOTE: this cell calls train_model, test_model and evaluate, which are
# defined in the model-comparison cell (section 3). Run that cell first in the
# same kernel; otherwise the training loop below fails with a NameError.

# Device configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load pre-trained models
vgg16 = torchvision.models.vgg16(pretrained=True)
alexnet = torchvision.models.alexnet(pretrained=True)

# Replace the last classifier layer with a fresh 10-way layer. The input
# width is read from the layer being replaced rather than hard-coded.
num_classes = 10  # MNIST has 10 classes
vgg16.classifier[6] = nn.Linear(vgg16.classifier[6].in_features, num_classes)
alexnet.classifier[6] = nn.Linear(alexnet.classifier[6].in_features, num_classes)

# Move the models to the device before building the optimizers, so the
# optimizers are created over the parameters that will actually be trained.
alexnet = alexnet.to(device)
vgg16 = vgg16.to(device)

# Preprocessing: upscale to the 224x224 size these networks were trained at,
# replicate the grey channel three times, and normalise with the ImageNet
# per-channel statistics that the pre-trained weights expect.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.Grayscale(num_output_channels=3),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])

# Load MNIST dataset
train_dataset = MNIST(root='./data', train=True, transform=transform, download=True)
test_dataset = MNIST(root='./data', train=False, transform=transform)

# Data loaders
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Define loss function and optimizers (all layers are fine-tuned)
criterion = nn.CrossEntropyLoss()
vgg16_optimizer = optim.SGD(vgg16.parameters(), lr=0.001, momentum=0.9)
alexnet_optimizer = optim.SGD(alexnet.parameters(), lr=0.001, momentum=0.9)

# Training and evaluation
num_epochs = 3
for epoch in range(num_epochs):
    alexnet_loss, alexnet_time = train_model(alexnet, criterion, alexnet_optimizer, train_loader, device)
    vgg16_loss, vgg16_time = train_model(vgg16, criterion, vgg16_optimizer, train_loader, device)
    alexnet_accuracy, alexnet_f1_score = test_model(alexnet, test_loader, device)
    vgg16_accuracy, vgg16_f1_score = test_model(vgg16, test_loader, device)

    print(f"Epoch [{epoch+1}/{num_epochs}], AlexNet Loss: {alexnet_loss:.4f}, VGG16 Loss: {vgg16_loss:.4f}")
    print(f"Training Time - AlexNet: {alexnet_time:.2f} seconds, VGG16: {vgg16_time:.2f} seconds")
    print(f"AlexNet Accuracy: {alexnet_accuracy*100:.2f}%, AlexNet F1 Score: {alexnet_f1_score:.4f}")
    print(f"VGG16 Accuracy: {vgg16_accuracy*100:.2f}%, VGG16 F1 Score: {vgg16_f1_score:.4f}")

print("-----------")
# evaluate() takes the device as its third argument; the original calls
# omitted it and would have raised a TypeError.
print("VGG16 Accuracy:", evaluate(vgg16, test_loader, device))
print("AlexNet Accuracy:", evaluate(alexnet, test_loader, device))


Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth
100%|██████████| 528M/528M [00:04<00:00, 129MB/s]
Downloading: "https://download.pytorch.org/models/alexnet-owt-7be5be79.pth" to /root/.cache/torch/hub/checkpoints/alexnet-owt-7be5be79.pth
100%|██████████| 233M/233M [00:03<00:00, 73.7MB/s]


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 111696007.34it/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 72492934.66it/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 31274097.61it/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 11817945.89it/s]


Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



NameError: name 'train_model' is not defined