NAME: EMMANUEL KWAME AYANFUL

# 1. Training basic CNNs from scratch

### Import packages

In [None]:
# torchsummary supplies `summary`, which is imported below and called on each from-scratch CNN.
!pip install torchsummary

In [None]:
import torch
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, SubsetRandomSampler
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
from torchsummary import summary
from torch.optim import Adam, SGD
from PIL import Image

## 1.a. Basic CNN

In [None]:
# Preprocessing for the training images: convert to a tensor, then normalise
# every RGB channel with mean 0.5 and std 0.5.
transform_train = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# The test images get exactly the same preprocessing as the training images.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Download (if needed) the official CIFAR-10 train and test splits
train_data = datasets.CIFAR10(
    'data', train = True,
    download = True, transform = transform_train
)
test_data = datasets.CIFAR10(
    'data', train = False,
    download = True, transform = transform_test
)

# Split the official training set into train and validation subsets
num_workers = 0 # Number of loader subprocesses; 0 loads batches in the main process
batch_size = 10
valid_size = 0.2 # Fraction of the train data set aside for validation
train_length = len(train_data)
indices = list(range(train_length))
split = int(np.floor(valid_size * train_length))

np.random.shuffle(indices) # Shuffle so the validation subset is a random sample

# The first `split` shuffled indices become validation, the rest training
train_idx = indices[split : ]
valid_idx = indices[ : split]

# Samplers that draw (in random order) only from the given index subsets
train_sampler = SubsetRandomSampler(train_idx)
validation_sampler = SubsetRandomSampler(valid_idx)

# Train and validation loaders both read `train_data`, restricted by their samplers
train_loader = DataLoader(
    train_data, num_workers = num_workers,
    batch_size = batch_size, sampler = train_sampler
)
valid_loader = DataLoader(
    train_data, num_workers = num_workers,
    batch_size = batch_size, sampler = validation_sampler
)
# The test loader must wrap the held-out `test_data`; wrapping `train_data`
# would report accuracy on images the model was trained on.
# Evaluation order does not affect the metrics, so no shuffling is needed.
test_loader = DataLoader(
    test_data, num_workers = num_workers,
    batch_size = batch_size, shuffle = False
)



In [None]:
# Preview one mini-batch of training images

# CIFAR-10 label names; the plotting code looks a name up by its integer label.
classes = [
    'airplane', 'automobile', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
]

# Draw a single batch from the training loader
dataiter = iter(train_loader)
images, labels = next(dataiter)

def imgShow(img, mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]):
    """Undo per-channel normalisation and draw one image on the current axes.

    Args:
        img: Tensor laid out channels-first; it is converted with ``.numpy()``
            and transposed to channels-last for matplotlib.
        mean: Per-channel mean that was subtracted during normalisation.
        std: Per-channel std that the image was divided by during normalisation.
    """
    channels_last = np.transpose(img.numpy(), (1, 2, 0))
    # Invert (x - mean) / std, then clamp into the [0, 1] range imshow expects.
    restored = np.array(std) * channels_last + np.array(mean)
    plt.imshow(np.clip(restored, 0, 1))
    
    
# Lay the batch out as a two-row grid, one titled tile per image
fig = plt.figure(1, figsize=(10, 5))
grid_rows, grid_cols = 2, batch_size // 2
for idx in range(batch_size):
    ax = fig.add_subplot(grid_rows, grid_cols, idx + 1, xticks=[], yticks=[])
    imgShow(images[idx])
    class_name = classes[labels[idx]]
    ax.set_title(class_name)

### Convolutional Neural Network Architecture 1

In [None]:
class convNet1(nn.Module):
    """Small LeNet-style CNN: two 5x5 conv + max-pool stages, then three linear layers.

    Takes batches shaped (N, 3, 32, 32) and returns (N, 10) class logits.
    """

    def __init__(self):
        super().__init__()
        # Feature extractor: 5x5 kernels with padding 1 shrink each side by 2.
        self.conv1 = nn.Conv2d(3, 6, kernel_size=5, stride=1, padding=1)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5, stride=1, padding=1)
        # One 2x2 pooling layer, reused after both convolutions.
        self.pool = nn.MaxPool2d(2, 2)
        # Classifier head: 16*6*6 = 576 flattened features down to 10 logits.
        self.fc1 = nn.Linear(16 * 6 * 6, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return unnormalised class scores for a batch of 3x32x32 images."""
        features = self.pool(F.relu(self.conv1(x)))         # 3x32x32 -> 6x30x30 -> 6x15x15
        features = self.pool(F.relu(self.conv2(features)))  # 6x15x15 -> 16x13x13 -> 16x6x6
        flat = features.flatten(start_dim=1)                # 16x6x6  -> 576
        hidden = F.relu(self.fc1(flat))                     # 576 -> 120
        hidden = F.relu(self.fc2(hidden))                   # 120 -> 84
        return self.fc3(hidden)                             # 84  -> 10

# Use the GPU when one is present, otherwise fall back to the CPU so the
# notebook still runs (an unconditional .cuda() raises on CPU-only machines).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model1 = convNet1().to(device)
# Tell torchsummary which device the model lives on so its dummy input matches.
summary(model1, (3, 32, 32), device=device.type)


In [None]:
def trainNetwork(model, lr, trainer, validator, optimizer='sgd', use_cuda=True,
                 trainable_params=None, epochs=10):
    """Train ``model`` with cross-entropy loss and return the per-epoch loss history.

    Args:
        model: ``nn.Module`` mapping a batch of inputs to class logits.
        lr: Learning rate handed to the optimizer.
        trainer: Iterable of ``(images, labels)`` training batches; must support ``len()``.
        validator: Iterable of ``(images, labels)`` validation batches; must support ``len()``.
        optimizer: ``'sgd'`` (momentum 0.9) or ``'adam'``.
        use_cuda: Run on the GPU when one is available.
        trainable_params: Optional collection of parameter names to optimise. When
            ``None``, every parameter with ``requires_grad=True`` is optimised.
        epochs: Number of passes over ``trainer``.

    Returns:
        dict with keys ``'train'`` and ``'val'``, each a list holding one mean
        batch loss per epoch.

    Raises:
        ValueError: If ``optimizer`` is neither ``'sgd'`` nor ``'adam'``.

    Side effects:
        Prints one progress line per epoch and writes the weights to
        ``<ModelClassName>_model.pth`` in the working directory whenever the
        validation loss matches or beats the best value seen so far.
    """
    # Decide once whether batches (and the model) go to the GPU.
    on_gpu = use_cuda and torch.cuda.is_available()
    if on_gpu:
        model = model.cuda()

    if trainable_params is None:
        # Frozen parameters never receive gradients, so keep them out of the optimizer.
        params_to_update = [p for p in model.parameters() if p.requires_grad]
    else:
        wanted = set(trainable_params)
        params_to_update = [p for name, p in model.named_parameters() if name in wanted]

    if optimizer == 'adam':
        optim = Adam(params_to_update, lr=lr)
    elif optimizer == 'sgd':
        optim = SGD(params_to_update, lr=lr, momentum=0.9)
    else:
        # Fail here with a clear message instead of a NameError inside the loop.
        raise ValueError(f"Unsupported optimizer {optimizer!r}; expected 'adam' or 'sgd'.")

    criterion = nn.CrossEntropyLoss()  # Loss function

    track_loss = {'train': [], 'val': []}  # Dictionary to store loss history
    val_loss_min = float('inf')  # np.Inf no longer exists in NumPy 2.x

    for epoch in range(epochs):
        train_loss, valid_loss = 0.0, 0.0
        total_train, correct_train = 0, 0
        total_val, correct_val = 0, 0

        # Training
        model.train()  # Enable training-mode layers such as dropout
        for images, labels in trainer:
            if on_gpu:
                images, labels = images.cuda(), labels.cuda()
            optim.zero_grad()  # Reset gradients of all optimized tensors
            output = model(images)
            loss = criterion(output, labels)
            loss.backward()  # Backpropagate errors
            optim.step()  # Perform a single parameter update
            train_loss += loss.item()
            pred = output.argmax(dim=1)
            total_train += labels.size(0)
            # .item() keeps the counters as plain ints rather than tensors.
            correct_train += (pred == labels).sum().item()

        # Validating
        model.eval()  # Switch dropout etc. to inference behaviour
        with torch.no_grad():  # No gradients needed: saves memory and time
            for images, labels in validator:
                if on_gpu:
                    images, labels = images.cuda(), labels.cuda()
                output = model(images)
                valid_loss += criterion(output, labels).item()
                pred = output.argmax(dim=1)
                total_val += labels.size(0)
                correct_val += (pred == labels).sum().item()

        # Average the summed batch losses and record them
        train_loss /= len(trainer)
        valid_loss /= len(validator)
        track_loss['train'].append(train_loss)
        track_loss['val'].append(valid_loss)

        train_acc = 100 * correct_train / total_train
        val_acc = 100 * correct_val / total_val

        print(f"[Epoch {epoch+1}]\tTrain loss: {train_loss:.3f}\tTrain Accuracy: {train_acc:.2f}%\tValidation loss: {valid_loss:.3f}\tVal Accuracy: {val_acc:.2f}%")

        # Checkpoint whenever validation loss is at least as good as the best so far
        if valid_loss <= val_loss_min:
            val_loss_min = valid_loss
            checkpoint_name = type(model).__name__ + '_model.pth'
            torch.save(model.state_dict(), checkpoint_name)
            print("Model state saved...")

    return track_loss


In [None]:
# Train architecture 1 and keep its per-epoch loss history
loss1 = trainNetwork(model1, 0.001, train_loader, valid_loader, use_cuda=True)

# Plot training and validation loss on the same axes
fig, ax = plt.subplots(figsize=(10, 5))
for history_key, curve_label in (('train', 'Training loss'), ('val', 'Validation loss')):
    ax.plot(loss1[history_key], label=curve_label)
ax.set_title('Network Performance on CIFAR10-Model_1')
plt.legend()

### Convolutional Neural Network Architecture 2

In [None]:
class convNet2(nn.Module):
    """Two 3x3 conv + max-pool stages followed by a two-layer classifier.

    Takes batches shaped (N, 3, 32, 32) and returns (N, 10) class logits.
    """

    def __init__(self):
        super().__init__()
        # 3x3 kernels with padding 1 keep the spatial size; only pooling halves it.
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        # After two poolings the 32x32 input is 8x8 with 64 channels.
        self.fc1 = nn.Linear(64 * 8 * 8, 64)
        self.fc2 = nn.Linear(64, 10)

    def forward(self, x):
        """Return unnormalised class scores for a batch of 3x32x32 images."""
        features = self.pool(F.relu(self.conv1(x)))         # 3x32x32  -> 32x16x16
        features = self.pool(F.relu(self.conv2(features)))  # 32x16x16 -> 64x8x8
        flat = features.flatten(start_dim=1)                # 64x8x8   -> 4096
        hidden = F.relu(self.fc1(flat))                     # 4096 -> 64
        return self.fc2(hidden)                             # 64   -> 10
    
# Use the GPU when one is present, otherwise fall back to the CPU so the
# notebook still runs (an unconditional .cuda() raises on CPU-only machines).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model2 = convNet2().to(device)
# Tell torchsummary which device the model lives on so its dummy input matches.
summary(model2, (3, 32, 32), device=device.type)

In [None]:
# Train architecture 2 and keep its per-epoch loss history
loss2 = trainNetwork(model2, 0.001, train_loader, valid_loader, use_cuda=True)

# Plot training and validation loss on the same axes
fig, ax = plt.subplots(figsize=(10, 5))
for history_key, curve_label in (('train', 'Training loss'), ('val', 'Validation loss')):
    ax.plot(loss2[history_key], label=curve_label)
ax.set_title('Network Performance on CIFAR10-Model_2')
plt.legend()

### Convolutional Neural Network Architecture 3

In [None]:
class convNet3(nn.Module):
    """Two 3x3 convolutions with dropout, a single max-pool, and a wide classifier.

    Takes batches shaped (N, 3, 32, 32) and returns (N, 10) class logits.
    Only the second convolution is followed by pooling.
    """

    def __init__(self):
        super().__init__()
        # 3x3 kernels with padding 1 keep the spatial size unchanged.
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        # Lighter dropout on the feature maps, heavier on the dense layer.
        self.dropout1 = nn.Dropout(0.3)
        self.dropout2 = nn.Dropout(0.5)
        # One pooling step leaves 32 channels at 16x16 = 8192 features.
        self.fc1 = nn.Linear(32*16*16, 512)
        self.fc2 = nn.Linear(512, 10)

    def forward(self, x):
        """Return unnormalised class scores for a batch of 3x32x32 images."""
        features = self.dropout1(F.relu(self.conv1(x)))     # 3x32x32  -> 32x32x32 (no pooling)
        features = self.pool(F.relu(self.conv2(features)))  # 32x32x32 -> 32x16x16
        flat = features.flatten(start_dim=1)                # 32x16x16 -> 8192
        hidden = self.dropout2(F.relu(self.fc1(flat)))      # 8192 -> 512
        return self.fc2(hidden)                             # 512  -> 10
    
# Use the GPU when one is present, otherwise fall back to the CPU so the
# notebook still runs (an unconditional .cuda() raises on CPU-only machines).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model3 = convNet3().to(device)
# Tell torchsummary which device the model lives on so its dummy input matches.
summary(model3, (3, 32, 32), device=device.type)

In [None]:
# Train architecture 3 and keep its per-epoch loss history
loss3 = trainNetwork(model3, 0.001, train_loader, valid_loader, use_cuda=True)

# View loss graph
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(loss3['train'], label='Training loss')
ax.plot(loss3['val'], label='Validation loss')
# This figure shows model 3; the title previously said Model_1 (copy-paste slip).
ax.set_title('Network Performance on CIFAR10-Model_3')
plt.legend()

### Test Performance of Models on the Test Set

In [None]:
def test(model, use_cuda=False):
    test_loss = 0
    total, correct = 0, 0
    criterion = nn.CrossEntropyLoss() # loss function

    model.eval() # test the model with dropout layers off
    for images,labels in test_loader:
        if use_cuda and torch.cuda.is_available():
            images, labels = images.cuda(), labels.cuda()
        output = model(images)
        loss = criterion(output, labels)
        test_loss += loss.item()
        _, pred = torch.max(output, 1)
        total += labels.size(0)
        correct += (pred == labels).sum()

    test_loss = test_loss / len(test_loader)
    acc = 100 * (correct / total)
    print(f'For {type(model).__name__}:')
    print(f"Test Loss: {test_loss:.3f}")
    print(f"Test Accuracy: {acc:.2f}%")

In [None]:
# Score each from-scratch CNN in turn; `test` prints its own report.
cnn_models = [model1, model2, model3]
evaluate_on_gpu = True
for model in cnn_models:
    test(model, use_cuda=evaluate_on_gpu)
    print("\n")  # spacer between the per-model reports

# 2. Classification with transfer learning

In [None]:
# Preprocessing for the pretrained ImageNet backbones: upscale the 32x32 images
# to 224, convert to tensors, and normalise with the ImageNet channel statistics.
transform_train = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])

# The test images get exactly the same preprocessing as the training images.
transform_test = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])

# Re-create the CIFAR-10 train and test splits with the new transforms
train_data = datasets.CIFAR10(
    'data', train = True,
    download = True, transform = transform_train
)
test_data = datasets.CIFAR10(
    'data', train = False,
    download = True, transform = transform_test
)

# Split the official training set into train and validation subsets
num_workers = 0 # Number of loader subprocesses; 0 loads batches in the main process
batch_size = 10
valid_size = 0.2 # Fraction of the train data set aside for validation
train_length = len(train_data)
indices = list(range(train_length))
split = int(np.floor(valid_size * train_length))

np.random.shuffle(indices) # Shuffle so the validation subset is a random sample

# The first `split` shuffled indices become validation, the rest training
train_idx = indices[split : ]
valid_idx = indices[ : split]

# Samplers that draw (in random order) only from the given index subsets
train_sampler = SubsetRandomSampler(train_idx)
validation_sampler = SubsetRandomSampler(valid_idx)

# Train and validation loaders both read `train_data`, restricted by their samplers
train_loader = DataLoader(
    train_data, num_workers = num_workers,
    batch_size = batch_size, sampler = train_sampler
)
valid_loader = DataLoader(
    train_data, num_workers = num_workers,
    batch_size = batch_size, sampler = validation_sampler
)
# The test loader must wrap the held-out `test_data`; wrapping `train_data`
# would report accuracy on images the model was trained on.
# Evaluation order does not affect the metrics, so no shuffling is needed.
test_loader = DataLoader(
    test_data, num_workers = num_workers,
    batch_size = batch_size, shuffle = False
)

In [None]:
# Draw one mini-batch of the resized training images
dataiter = iter(train_loader)
images, labels = next(dataiter)

# These images were normalised with the ImageNet statistics, so the same values
# must be used to undo the normalisation; imgShow's 0.5/0.5 defaults would
# display distorted colours.
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std = [0.229, 0.224, 0.225]

fig = plt.figure(1, figsize=(10, 5))
for idx in range(batch_size):
    ax = fig.add_subplot(
        2, batch_size // 2,
        idx + 1, xticks=[], yticks=[]
    )
    imgShow(images[idx], mean=imagenet_mean, std=imagenet_std)
    ax.set_title(classes[labels[idx]])

### Efficient Net B0

In [None]:
# Start from EfficientNet-B0 with its pretrained ImageNet weights
efficientnetB0 = models.efficientnet_b0(weights='IMAGENET1K_V1')

# Freeze every pretrained weight; the layer created below is new and stays trainable.
for pretrained_weight in efficientnetB0.parameters():
    pretrained_weight.requires_grad = False

# Replace the final classifier layer with a fresh 10-output linear layer
num_ftrs = efficientnetB0.classifier[1].in_features
new_head = nn.Linear(num_ftrs, 10)
efficientnetB0.classifier[1] = new_head

In [None]:
# List the parameters that will actually be updated during fine-tuning
trainable_names = [
    name for name, param in efficientnetB0.named_parameters() if param.requires_grad
]
for name in trainable_names:
    print(f"Parameter {name} requires gradients.")

In [None]:
# `trainable_params` is left at its default: the frozen backbone weights have
# requires_grad=False, so only the new classifier layer is updated.
loss_e_netb0 = trainNetwork(efficientnetB0, 0.001, train_loader, valid_loader, use_cuda=True)

# Plot training and validation loss on the same axes
fig, ax = plt.subplots(figsize=(10, 5))
for history_key, curve_label in (('train', 'Training loss'), ('val', 'Validation loss')):
    ax.plot(loss_e_netb0[history_key], label=curve_label)
ax.set_title('Network Performance on CIFAR10-Model_EfficientNet_B0')
plt.legend()

### Squeezenet1_0

In [None]:
# Start from SqueezeNet 1.0 with its pretrained ImageNet weights
squeezenet1_0 = models.squeezenet1_0(weights='IMAGENET1K_V1')

# Freeze every pretrained weight; the layer created below is new and stays trainable.
for pretrained_weight in squeezenet1_0.parameters():
    pretrained_weight.requires_grad = False

# SqueezeNet classifies with a 1x1 convolution, so swap that layer for one
# with 10 output channels and record the new class count on the model.
new_head = nn.Conv2d(512, 10, kernel_size=(1, 1), stride=(1, 1))
squeezenet1_0.classifier[1] = new_head
squeezenet1_0.num_classes = 10

# List the parameters that will actually be updated during fine-tuning
trainable_names = [
    name for name, param in squeezenet1_0.named_parameters() if param.requires_grad
]
for name in trainable_names:
    print(f"Parameter {name} requires gradients.")

In [None]:
# `trainable_params` is left at its default: the frozen backbone weights have
# requires_grad=False, so only the new classifier layer is updated.
loss_squeezenet1_0 = trainNetwork(squeezenet1_0, 0.001, train_loader, valid_loader, use_cuda=True)

# Plot training and validation loss on the same axes
fig, ax = plt.subplots(figsize=(10, 5))
for history_key, curve_label in (('train', 'Training loss'), ('val', 'Validation loss')):
    ax.plot(loss_squeezenet1_0[history_key], label=curve_label)
ax.set_title('Network Performance on CIFAR10-Model_SqueezeNet1_0')
plt.legend()

### Shuffle Net

In [None]:
# Start from ShuffleNet V2 (x0.5) with its pretrained ImageNet weights
shufflenet = models.shufflenet_v2_x0_5(weights='IMAGENET1K_V1')

# Freeze every pretrained weight; the layer created below is new and stays trainable.
for pretrained_weight in shufflenet.parameters():
    pretrained_weight.requires_grad = False

# Replace the final fully connected layer with a 10-output one for CIFAR-10
num_ftrs = shufflenet.fc.in_features
new_head = nn.Linear(num_ftrs, 10)
shufflenet.fc = new_head

# List the parameters that will actually be updated during fine-tuning
trainable_names = [
    name for name, param in shufflenet.named_parameters() if param.requires_grad
]
for name in trainable_names:
    print(f"Parameter {name} requires gradients.")

In [None]:
# Collect the names of the parameters that still require gradients and hand
# them to the trainer so only those are optimised.
trainable_params = []
for name, param in shufflenet.named_parameters():
    if param.requires_grad:
        trainable_params.append(name)

loss_shufflenet = trainNetwork(
    shufflenet, 0.001, train_loader, valid_loader,
    use_cuda=True, trainable_params=trainable_params,
)

# Plot training and validation loss on the same axes
fig, ax = plt.subplots(figsize=(10, 5))
for history_key, curve_label in (('train', 'Training loss'), ('val', 'Validation loss')):
    ax.plot(loss_shufflenet[history_key], label=curve_label)
ax.set_title('Network Performance on CIFAR10-Model_Shufflenet')
plt.legend()

In [None]:
# Put each fine-tuned model on the GPU when one exists, otherwise on the CPU;
# an unconditional .cuda() raises on CPU-only machines.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
cnn_models = [efficientnetB0.to(device), squeezenet1_0.to(device), shufflenet.to(device)]
for model in cnn_models:
    test(model, use_cuda=True)
    print("\n")

# 3. Saliency Maps

In [None]:
# Pretrained EfficientNet-B0, switched to evaluation mode straight away
# (`eval()` returns the module itself, so the call can be chained).
model = models.efficientnet_b0(weights='IMAGENET1K_V1').eval()

# Preprocess the image
def preprocess_image(image_path):
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    image = Image.open(image_path).convert('RGB')
    image_tensor = transform(image).unsqueeze(0)  # Add batch dimension
    return image_tensor

# Function to slide an occluding square over the image
def occlude_and_predict(model, image_tensor, occlusion_size=15, stride=5):
    _, _, h, w = image_tensor.size()
    output_map = np.zeros((h, w))  # To store probabilities for each occluded region

    original_output = model(image_tensor)  # Original model output
    original_probabilities = F.softmax(original_output, dim=1).detach().numpy()

    # Get the correct class (highest probability)
    predicted_class = np.argmax(original_probabilities)

    for y in range(0, h - occlusion_size + 1, stride):
        for x in range(0, w - occlusion_size + 1, stride):
            # Create a copy of the image
            occluded_image = image_tensor.clone()
            
            # Zero-out a portion of the image (the occlusion square)
            occluded_image[:, :, y:y + occlusion_size, x:x + occlusion_size] = 0.0
            
            # Forward pass with the occluded image
            output = model(occluded_image)
            probabilities = F.softmax(output, dim=1).detach().numpy()

            # Store the probability of the correct class for the occluded region
            output_map[y:y + occlusion_size, x:x + occlusion_size] = probabilities[0, predicted_class]
    
    return output_map, predicted_class

# Visualize an occlusion map next to the image it explains
def show_saliency_map(output_map, image_path):
    """Plot the original image and its occlusion map side by side.

    Args:
        output_map: 2-D array of values to draw as a heat map.
        image_path: Path of the image file shown in the left panel.
    """
    image = Image.open(image_path).convert('RGB')
    plt.figure(figsize=(10, 5))

    # Plot original image
    plt.subplot(1, 2, 1)
    plt.imshow(image)
    plt.title("Original Image")

    # Plot saliency map
    plt.subplot(1, 2, 2)
    plt.imshow(output_map, cmap='jet', interpolation='nearest')
    plt.title("Occlusion-Based Saliency Map")
    plt.colorbar()
    plt.show()

# Load and preprocess the image BEFORE running the occlusion sweep; the sweep
# needs `image_tensor`, so defining it afterwards fails on a fresh kernel.
image_path = '/kaggle/input/dogggg/n02088466_bloodhound.JPEG'
image_tensor = preprocess_image(image_path)

# Generate the occlusion-based saliency map
occlusion_size = 15  # Size of the occluding square
stride = 8  # Stride of the occluding square
output_map, predicted_class = occlude_and_predict(model, image_tensor, occlusion_size, stride)

# Show saliency map for the test image
show_saliency_map(output_map, image_path)

In [None]:
# Show the class index that `occlude_and_predict` returned for the un-occluded image.
predicted_class