## COMP5623 Coursework on Image Classification with Convolutional Neural Networks 

Starter code.

In [0]:
import torch
import torchvision
import torch.optim as optim
import torch.nn as nn
import torchvision.transforms as transforms
from  torch.utils.data import Dataset

from sklearn.metrics import confusion_matrix
from skimage import io, transform

import matplotlib.pyplot as plt
from tqdm import tqdm
from PIL import Image
import pandas as pd
import numpy as np
import csv
import os
import math
import cv2
import timeit


### Part I

The first part of the assignment is to build a CNN and train it on a subset of the ImageNet dataset. We will first create a dataframe with all the references to the images and their labels.

To download the images into your work environment, clone the git repository containing the images.

In [57]:
! git clone https://github.com/MohammedAlghamdi/imagenet10.git

fatal: destination path 'imagenet10' already exists and is not an empty directory.


Check that the repository is there:

In [58]:
! ls

imagenet10  sample_data


In [0]:
# Directory holding one sub-folder per class; get_meta() builds each class
# directory path from this prefix plus the class name.
root_dir = "imagenet10/train_set/"
# Class folder names. get_meta() enumerates this list, so the position of a name
# here is the integer label given to its images ("baboon" -> 0 ... "orange" -> 9).
class_names = [
  "baboon",
  "banana",
  "canoe",
  "cat",
  "desk",
  "drill",
  "dumbbell",
  "football",
  "mug",
  "orange",
]

A helper function for reading in images and assigning labels.

In [0]:
def get_meta(root_dir, dirs):
    """Collect image file paths and their integer class labels.

    Args:
        root_dir (str): Directory that contains one sub-directory per class.
            A trailing path separator is optional.
        dirs (sequence of str): Sub-directory names. The position of a name in
            this sequence is the label assigned to every file inside it.

    Returns:
        tuple(list[str], list[int]): Parallel lists of file paths and labels.
        Within each class the paths are sorted, so the result is deterministic.
    """
    paths, classes = [], []
    for label, dir_ in enumerate(dirs):
        # os.path.join copes with root_dir given with or without a trailing "/".
        class_dir = os.path.join(root_dir, dir_)
        # The context manager closes the directory iterator as soon as we are done.
        with os.scandir(class_dir) as entries:
            files = sorted(entry.path for entry in entries if entry.is_file())
        paths.extend(files)
        classes.extend([label] * len(files))

    return paths, classes

Now we create a dataframe using all the data.

In [0]:
# Each image is labelled with the index of its class folder in class_names
# (0 = "baboon", ..., 9 = "orange").
paths, classes = get_meta(root_dir, class_names)

data = {
    'path': paths,
    'class': classes
}

data_df = pd.DataFrame(data, columns=['path', 'class'])
# Shuffle once with a fixed seed so that the train/valid/test split, which is
# taken later by slicing this frame, is identical on every run.
data_df = data_df.sample(frac=1, random_state=0).reset_index(drop=True)

View some sample data.

In [62]:
# Report how many images were indexed, then preview the first rows.
print("Found {} images.".format(len(data_df)))
data_df.head()

Found 9000 images.


Unnamed: 0,path,class
0,imagenet10/train_set/dumbbell/n03255030_11306....,6
1,imagenet10/train_set/dumbbell/n03255030_8667.JPEG,6
2,imagenet10/train_set/banana/n07753592_8698.JPEG,1
3,imagenet10/train_set/cat/n02123159_7316.JPEG,3
4,imagenet10/train_set/dumbbell/n03255030_9844.JPEG,6


Now we will create the Dataset class.

In [0]:
class ImageNet10(Dataset):
    """ImageNet10 dataset backed by a DataFrame of image paths and labels.

    Each item is an ``(image, label)`` pair. The image is opened from the
    ``'path'`` column, converted to RGB and passed through ``transform`` when
    one is given. The label is the ``'class'`` column value as a 0-d integer
    tensor.
    """

    def __init__(self, df, transform=None):
        """
        Args:
            df (DataFrame object): Dataframe with a 'path' column (image file
                paths) and a 'class' column (integer labels).
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.df = df
        self.transform = transform

    def __len__(self):
        """Return the number of samples (rows of the dataframe)."""
        return len(self.df)

    def __getitem__(self, index):
        """Load the sample at row position ``index``.

        Args:
            index (int): Positional row index, independent of the dataframe's
                index labels, so sliced frames work without ``reset_index``.

        Returns:
            tuple: ``(x, y)`` where ``x`` is the (optionally transformed) RGB
            image and ``y`` is the class label as an integer tensor.
        """
        row = self.df.iloc[index]

        # Image.open is lazy and keeps the file handle open; convert() forces
        # the pixel data to be read and returns an independent RGB copy (this
        # also handles the grayscale images in the data), after which the
        # context manager closes the file.
        with Image.open(row['path']) as img:
            x = img.convert('RGB')
        y = torch.tensor(int(row['class']))

        if self.transform:
            x = self.transform(x)

        return x, y

Compute what we should normalise the dataset to.

In [0]:
def compute_img_mean_std(image_paths):
    """Compute the per-channel mean and standard deviation of a set of images.

    Every image is read with OpenCV, resized to 224x224 and scaled from
    0-255 to 0-1. The statistics are accumulated as running sums, so only one
    image is held in memory at a time (stacking all images as float32 needs
    several GB for a few thousand images).

    Original version by @xinruizhuang.

    Args:
        image_paths (sequence of str): Paths of the images to include.

    Returns:
        tuple(list[float], list[float]): ``(means, stdevs)``, each with three
        values in RGB order.

    Raises:
        ValueError: If ``image_paths`` is empty or an image cannot be read.
    """
    img_h, img_w = 224, 224

    if len(image_paths) == 0:
        raise ValueError("image_paths is empty; cannot compute statistics")

    # Accumulate in float64 to keep the sum-of-squares formula accurate.
    channel_sum = np.zeros(3, dtype=np.float64)
    channel_sq_sum = np.zeros(3, dtype=np.float64)
    n_pixels = 0

    for path in tqdm(image_paths):
        img = cv2.imread(path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise ValueError("Could not read image: {}".format(path))
        img = cv2.resize(img, (img_w, img_h))  # dsize is (width, height)

        pixels = img.reshape(-1, 3).astype(np.float64) / 255.
        channel_sum += pixels.sum(axis=0)
        channel_sq_sum += np.square(pixels).sum(axis=0)
        n_pixels += pixels.shape[0]

    mean_bgr = channel_sum / n_pixels
    # Var[x] = E[x^2] - E[x]^2; clamp tiny negative values caused by rounding.
    var_bgr = np.maximum(channel_sq_sum / n_pixels - np.square(mean_bgr), 0.0)
    std_bgr = np.sqrt(var_bgr)

    # OpenCV loads channels as BGR; reverse to the RGB order used by PIL/torchvision.
    means = [float(m) for m in mean_bgr[::-1]]
    stdevs = [float(s) for s in std_bgr[::-1]]

    print("normMean = {}".format(means))
    print("normStd = {}".format(stdevs))
    return means, stdevs


In [0]:
# NOTE: `paths` is the full image list returned by get_meta(), so these
# statistics are computed over every image, including the ones that are later
# placed in the validation and test splits.
norm_mean, norm_std = compute_img_mean_std(paths)

 25%|██▌       | 2292/9000 [00:13<00:50, 132.20it/s]

Now let's create the transforms to normalise and turn our data into tensors.

In [0]:
# Preprocessing applied to every image before it reaches the network.
# The 256x256 crop size matters: ConvNet.fc1 expects 3200 = 128 * 5 * 5 input
# features, which is what the five conv/pool stages produce for a 256x256 input.
# NOTE: the normalisation statistics were measured on images resized to 224x224
# by compute_img_mean_std, not on this 256x256 crop.
data_transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(256),
        transforms.ToTensor(),
        transforms.Normalize(norm_mean, norm_std),
    ])

Let's split the data into train, validation and test sets and instantiate our new ImageNet10 dataset objects.

In [0]:
# Fractions of the shuffled data used for training and validation; whatever
# remains (20% with these values) becomes the test set.
train_split = 0.70
valid_split = 0.10

train_size = int(len(data_df)*train_split)
valid_size = int(len(data_df)*valid_split)

# Every slice gets a fresh 0..n-1 index so the three datasets are built the
# same way and none of them depends on the parent frame's index labels.
ins_dataset_train = ImageNet10(
    df=data_df[:train_size].reset_index(drop=True),
    transform=data_transform,
)

ins_dataset_valid = ImageNet10(
    df=data_df[train_size:(train_size + valid_size)].reset_index(drop=True),
    transform=data_transform,
)

ins_dataset_test = ImageNet10(
    df=data_df[(train_size + valid_size):].reset_index(drop=True),
    transform=data_transform,
)

You will need to create DataLoaders for the datasets.

In [0]:
# Training batches are reshuffled every epoch.
train_loader = torch.utils.data.DataLoader(
    ins_dataset_train,
    batch_size=5,
    shuffle=True,
    num_workers=2
)


test_loader = torch.utils.data.DataLoader(
    ins_dataset_test,
    batch_size=12, # Forward pass only so batch size can be larger
    shuffle=False,
    num_workers=2
)


valid_loader = torch.utils.data.DataLoader(
    ins_dataset_valid,
    batch_size=5, # Forward pass only; kept at the training batch size here
    shuffle=False,
    num_workers=2
)

# Integer class ids 0-9, passed to plot_confusion_matrix as tick labels.
# NOTE: this rebinds `classes`, which earlier held the per-image label list
# returned by get_meta().
classes = range(0, 10)

The ConvNet model: five convolutional blocks followed by two fully-connected layers:

In [0]:
# Convolutional neural network
class ConvNet(nn.Module):
    """Five-stage convolutional classifier.

    Each stage is a 4x4 convolution (no padding) followed by 2x2 max-pooling
    and ReLU; the first three stages also apply dropout with p=0.1. The
    flattened features go through two fully-connected layers.

    ``fc1`` expects 3200 = 128 * 5 * 5 features, which is what the five stages
    produce for a 3x256x256 input; other input sizes will fail at ``fc1``.

    Args:
        num_classes (int): Number of output logits.
    """

    def __init__(self, num_classes=10):
        super(ConvNet, self).__init__()

        # Stage 1 is assembled in forward(): conv1 -> ReLU -> pool -> dropout.
        self.conv1 = nn.Conv2d(3, 16, 4)
        self.pool = nn.MaxPool2d(2, 2)

        self.conv2 = nn.Sequential(
            nn.Conv2d(16, 32, 4),
            nn.MaxPool2d(2, 2),
            nn.ReLU(),
            nn.Dropout(0.1)
        )

        self.conv3 = nn.Sequential(
            nn.Conv2d(32, 64, 4),
            nn.MaxPool2d(2, 2),
            nn.ReLU(),
            nn.Dropout(0.1)
        )

        # The last two stages use no dropout.
        self.conv4 = nn.Sequential(
            nn.Conv2d(64, 128, 4),
            nn.MaxPool2d(2, 2),
            nn.ReLU(),
        )

        self.conv5 = nn.Sequential(
            nn.Conv2d(128, 128, 4),
            nn.MaxPool2d(2, 2),
            nn.ReLU(),
        )

        self.fc1 = nn.Linear(3200, 512)
        self.fc2 = nn.Linear(512, num_classes)

    def forward(self, x):
        """Compute class logits.

        Args:
            x (Tensor): Image batch; the layer sizes require shape (N, 3, 256, 256).

        Returns:
            Tensor: Logits of shape (N, num_classes).
        """
        x = self.pool(torch.nn.functional.relu(self.conv1(x)))
        # The functional dropout defaults to training=True, so the module's
        # mode must be passed explicitly or dropout stays active in eval().
        x = torch.nn.functional.dropout(x, p=0.1, training=self.training)

        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)

        x = x.reshape(x.size(0), -1)
        # NOTE(review): there is no non-linearity between fc1 and fc2, so the
        # two layers compose to a single affine map. Left unchanged to keep
        # the architecture as designed.
        x = self.fc1(x)
        x = self.fc2(x)

        return x



In [0]:
# Seed PyTorch's global random number generator for repeatable runs.
# This does not seed NumPy or Python's `random` module.
torch.manual_seed(0)

In [0]:
# Select the compute device: the first CUDA GPU when one is available,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
device

In [0]:
# Instantiate the network and move its parameters to the selected device.
# The bare `model` expression shows the layer summary as the cell output.
model = ConvNet().to(device)
model

In [0]:

# Multi-class classification loss, applied to the raw outputs of the network.
criterion = nn.CrossEntropyLoss()
# Stochastic gradient descent with momentum
optimizer = optim.SGD(model.parameters(), lr=0.0003, momentum=0.9)

In [0]:
# Per-epoch history. train_model_epochs() appends one value per epoch to each
# list; the plotting cell reads them afterwards.
acc_list = []
loss_list = []
val_acc = []
val_loss_list = []


def train_model_epochs(num_epochs):
    """Train the module-level ``model`` and validate it after every epoch.

    Uses the module-level ``train_loader``, ``valid_loader``, ``optimizer``,
    ``criterion`` and ``device``. Batches are copied to ``device`` explicitly.

    For each epoch one value is appended to ``acc_list`` / ``loss_list``
    (training accuracy and mean training loss over that epoch) and to
    ``val_acc`` / ``val_loss_list`` (validation accuracy and mean validation
    loss). All counters are reset every epoch, so the values are per-epoch
    rather than cumulative.

    The model is put in train mode for the training pass and in eval mode for
    validation; it is left in eval mode when the function returns.

    Args:
        num_epochs (int): Number of passes over the training data.
    """
    for epoch in range(num_epochs):

        # ---- training pass -------------------------------------------------
        model.train()  # enable dropout
        running_loss = 0.0   # reset every 1000 mini-batches, for progress output
        epoch_loss = 0.0
        epoch_correct = 0
        epoch_total = 0

        for i, (images, labels) in enumerate(train_loader):
            # .to() is not in-place for tensors: keep the returned copies.
            images = images.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)

            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            _, predicted = torch.max(outputs.detach(), 1)
            epoch_total += labels.size(0)
            epoch_correct += (predicted == labels).sum().item()

            step_loss = loss.item()
            running_loss += step_loss
            epoch_loss += step_loss
            if i % 1000 == 999:    # print every 1000 mini-batches
                print('Epoch / Batch [%d / %d] - Loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 1000))
                running_loss = 0.0

        # max(..., 1) avoids a ZeroDivisionError when a loader is empty.
        acc_list.append(epoch_correct / max(epoch_total, 1))
        loss_list.append(epoch_loss / max(len(train_loader), 1))

        # ---- validation pass -----------------------------------------------
        model.eval()  # disable dropout for a deterministic evaluation
        val_loss = 0.0
        val_correct = 0
        val_total = 0

        with torch.no_grad():
            for imagesv, labelsv in valid_loader:
                imagesv = imagesv.to(device)
                labelsv = labelsv.to(device)

                outputsv = model(imagesv)
                val_loss += criterion(outputsv, labelsv).item()

                _, predictedv = torch.max(outputsv, 1)
                val_total += labelsv.size(0)
                val_correct += (predictedv == labelsv).sum().item()

        epoch_val_loss = val_loss / max(len(valid_loader), 1)
        epoch_val_acc = val_correct / max(val_total, 1)

        # Report the validation accuracy (not the training accuracy).
        print('val_loss: %.3f - val_acc:%.3f' % (epoch_val_loss, epoch_val_acc))
        val_acc.append(epoch_val_acc)
        val_loss_list.append(epoch_val_loss)


In [0]:
# Time one 10-epoch training run.
# NOTE: despite its name, `cpu_train_time` measures training on whichever
# `device` was selected earlier (the GPU when one is available).
cpu_train_time = timeit.timeit(
    "train_model_epochs(num_epochs)",
    setup="num_epochs=10",
    number=1,
    globals=globals(),
)

In [0]:
# Display the wall-clock time measured by timeit for the training run.
cpu_train_time

In [0]:
# Plot the per-epoch training and validation curves recorded during training.
epochs = range(len(acc_list))
nb_epochs = len(epochs)

# Explicit figure/axes objects instead of the pyplot state machine.
f2, (ax_acc, ax_loss) = plt.subplots(1, 2, num=2, figsize=(12, 4))

ax_acc.plot(epochs, acc_list, 'bo', label = 'training acc')
ax_acc.plot(epochs, val_acc, 'b', label = 'validation acc')
ax_acc.axis((0, nb_epochs, 0, 1.2))
ax_acc.set_title('train and validation acc')
ax_acc.set_xlabel('epoch')
ax_acc.set_ylabel('accuracy')
ax_acc.legend()

ax_loss.plot(epochs, loss_list, 'bo', label = 'training loss')
ax_loss.plot(epochs, val_loss_list, 'b', label = 'validation loss')
ax_loss.axis((0, nb_epochs, 0, 5))
ax_loss.set_title('train and validation loss')
ax_loss.set_xlabel('epoch')
ax_loss.set_ylabel('loss')
ax_loss.legend()

f2.tight_layout()
# plt.show() renders the figure in a notebook; the draw()/pause() pair is a
# workaround for interactive scripts and is not needed here.
plt.show()

In [0]:
correct = 0
total = 0
labels_list = []      # true labels, collected for the confusion matrix
predicted_list = []   # predicted labels, in the same order

# Evaluation mode switches off the nn.Dropout layers; without it the test
# predictions are randomly perturbed.
model.eval()

# Gradients are not needed for inference; disabling them saves memory and time.
with torch.no_grad():

    # Iterate over the test set
    for images, labels in test_loader:
        # Explicitly copy the batch onto the device (.to() is not in-place for tensors).
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)

        # torch.max over dim 1 returns (values, indices); the indices are the argmax.
        _, predicted = torch.max(outputs, 1)
        labels_list += labels.tolist()
        predicted_list += predicted.tolist()

        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# max(total, 1) avoids a ZeroDivisionError if the test set is empty.
print('Accuracy of the network on the test images: %d %%' % (100 * correct / max(total, 1)))

In [0]:
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import itertools

# Rows are true labels, columns are predicted labels. Passing `labels`
# explicitly keeps one row/column per class even if some class never appears
# in the test labels or predictions, so the matrix always matches the ten
# tick labels used when it is plotted. Plain Python lists are accepted
# directly; no tensor conversion is needed.
cm = confusion_matrix(
    labels_list,
    predicted_list,
    labels=list(range(len(class_names))),
)

cm


In [0]:
def plot_confusion_matrix(cm,
                          classes,
                          normalize=True,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """Print a short header and plot a confusion matrix as an annotated heat map.

    Args:
        cm (array-like): Square confusion matrix; rows are true labels and
            columns are predicted labels.
        classes (sequence): Tick labels, one per row/column of ``cm``.
        normalize (bool): If True, each row is divided by its sum so cells
            show the fraction of that true class. Rows that sum to zero
            (a class with no samples) are shown as zeros instead of NaN.
        title (str): Figure title.
        cmap: Matplotlib colour map used for the heat map.
    """
    cm = np.asarray(cm)
    if normalize:
        row_sums = cm.sum(axis=1, keepdims=True)
        # Divide only where the row sum is non-zero; other rows stay at 0.
        cm = np.divide(cm.astype('float'), row_sums,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_sums != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)

    # Specify the tick marks and axis text
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=90)
    plt.yticks(tick_marks, classes)

    # The data formatting
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.

    # Print the text of the matrix; use white text on dark cells, black on light ones.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()
    plt.show()



In [0]:
# Draw the row-normalised confusion matrix (normalize defaults to True);
# the tick labels are the integer class ids held in `classes`.
plot_confusion_matrix(cm, classes)

In [0]:
# Visualise the learned first-layer filters: one figure per input colour
# channel, each showing every filter's 2-D kernel for that channel.
convL = model.conv1
# Conv2d weights have shape (out_channels, in_channels, kernel_h, kernel_w).
kernels = convL.weight.data
kernels = kernels.to("cpu")
kernelNP = kernels.numpy()

n_filters = kernelNP.shape[0]
n_cols = 4
filterNumber = -(-n_filters // n_cols)  # grid rows needed (ceiling division)

# The images are fed to the network as RGB, so input channel 0/1/2 is red/green/blue.
for channel, colour in zip(range(kernelNP.shape[1]), ("red", "green", "blue")):
    for k in range(n_filters):
        graph = plt.subplot(filterNumber, n_cols, k + 1)
        graph.set_xticks([])
        graph.set_yticks([])
        # kernelNP[k, channel] is the 2-D kernel that filter k applies to this
        # input channel. Indexing the last axis instead would take a single
        # kernel column across all channels, not a colour channel.
        plt.imshow(np.clip(kernelNP[k, channel], 0, 1), cmap='gray')
    plt.suptitle('conv1 kernels - %s channel' % colour)
    plt.show()

Feature Maps


In [0]:
# Show the first four feature maps that conv1 produces for one training image.
activation = {}

def get_activation(name):
    """Build a forward hook that stores the hooked module's output in `activation[name]`."""
    def hook(model, input, output):
        activation[name] = output.detach()
    return hook

# Dataset samples are CPU tensors, so the model is run on the CPU as well
# (nn.Module.to moves the parameters in place and returns the same module).
# No pass over the test set is needed for this.
model_cpu = model.to("cpu")
was_training = model_cpu.training
model_cpu.eval()  # switch off the nn.Dropout layers while capturing activations

hook_handle = model_cpu.conv1.register_forward_hook(get_activation('conv1'))
try:
    data, _ = ins_dataset_train[0]
    data = data.unsqueeze(0)  # add the batch dimension
    with torch.no_grad():
        output = model_cpu(data)
finally:
    # Remove the hook so re-running the cell does not stack duplicate hooks,
    # and restore the mode the model was in before.
    hook_handle.remove()
    model_cpu.train(was_training)

act = activation['conv1'].squeeze()
fig, axarr = plt.subplots(2, 2)
# One panel per channel, channels 0-3.
for idx, ax in enumerate(axarr.flat):
    ax.imshow(act[idx], cmap="gray")


In [0]:
# Show the first four feature maps that the conv2 block produces for one training image.
activation = {}

def get_activation(name):
    """Build a forward hook that stores the hooked module's output in `activation[name]`."""
    def hook(model, input, output):
        activation[name] = output.detach()
    return hook

# Dataset samples are CPU tensors, so the model is run on the CPU as well
# (nn.Module.to moves the parameters in place and returns the same module).
# No pass over the test set is needed for this.
model_cpu = model.to("cpu")
was_training = model_cpu.training
model_cpu.eval()  # switch off the nn.Dropout layers while capturing activations

hook_handle = model_cpu.conv2.register_forward_hook(get_activation('conv2'))
try:
    data, _ = ins_dataset_train[0]
    data = data.unsqueeze(0)  # add the batch dimension
    with torch.no_grad():
        output = model_cpu(data)
finally:
    # Remove the hook so re-running the cell does not stack duplicate hooks,
    # and restore the mode the model was in before.
    hook_handle.remove()
    model_cpu.train(was_training)

act = activation['conv2'].squeeze()
fig, axarr = plt.subplots(2, 2)
# One panel per channel, channels 0-3.
for idx, ax in enumerate(axarr.flat):
    ax.imshow(act[idx], cmap="gray")

In [0]:
# Show the first four feature maps that the conv3 block produces for one training image.
activation = {}

def get_activation(name):
    """Build a forward hook that stores the hooked module's output in `activation[name]`."""
    def hook(model, input, output):
        activation[name] = output.detach()
    return hook

# Dataset samples are CPU tensors, so the model is run on the CPU as well
# (nn.Module.to moves the parameters in place and returns the same module).
# No pass over the test set is needed for this.
model_cpu = model.to("cpu")
was_training = model_cpu.training
model_cpu.eval()  # switch off the nn.Dropout layers while capturing activations

hook_handle = model_cpu.conv3.register_forward_hook(get_activation('conv3'))
try:
    data, _ = ins_dataset_train[0]
    data = data.unsqueeze(0)  # add the batch dimension
    with torch.no_grad():
        output = model_cpu(data)
finally:
    # Remove the hook so re-running the cell does not stack duplicate hooks,
    # and restore the mode the model was in before.
    hook_handle.remove()
    model_cpu.train(was_training)

act = activation['conv3'].squeeze()
fig, axarr = plt.subplots(2, 2)
# One panel per channel, channels 0-3.
for idx, ax in enumerate(axarr.flat):
    ax.imshow(act[idx], cmap="gray")

In [0]:
# Show the first four feature maps that the conv4 block produces for one training image.
activation = {}

def get_activation(name):
    """Build a forward hook that stores the hooked module's output in `activation[name]`."""
    def hook(model, input, output):
        activation[name] = output.detach()
    return hook

# Dataset samples are CPU tensors, so the model is run on the CPU as well
# (nn.Module.to moves the parameters in place and returns the same module).
# No pass over the test set is needed for this.
model_cpu = model.to("cpu")
was_training = model_cpu.training
model_cpu.eval()  # switch off the nn.Dropout layers while capturing activations

hook_handle = model_cpu.conv4.register_forward_hook(get_activation('conv4'))
try:
    data, _ = ins_dataset_train[0]
    data = data.unsqueeze(0)  # add the batch dimension
    with torch.no_grad():
        output = model_cpu(data)
finally:
    # Remove the hook so re-running the cell does not stack duplicate hooks,
    # and restore the mode the model was in before.
    hook_handle.remove()
    model_cpu.train(was_training)

act = activation['conv4'].squeeze()
fig, axarr = plt.subplots(2, 2)
# One panel per channel, channels 0-3.
for idx, ax in enumerate(axarr.flat):
    ax.imshow(act[idx], cmap="gray")

In [0]:
# Show the first four feature maps that the conv5 block produces for one training image.
activation = {}

def get_activation(name):
    """Build a forward hook that stores the hooked module's output in `activation[name]`."""
    def hook(model, input, output):
        activation[name] = output.detach()
    return hook

# Dataset samples are CPU tensors, so the model is run on the CPU as well
# (nn.Module.to moves the parameters in place and returns the same module).
# No pass over the test set is needed for this.
model_cpu = model.to("cpu")
was_training = model_cpu.training
model_cpu.eval()  # switch off the nn.Dropout layers while capturing activations

hook_handle = model_cpu.conv5.register_forward_hook(get_activation('conv5'))
try:
    data, _ = ins_dataset_train[0]
    data = data.unsqueeze(0)  # add the batch dimension
    with torch.no_grad():
        output = model_cpu(data)
finally:
    # Remove the hook so re-running the cell does not stack duplicate hooks,
    # and restore the mode the model was in before.
    hook_handle.remove()
    model_cpu.train(was_training)

act = activation['conv5'].squeeze()
fig, axarr = plt.subplots(2, 2)
# One panel per channel, channels 0-3.
for idx, ax in enumerate(axarr.flat):
    ax.imshow(act[idx], cmap="gray")