by Simona Kuoraitė 2110586 <br>
Model: resnet101 <br>
DataSet: about 1000 images (up to 333 per class, 999 in total), 3 classes: Dog, Cat, Bird

In [1]:
%%capture
# Install the third-party packages this notebook needs; the %%capture cell
# magic above keeps pip's output out of the notebook.
# NOTE(review): pandas, seaborn and scikit-learn are imported in the next cell
# but are not installed here - presumably they are preinstalled in the runtime;
# confirm before running elsewhere.
!pip install torch torchvision
!pip install opencv-contrib-python
!pip install openimages
!pip install matplotlib

In [2]:
import os
from collections import defaultdict

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sn

from PIL import Image
from openimages.download import download_dataset
from sklearn.metrics import confusion_matrix

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset

from torchvision import models
from torchvision import transforms
from torchvision.models import resnet101, ResNet101_Weights

Downloading the DataSet

In [3]:
# Dataset configuration: where the images are stored, how many images to aim
# for in total, and which classes to download.
data_directory = "data"
total_samples = 1000
classes = ["Dog", "Cat", "Bird"]

# exist_ok=True keeps this cell safe to re-run and avoids the separate
# "does it exist?" check racing with the directory creation.
os.makedirs(data_directory, exist_ok=True)

# Integer division: 1000 // 3 == 333 images per class, i.e. 999 images overall.
limit_per_class = total_samples // len(classes)

In [4]:
# Download up to `limit_per_class` images for each entry of `classes` into
# `data_directory`.
# NOTE(review): the saved output of this cell is a KeyboardInterrupt, so the
# download may have been cut short - verify the number of images on disk
# before trusting the statistics computed further down.
download_dataset(data_directory, classes, limit=limit_per_class)

KeyboardInterrupt: 

Getting the DataSet ready <br>
resnet101 expects input images to be normalized with mean [0.485, 0.456, 0.406] and standard deviation [0.229, 0.224, 0.225]

In [None]:
# Per-channel mean / standard deviation the input images are normalised with.
_NORMALIZE_MEAN = [0.485, 0.456, 0.406]
_NORMALIZE_STD = [0.229, 0.224, 0.225]

# Preprocessing applied to every image, in order: resize (256), centre crop
# (224), conversion to a tensor, and channel-wise normalisation.
_preprocessing_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORMALIZE_MEAN, std=_NORMALIZE_STD),
]

custom_transform = transforms.Compose(_preprocessing_steps)

In [None]:
class CustomDataset(Dataset):
    """Image dataset stored as ``<data_folder>/<class>/images/<file>``.

    The directory tree is scanned once, at construction time, so ``len()`` and
    item access are cheap and always agree with each other. Samples are ordered
    round-robin across the classes (first image of every class, then the second
    image of every class, ...); classes with fewer images simply drop out of
    the rotation once they are exhausted.

    Args:
        data_folder: Root directory that holds one sub-directory per class.
        transform: Optional callable applied to each loaded RGB image.

    Attributes:
        data_folder: The root directory passed in.
        classes: Sorted names of the class directories (hidden entries and
            plain files in ``data_folder`` are ignored).
        transform: The transform passed in, or ``None``.
        samples: List of ``(image_path, class_name)`` pairs in dataset order.
    """

    # Name of the per-class sub-directory that contains the image files.
    IMAGE_SUBFOLDER = "images"

    def __init__(self, data_folder, transform=None):
        self.data_folder = data_folder
        self.transform = transform
        # Only real, non-hidden directories are classes; stray files such as
        # OS metadata must not become labels or break the directory scan.
        self.classes = sorted(
            entry for entry in os.listdir(data_folder)
            if not entry.startswith(".")
            and os.path.isdir(os.path.join(data_folder, entry))
        )
        self.samples = self._index_samples()

    def _index_samples(self):
        """Return the ``(image_path, class_name)`` pairs in round-robin order."""
        files_per_class = []
        for class_name in self.classes:
            image_folder = os.path.join(self.data_folder, class_name, self.IMAGE_SUBFOLDER)
            # A class directory without an image folder contributes no samples.
            if os.path.isdir(image_folder):
                # Sorted so that indices map to the same files on every run.
                file_names = sorted(os.listdir(image_folder))
            else:
                file_names = []
            files_per_class.append((class_name, image_folder, file_names))

        longest = max((len(names) for _, _, names in files_per_class), default=0)

        samples = []
        for image_index in range(longest):
            for class_name, image_folder, file_names in files_per_class:
                if image_index < len(file_names):
                    image_path = os.path.join(image_folder, file_names[image_index])
                    samples.append((image_path, class_name))
        return samples

    def __len__(self):
        """Return the number of indexable images."""
        return len(self.samples)

    def __getitem__(self, index):
        """Return ``(image, label)`` for ``index``; ``label`` is the class name.

        Raises:
            IndexError: If ``index`` is outside the dataset.
        """
        image_path, label = self.samples[index]

        # The context manager closes the file once the pixels are converted.
        with Image.open(image_path) as image_file:
            image = image_file.convert("RGB")

        if self.transform is not None:
            image = self.transform(image)

        return (image, label)

In [None]:
class CustomDataLoader:
    """Sequential batch iterator over an indexable dataset.

    Each batch is a list of ``dataset[i]`` items in index order; the last batch
    may be smaller than ``batch_size``. The loader can be iterated repeatedly:
    every new ``for`` loop starts again from the first batch.

    Args:
        dataset: Object supporting ``len()`` and integer indexing.
        batch_size: Maximum number of items per batch; must be positive.

    Raises:
        ValueError: If ``batch_size`` is not positive.
    """

    def __init__(self, dataset, batch_size):
        if batch_size <= 0:
            raise ValueError(f"batch_size must be a positive integer, got {batch_size}")
        self.dataset = dataset
        self.batch_size = batch_size
        self.num_samples = len(dataset)
        # Ceiling division so a trailing partial batch is still produced.
        self.num_batches = (self.num_samples + batch_size - 1) // batch_size
        self.current_batch = 0

    def __len__(self):
        """Return the number of batches produced by one full pass."""
        return self.num_batches

    def __iter__(self):
        # Rewind, otherwise a second loop over the same loader would see an
        # already exhausted iterator and yield nothing.
        self.current_batch = 0
        return self

    def __next__(self):
        if self.current_batch >= self.num_batches:
            raise StopIteration

        start_index = self.current_batch * self.batch_size
        end_index = min(start_index + self.batch_size, self.num_samples)
        batch_data = [self.dataset[i] for i in range(start_index, end_index)]
        # Advance only after the batch was built successfully.
        self.current_batch += 1
        return batch_data

Running every image in the dataset through the model and collecting the softmax probabilities for each image

In [None]:
# Class names are the class directories inside the data folder; their order
# defines which model output index belongs to which class.
class_names = [
    entry for entry in os.listdir(data_directory)
    if os.path.isdir(os.path.join(data_directory, entry))
]
num_classes = len(class_names)

# Seed so the randomly initialised classification head is the same on every run.
torch.manual_seed(0)

model = resnet101(weights=ResNet101_Weights.DEFAULT, progress=False)
# NOTE(review): this replacement head is freshly initialised and is never
# trained in this notebook, so the predictions below come from random weights
# on top of the pretrained backbone. Fine-tune the head (or map the pretrained
# outputs onto these classes) before interpreting the statistics.
model.fc = nn.Linear(model.fc.in_features, num_classes)
model.eval()

batch = 32

custom_dataset = CustomDataset(data_folder=data_directory, transform=custom_transform)
custom_dataloader = CustomDataLoader(dataset=custom_dataset, batch_size=batch)
# Taken from the loader so the progress output matches the real dataset size.
number_of_batches = custom_dataloader.num_batches

probability_arr = []
classes_arr = []

for current_iteration, batch_data in enumerate(custom_dataloader):
    print(f"Current batch iteration: {current_iteration} Batches left: {number_of_batches - current_iteration}")
    images, labels = zip(*batch_data)

    # One forward pass per batch; every transformed image has the same shape,
    # so the images can be stacked into a single (batch, C, H, W) tensor.
    with torch.no_grad():
        output = model(torch.stack(images))
    probabilities = torch.softmax(output, dim=1)

    # Store one (1, num_classes) tensor per image, matching its label.
    probability_arr.extend(probabilities.split(1))
    classes_arr.extend(labels)

print("Done")

In [None]:
# Probability a class has to exceed to count as predicted for an image.
threshold = 0.3

predicted_class_arr = []
actual_class_arr = []

# Every class above the threshold yields one (predicted, actual) pair, so a
# single image may contribute several pairs - or none at all, in which case it
# does not appear in the statistics.
for probability_tensor, actual_class in zip(probability_arr, classes_arr):
    above_threshold = torch.nonzero(probability_tensor > threshold, as_tuple=False)
    # Column 0 is the row index within the tensor, column 1 the class index.
    for class_index in above_threshold[:, 1].tolist():
        predicted_class_arr.append(class_names[class_index])
        actual_class_arr.append(actual_class)

Calculate Statistics:
* accuracy = (TP + TN) / (TP + TN + FP + FN). Accuracy - the percentage of all guesses that were correct.
* precision = TP / (TP + FP). Precision - the percentage of positive guesses that were true.
* recall = TP / (TP + FN). Recall - the percentage of positive test examples classified as positive.
* F1 = (2 * precision * recall) / (precision + recall). F1 - a middle ground (the harmonic mean) between precision and recall.





In [None]:
# scikit-learn's signature is confusion_matrix(y_true, y_pred), which puts the
# actual classes on the rows. The per-class statistics read `df_cm` with
# rows = predicted class and columns = actual class, so the matrix is
# transposed explicitly instead of passing the arguments in swapped order.
# Passing labels= fixes the shape to len(class_names) x len(class_names),
# whichever classes actually occur.
cf_matrix = confusion_matrix(actual_class_arr, predicted_class_arr, labels=class_names).T

# Create a DataFrame with the confusion matrix and class names on both axes
df_cm = pd.DataFrame(cf_matrix, index=class_names, columns=class_names)

fig, ax = plt.subplots(figsize=(12, 7))
sn.heatmap(df_cm, annot=True, fmt='g', ax=ax)
# Label the axes so the orientation of the matrix is unambiguous.
ax.set_xlabel("Actual class")
ax.set_ylabel("Predicted class")
ax.set_title("Confusion matrix")
plt.show()

In [None]:
def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator``, or 0.0 when the denominator is zero."""
    return float(numerator) / float(denominator) if denominator else 0.0


def per_class_metrics(confusion, position):
    """Compute one-vs-rest statistics for the class at ``position``.

    Args:
        confusion: Square DataFrame with rows = predicted class and
            columns = actual class.
        position: Zero-based row/column position of the class.

    Returns:
        Dict with the keys ``accuracy``, ``precision``, ``recall`` and ``F1``.
        A ratio whose denominator is zero is reported as 0.0.
    """
    TP = confusion.iloc[position, position]
    # Row total = everything predicted as this class.
    FP = confusion.iloc[position, :].sum() - TP
    # Column total = everything that really is this class; the true positives
    # have to be removed to leave only the misses.
    FN = confusion.iloc[:, position].sum() - TP
    TN = confusion.to_numpy().sum() - TP - FP - FN

    precision = _safe_ratio(TP, TP + FP)
    recall = _safe_ratio(TP, TP + FN)
    return {
        "accuracy": _safe_ratio(TP + TN, TP + TN + FP + FN),
        "precision": precision,
        "recall": recall,
        "F1": _safe_ratio(2 * precision * recall, precision + recall),
    }


metric_names = ("accuracy", "precision", "recall", "F1")
metric_sums = dict.fromkeys(metric_names, 0.0)

# The class names come from the confusion matrix itself, so names and rows
# cannot get out of step and any number of classes is supported.
for position, class_name in enumerate(df_cm.index):
    print(f"Class: {class_name}")
    class_metrics = per_class_metrics(df_cm, position)

    for metric_name in metric_names:
        metric_sums[metric_name] += class_metrics[metric_name]
        print(f"{metric_name}: {class_metrics[metric_name]:.2f}")
    print("")

# Unweighted (macro) mean over the classes.
class_count = len(df_cm.index)
for metric_name in metric_names:
    print(f"{metric_name} mean: {_safe_ratio(metric_sums[metric_name], class_count):.2f}")

Classifying a random image

In [None]:
# Load the test image and apply the same preprocessing as for the dataset.
image = Image.open('/content/testImages/cat.png').convert("RGB")
image = custom_transform(image)

# Inference only: evaluation mode and no gradient tracking.
model.eval()
with torch.no_grad():
    output = model(image.unsqueeze(0))
probabilities = torch.softmax(output, dim=1)

predicted_class_index = torch.argmax(probabilities, dim=1).item()
predicted_class = class_names[predicted_class_index]

print(predicted_class)

# The tensor is normalised for the model; undo that (x * std + mean) and clamp
# to [0, 1] so the image is displayed with its real colours.
channel_mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
channel_std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)
display_image = (image * channel_std + channel_mean).clamp(0, 1)

plt.imshow(display_image.permute(1, 2, 0))
plt.title(predicted_class)
plt.show()