<a href="https://colab.research.google.com/github/kristupas-g/deep_learning_course/blob/main/resnet50_inference.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Deep learning course first task

Student: **Kristupas Gaidys** *(2015973)*

Model: **resnet50**

Classes: **broccoli**, **hotdog**, **zucchini**

--- 

## Configuration

In [1]:
!pip install openimages torchmetrics

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting openimages
  Downloading openimages-0.0.1-py2.py3-none-any.whl (10 kB)
Collecting torchmetrics
  Downloading torchmetrics-0.11.1-py3-none-any.whl (517 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m517.2/517.2 KB[0m [31m20.3 MB/s[0m eta [36m0:00:00[0m
Collecting boto3
  Downloading boto3-1.26.79-py3-none-any.whl (132 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m132.7/132.7 KB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting cvdata
  Downloading cvdata-0.0.3-py3-none-any.whl (37 kB)
Collecting s3transfer<0.7.0,>=0.6.0
  Downloading s3transfer-0.6.0-py3-none-any.whl (79 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.6/79.6 KB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting botocore<1.30.0,>=1.29.79
  Downloading botocore-1.29.79-py3-none-any.whl (10.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━

In [2]:
import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## Downloading data

In [3]:
from os import path, makedirs
from math import ceil
from openimages.download import download_dataset

In [4]:
amount_to_classify = 1000
data_dir = "data"
images_per_class = ceil(amount_to_classify/3)
classes = ["Broccoli", "Pizza", "Banana"]

In [5]:
if not path.exists(data_dir):
    makedirs(data_dir)

In [6]:
download_dataset(data_dir, classes, limit=images_per_class)

100%|██████████| 161/161 [00:10<00:00, 15.25it/s]
100%|██████████| 334/334 [00:19<00:00, 17.01it/s]
100%|██████████| 334/334 [00:19<00:00, 17.09it/s]
100%|██████████| 10/10 [00:03<00:00,  2.89it/s]
100%|██████████| 39/39 [00:04<00:00,  9.54it/s]


{'broccoli': {'images_dir': 'data/broccoli/images'},
 'pizza': {'images_dir': 'data/pizza/images'},
 'banana': {'images_dir': 'data/banana/images'}}

## Custom Dataset class

In [7]:
from torchvision.io import read_image
from torch.utils.data.dataset import Dataset
from glob import glob

In [8]:
class ClassificationDataset(Dataset):
    def __init__(self,image_dir,transforms = None):
        self.transforms = transforms
        self.image_dir = image_dir


        self.files = glob(image_dir + "**/*.jpg", recursive=True) 
        
        
    def __getitem__(self, index):
        image_dir = self.files[index]
        image_tensor = read_image(image_dir).float()

        to_rgb = transforms.Lambda(lambda x: x.repeat(3, 1, 1) if x.size(0)==1 else x)
        image_tensor = to_rgb(image_tensor)
 
        if self.transforms is not None:
            image_tensor = self.transforms(image_tensor)

        class_name = image_dir.split("/")[1]
        label = torch.tensor([x.lower() for x in classes].index(class_name))

        image_tensor, label = image_tensor.to(device), label.to(device)

        return (image_tensor, label)    


    def __len__(self):
        return len(self.files)

## Model initialization


In [9]:
from torchvision.models import resnet50, ResNet50_Weights 

In [10]:
weights = ResNet50_Weights.DEFAULT

model = resnet50(weights = weights)
model.eval().to(device)

Downloading: "https://download.pytorch.org/models/resnet50-11ad3fa6.pth" to /root/.cache/torch/hub/checkpoints/resnet50-11ad3fa6.pth


  0%|          | 0.00/97.8M [00:00<?, ?B/s]

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

## Dataloader


In [11]:
from torch.utils.data import DataLoader
import torchvision.transforms as transforms

In [12]:
transform = weights.transforms()

In [13]:
dataset = ClassificationDataset("data/", transforms = transform)
batchsize = 32
workers = 3

dataloader = DataLoader(
    dataset,
    batch_size = batchsize,
    #num_workers = workers 
)

## Performing inference

*class_idx* is a list of indexes of our chosen classes in the model

In [14]:
models_classes = weights.meta["categories"]

class_idx = [models_classes.index(chosen_class.lower()) for chosen_class in classes]

In [15]:
results_as_probabilities_with_target = []

for _, (data, target) in enumerate(dataloader):
    prediction = model(data).sigmoid()
    
    for image_idx, class_predictions in enumerate(prediction):
        chosen_class_predictions = [class_predictions[idx].item() for idx in class_idx]
        actual_class = target[image_idx].item()
        results_as_probabilities_with_target.append((chosen_class_predictions, actual_class))

*results_as_probabilities_with_target* variable holds a tuple where the first element is an array of our class probabilities and the second element of the tuple is the index of the actuall class.

## Result interpretation

### Baseline thresholds

In [16]:
# List of class probabilities without the target
results_as_probabilities = [x[0] for x in results_as_probabilities_with_target]    

Calculating a baseline threshold for our classes. Threshold will just be a mean of the predictions

In [17]:
import numpy as np

In [18]:
thresholds = np.zeros(len(classes))

for class_probabilities in results_as_probabilities: 
    for idx, value in enumerate(class_probabilities):
        thresholds[idx] += value

thresholds = [prob_sum / images_per_class for prob_sum in thresholds] 

print(thresholds)
thresholds = [0.9,0.9,0.9]

[0.8897708074259769, 8.947452093471244e-12, 0.09935748144101238]


### Comparing results to threshold values

*results_as_booleans* is a list which contains lists of boolean values that describe if the class was detected in the image

In [19]:
results_as_booleans = []

for class_probabilities in results_as_probabilities:
    image_booleans = []
    for class_idx, class_probability in enumerate(class_probabilities):
        verdict = class_probability >= thresholds[class_idx]
        image_booleans.append(int(verdict))
    results_as_booleans.append(image_booleans)

*results_as_booleans_with_target* is a list of tuples where the first element is an array of booleans and the second element is the index of the class that we are expecting

In [20]:
results_as_booleans_with_target = []

for idx in range(len(results_as_probabilities_with_target)):
    booleans_with_target = (results_as_booleans[idx], results_as_probabilities_with_target[idx][1])
    results_as_booleans_with_target.append(booleans_with_target) 

### Calculating TP, FP, TN, FN

In [21]:
true_positives = 0
false_positives = 0
true_negatives = 0
false_negatives = 0

In [22]:
for predictions, target_idx in results_as_booleans_with_target:
    for idx, prediction in enumerate(predictions):
        # Positives
        if prediction == 1 and idx == target_idx:
            true_positives += 1
        if prediction == 1 and idx != target_idx:
            false_positives += 1
            
        # Negatives
        if prediction == 0 and idx != target_idx:
            true_negatives += 1
        if prediction == 0 and idx == target_idx:
            false_negatives += 1

### Calculating *accuracy*

**Accuracy = (TP + TN) / (TP + TN + FP + FN)**

In [23]:
accuracy = (true_positives + true_negatives) / \
    (true_positives + true_negatives + false_positives + false_negatives)

### Calculating *precision*

**Precision = TP / (TP + FP)**

In [24]:
precision = true_positives / (true_positives + false_positives)

### Calculating *recall*

**Recall = TP / (TP + FN)**

In [25]:
recall = true_positives / (true_positives + false_negatives)

### Calculating *F1 score*

**F1 score = 2 * (precision * recall) / (precision + recall)**

In [26]:
f1 = 2 * (precision * recall) / (precision + recall)

## Generating report

This library was used to verify that calculations statistic calculations are correct

In [27]:
import torchmetrics

In [29]:
predictions, targets = zip(*results_as_probabilities_with_target)
trg = [np.zeros(len(classes)) for x in targets]
for idx, arr in enumerate(trg):
    arr[targets[idx]] = 1

predictions_tensor = torch.tensor(predictions)
targets_tensor = torch.tensor(trg)

threshold = 0.9

accuracy_metric = torchmetrics.classification.MultilabelAccuracy(num_labels = 3, threshold = threshold, average = "micro")
accuracy = accuracy_metric(predictions_tensor, targets_tensor).item()

precision_metric = torchmetrics.classification.MultilabelPrecision(num_labels = 3, threshold = threshold, average = "micro")
precision = precision_metric(predictions_tensor, targets_tensor).item()

recall_metric = torchmetrics.classification.MultilabelRecall(num_labels = 3, threshold = threshold, average = "micro")
recall = recall_metric(predictions_tensor, targets_tensor).item()

f1_metric = torchmetrics.classification.MultilabelF1Score(num_labels = 3, threshold = threshold, average = "micro")
f1 = f1_metric(predictions_tensor, targets_tensor).item()

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 score:", f1)

Accuracy: 0.6100987195968628
Precision: 0.25573769211769104
Recall: 0.08883827179670334
F1 score: 0.1318681389093399


In [30]:
print("Chosen classes: \t\t\t", classes)
print("\n")

print("Amount of predictions done: \t\t", amount_to_classify)
print("Images per class: \t\t\t", images_per_class)
print("\n")

print("TP: \t\t\t\t\t", true_positives)
print("FP: \t\t\t\t\t", false_positives)
print("TN: \t\t\t\t\t", true_negatives)
print("FN: \t\t\t\t\t", false_negatives)
print("\n")

print("Accuracy: \t\t\t\t", accuracy)
print("Precision: \t\t\t\t", precision)
print("Recall: \t\t\t\t", recall)
print("F1 score: \t\t\t\t", f1)

Chosen classes: 			 ['Broccoli', 'Pizza', 'Banana']


Amount of predictions done: 		 1000
Images per class: 			 334


TP: 					 78
FP: 					 227
TN: 					 1529
FN: 					 800


Accuracy: 				 0.6100987195968628
Precision: 				 0.25573769211769104
Recall: 				 0.08883827179670334
F1 score: 				 0.1318681389093399
