<a href="https://colab.research.google.com/github/kristupas-g/deep_learning_course/blob/main/resnet50_inference.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Deep learning course first task

Student: **Kristupas Gaidys** *(2015973)*

Model: **resnet50**



--- 

In [234]:
classes = ["Cucumber", "Artichoke", "Banana"]

## Configuration

In [235]:
!pip install openimages torchmetrics

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [236]:
import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## Downloading data

In [237]:
from os import path, makedirs
from math import ceil
from openimages.download import download_dataset

In [238]:
amount_to_classify = 1000
data_dir = "data"
images_per_class = ceil(amount_to_classify/3)

In [239]:
if not path.exists(data_dir):
    makedirs(data_dir)

In [240]:
download_dataset(data_dir, classes, limit=images_per_class)

100%|██████████| 334/334 [00:19<00:00, 17.12it/s]
100%|██████████| 71/71 [00:05<00:00, 12.60it/s]
100%|██████████| 334/334 [00:19<00:00, 17.02it/s]


{'cucumber': {'images_dir': 'data/cucumber/images'},
 'artichoke': {'images_dir': 'data/artichoke/images'},
 'banana': {'images_dir': 'data/banana/images'}}

## Custom Dataset class

In [241]:
from torchvision.io import read_image
from torch.utils.data.dataset import Dataset
from glob import glob

In [242]:
class ClassificationDataset(Dataset):
    def __init__(self,image_dir,transforms = None):
        self.transforms = transforms
        self.image_dir = image_dir


        self.files = glob(image_dir + "**/*.jpg", recursive=True) 
        
        
    def __getitem__(self, index):
        image_dir = self.files[index]
        image_tensor = read_image(image_dir).float()

        to_rgb = transforms.Lambda(lambda x: x.repeat(3, 1, 1) if x.size(0)==1 else x)
        image_tensor = to_rgb(image_tensor)
 
        if self.transforms is not None:
            image_tensor = self.transforms(image_tensor)

        class_name = image_dir.split("/")[1]
        label = torch.tensor([x.lower() for x in classes].index(class_name))
   
        image_tensor, label = image_tensor.to(device), label.to(device)
        
        return (image_tensor, label)    


    def __len__(self):
        return len(self.files)

## Model initialization


In [243]:
from torchvision.models import resnet50, ResNet50_Weights 

In [244]:
weights = ResNet50_Weights.DEFAULT

model = resnet50(weights = weights)
model.eval().to(device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

## Dataloader


In [245]:
from torch.utils.data import DataLoader
import torchvision.transforms as transforms

In [246]:
transform = weights.transforms()

In [247]:
dataset = ClassificationDataset("data/", transforms = transform)
batchsize = 32
workers = 3

dataloader = DataLoader(
    dataset,
    batch_size = batchsize,
    #num_workers = workers 
)

## Performing inference

*class_idx* is a list of indexes of our chosen classes in the model

In [248]:
models_classes = weights.meta["categories"]

class_idx = [models_classes.index(chosen_class.lower()) for chosen_class in classes]

In [249]:
results_as_probabilities_with_target = []

for data, target in dataloader:
    prediction = model(data).sigmoid()

    for image_idx, class_predictions in enumerate(prediction):
        chosen_class_predictions = [class_predictions[idx].item() for idx in class_idx]
        actual_class = target[image_idx].item()
        results_as_probabilities_with_target.append((chosen_class_predictions, actual_class))

*results_as_probabilities_with_target* variable holds a tuple where the first element is an array of our class probabilities and the second element of the tuple is the index of the actuall class.

## Thresholds

In [282]:
thresholds = [0.5, 0.5, 0.5]

## Result interpretation

### Comparing results to threshold values

*results_as_booleans* is a list which contains lists of boolean values that describe if the class was detected in the image

In [299]:
results_as_booleans = []

for class_probabilities in results_as_probabilities:
    image_booleans = []
    for class_idx, class_probability in enumerate(class_probabilities):
        verdict = class_probability >= thresholds[class_idx]
        image_booleans.append(int(verdict))
    results_as_booleans.append(image_booleans)

*results_as_booleans_with_target* is a list of tuples where the first element is an array of booleans and the second element is the index of the class that we are expecting

In [300]:
results_as_booleans_with_target = []

for idx in range(len(results_as_probabilities_with_target)):
    booleans_with_target = (results_as_booleans[idx], results_as_probabilities_with_target[idx][1])
    results_as_booleans_with_target.append(booleans_with_target) 

### Calculating TP, FP, TN, FN

In [301]:
true_positives = 0
false_positives = 0
true_negatives = 0
false_negatives = 0

In [302]:
for predictions, target_idx in results_as_booleans_with_target:
    for idx, prediction in enumerate(predictions):
        # Positives
        if prediction == 1 and idx == target_idx:
            true_positives += 1
        if prediction == 1 and idx != target_idx:
            false_positives += 1
            
        # Negatives
        if prediction == 0 and idx != target_idx:
            true_negatives += 1
        if prediction == 0 and idx == target_idx:
            false_negatives += 1

### Calculating *accuracy*

**Accuracy = (TP + TN) / (TP + TN + FP + FN)**

In [303]:
accuracy = (true_positives + true_negatives) / \
    (true_positives + true_negatives + false_positives + false_negatives)

### Calculating *precision*

**Precision = TP / (TP + FP)**

In [304]:
precision = true_positives / (true_positives + false_positives)

### Calculating *recall*

**Recall = TP / (TP + FN)**

In [305]:
recall = true_positives / (true_positives + false_negatives)

### Calculating *F1 score*

**F1 score = 2 * (precision * recall) / (precision + recall)**

In [306]:
f1 = 2 * (precision * recall) / (precision + recall)

## Generating report

This library was used to verify that calculations statistic calculations are correct

In [307]:
print("Chosen classes: \t\t\t", classes)
print("\n")

print("Amount of predictions done: \t\t", amount_to_classify)
print("Images per class: \t\t\t", images_per_class)
print("\n")

print("TP: \t\t\t\t\t", true_positives)
print("FP: \t\t\t\t\t", false_positives)
print("TN: \t\t\t\t\t", true_negatives)
print("FN: \t\t\t\t\t", false_negatives)
print("\n")

print("Accuracy: \t\t\t\t", accuracy)
print("Precision: \t\t\t\t", precision)
print("Recall: \t\t\t\t", recall)
print("F1 score: \t\t\t\t", f1)

Chosen classes: 			 ['Cucumber', 'Artichoke', 'Banana']


Amount of predictions done: 		 1000
Images per class: 			 334


TP: 					 30
FP: 					 83
TN: 					 1395
FN: 					 709


Accuracy: 				 0.6427604871447903
Precision: 				 0.26548672566371684
Recall: 				 0.04059539918809202
F1 score: 				 0.07042253521126761


### Used a library to verify statistic calculations

In [308]:
import torchmetrics

In [309]:
threshold = 0.1
while threshold < 1:
    predictions, targets = zip(*results_as_probabilities_with_target)
    trg = [np.zeros(len(classes)) for x in targets]
    for idx, arr in enumerate(trg):
        arr[targets[idx]] = 1

    predictions_tensor = torch.tensor(predictions)
    targets_tensor = torch.tensor(trg)


    accuracy_metric = torchmetrics.classification.MultilabelAccuracy(num_labels = 3, threshold = threshold, average = "micro")
    accuracy = accuracy_metric(predictions_tensor, targets_tensor).item()

    precision_metric = torchmetrics.classification.MultilabelPrecision(num_labels = 3, threshold = threshold, average = "micro")
    precision = precision_metric(predictions_tensor, targets_tensor).item()

    recall_metric = torchmetrics.classification.MultilabelRecall(num_labels = 3, threshold = threshold, average = "micro")
    recall = recall_metric(predictions_tensor, targets_tensor).item()

    f1_metric = torchmetrics.classification.MultilabelF1Score(num_labels = 3, threshold = threshold, average = "micro")
    f1 = f1_metric(predictions_tensor, targets_tensor).item()

    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall:", recall)
    print("F1 score:", f1)
    print("\n")
    threshold += 0.1

Accuracy: 0.6391519904136658
Precision: 0.27407407760620117
Recall: 0.050067659467458725
F1 score: 0.08466818928718567


Accuracy: 0.6423094272613525
Precision: 0.2857142984867096
Recall: 0.04871447756886482
F1 score: 0.08323699235916138


Accuracy: 0.6427605152130127
Precision: 0.28099173307418823
Recall: 0.046008117496967316
F1 score: 0.07906977087259293


Accuracy: 0.6427605152130127
Precision: 0.2735042870044708
Recall: 0.04330175742506981
F1 score: 0.07476635277271271


Accuracy: 0.6427605152130127
Precision: 0.2654867172241211
Recall: 0.0405953973531723
F1 score: 0.07042253762483597


Accuracy: 0.6432115435600281
Precision: 0.25925925374031067
Recall: 0.037889041006565094
F1 score: 0.06611569970846176


Accuracy: 0.6450157761573792
Precision: 0.26923078298568726
Recall: 0.037889041006565094
F1 score: 0.06642942130565643


Accuracy: 0.6459178924560547
Precision: 0.27450981736183167
Recall: 0.037889041006565094
F1 score: 0.06658739596605301


Accuracy: 0.6486242413520813
Precision: