<a href="https://colab.research.google.com/github/kristupas-g/deep_learning_course/blob/main/resnet50_inference.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Deep learning course first task

Student: **Kristupas Gaidys** *(2015973)*

Model: **resnet50**



--- 

In [1]:
classes = ["Hamster", "Snail", "Lemon"]

## Configuration

In [2]:
!pip install openimages torchmetrics

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting openimages
  Downloading openimages-0.0.1-py2.py3-none-any.whl (10 kB)
Collecting torchmetrics
  Downloading torchmetrics-0.11.1-py3-none-any.whl (517 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m517.2/517.2 KB[0m [31m8.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting boto3
  Downloading boto3-1.26.79-py3-none-any.whl (132 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m132.7/132.7 KB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
Collecting cvdata
  Downloading cvdata-0.0.3-py3-none-any.whl (37 kB)
Collecting botocore<1.30.0,>=1.29.79
  Downloading botocore-1.29.79-py3-none-any.whl (10.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.4/10.4 MB[0m [31m60.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting jmespath<2.0.0,>=0.7.1
  Downloading jmespath-1.0.1-py3-none-any.whl (20 kB)
Collecting s3transfer<0.7.0,>=0.6.0


In [3]:
import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## Downloading data

In [4]:
from os import path, makedirs
from math import ceil
from openimages.download import download_dataset

In [5]:
amount_to_classify = 30
data_dir = "data"
images_per_class = ceil(amount_to_classify/3)

In [6]:
if not path.exists(data_dir):
    makedirs(data_dir)

In [7]:
download_dataset(data_dir, classes, limit=images_per_class)

100%|██████████| 10/10 [00:00<00:00, 21.87it/s]
100%|██████████| 10/10 [00:00<00:00, 18.49it/s]
100%|██████████| 10/10 [00:00<00:00, 20.60it/s]


{'hamster': {'images_dir': 'data/hamster/images'},
 'snail': {'images_dir': 'data/snail/images'},
 'lemon': {'images_dir': 'data/lemon/images'}}

## Custom Dataset class

In [8]:
from torchvision.io import read_image, ImageReadMode
from torch.utils.data.dataset import Dataset
from glob import glob
from PIL import Image

In [9]:
class ClassificationDataset(Dataset):
    def __init__(self,image_dir,transforms = None):
        self.transforms = transforms
        self.image_dir = image_dir

        self.files = glob(image_dir + "**/*.jpg", recursive=True) 
        
        
    def __getitem__(self, index):
        image_dir = self.files[index]
        image_tensor = Image.open(image_dir).convert('RGB')
        
        image_tensor_transformed = image_tensor
        if self.transforms is not None:
           image_tensor_transformed = self.transforms(image_tensor)

        class_name = image_dir.split("/")[1]
        label = torch.tensor([x.lower() for x in classes].index(class_name))

        image_tensor_transformed, label = image_tensor_transformed.to(device), label.to(device)
        
        return (image_tensor_transformed, label)    


    def __len__(self):
        return len(self.files)

## Model initialization


In [10]:
from torchvision.models import resnet50, ResNet50_Weights 

In [11]:
weights = ResNet50_Weights.DEFAULT

model = resnet50(weights = weights)
model.eval().to(device)

Downloading: "https://download.pytorch.org/models/resnet50-11ad3fa6.pth" to /root/.cache/torch/hub/checkpoints/resnet50-11ad3fa6.pth


  0%|          | 0.00/97.8M [00:00<?, ?B/s]

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

## Dataloader


In [12]:
from torch.utils.data import DataLoader
import torchvision.transforms as transforms

In [13]:
transform = transforms.Compose([transforms.PILToTensor(), weights.transforms()])

In [14]:
dataset = ClassificationDataset("data/", transforms = transform)
batchsize = 32

dataloader = DataLoader(
    dataset,
    batch_size = batchsize,
)

## Performing inference

*class_idx* is a list of indexes of our chosen classes in the model

In [15]:
models_classes = weights.meta["categories"]

class_idx = [models_classes.index(chosen_class.lower()) for chosen_class in classes]

In [16]:
results_as_probabilities_with_target = []
from pprint import pprint

for data, target in dataloader:
    prediction = model(data).sigmoid()

    for image_idx, class_predictions in enumerate(prediction):
        chosen_class_predictions = [class_predictions[idx].item() for idx in class_idx]
        actual_class = target[image_idx].item()
        results_as_probabilities_with_target.append((chosen_class_predictions, actual_class))

print(results_as_probabilities_with_target)

[([0.5352910161018372, 0.8314783573150635, 0.9977748990058899], 2), ([0.3386389911174774, 0.7846998572349548, 0.9274857640266418], 2), ([0.4668889045715332, 0.4402637779712677, 0.9991722106933594], 2), ([0.5568751096725464, 0.524972677230835, 0.8956348896026611], 2), ([0.42064914107322693, 0.4729417860507965, 0.9992445707321167], 2), ([0.5102195739746094, 0.3772655427455902, 0.9985504746437073], 2), ([0.4376053810119629, 0.4776589274406433, 0.9994598031044006], 2), ([0.4627101421356201, 0.4419185221195221, 0.9911077618598938], 2), ([0.5722915530204773, 0.6560630202293396, 0.9984084963798523], 2), ([0.6626268625259399, 0.5209422707557678, 0.9973365664482117], 2), ([0.9984087347984314, 0.48043376207351685, 0.5092498064041138], 0), ([0.9555947184562683, 0.6173796057701111, 0.41695526242256165], 0), ([0.9984346032142639, 0.45736563205718994, 0.8669163584709167], 0), ([0.9992289543151855, 0.5547716617584229, 0.5218122601509094], 0), ([0.9928988218307495, 0.6409716010093689, 0.38796532154083

*results_as_probabilities_with_target* variable holds a tuple where the first element is an array of our class probabilities and the second element of the tuple is the index of the actuall class.

## Thresholds

In [33]:
thresholds = [0.9, 0.9, 0.9]

## Result interpretation

### Comparing results to threshold values

In [34]:
results_as_probabilities = [x[0] for x in results_as_probabilities_with_target]    

*results_as_booleans* is a list which contains lists of boolean values that describe if the class was detected in the image

In [35]:
results_as_booleans = []

for class_probabilities in results_as_probabilities:
    image_booleans = []
    for class_idx, class_probability in enumerate(class_probabilities):
        verdict = class_probability >= thresholds[class_idx]
        image_booleans.append(int(verdict))
    results_as_booleans.append(image_booleans)

*results_as_booleans_with_target* is a list of tuples where the first element is an array of booleans and the second element is the index of the class that we are expecting

In [36]:
results_as_booleans_with_target = []

for idx in range(len(results_as_probabilities_with_target)):
    booleans_with_target = (results_as_booleans[idx], results_as_probabilities_with_target[idx][1])
    results_as_booleans_with_target.append(booleans_with_target) 

### Calculating TP, FP, TN, FN

In [37]:
true_positives = 0
false_positives = 0
true_negatives = 0
false_negatives = 0

In [38]:
for predictions, target_idx in results_as_booleans_with_target:
    for idx, prediction in enumerate(predictions):
        # Positives
        if prediction == 1 and idx == target_idx:
            true_positives += 1
        if prediction == 1 and idx != target_idx:
            false_positives += 1
            
        # Negatives
        if prediction == 0 and idx != target_idx:
            true_negatives += 1
        if prediction == 0 and idx == target_idx:
            false_negatives += 1

### Calculating *accuracy*

**Accuracy = (TP + TN) / (TP + TN + FP + FN)**

In [39]:
accuracy = (true_positives + true_negatives) / \
    (true_positives + true_negatives + false_positives + false_negatives)

### Calculating *precision*

**Precision = TP / (TP + FP)**

In [40]:
precision = true_positives / (true_positives + false_positives)

### Calculating *recall*

**Recall = TP / (TP + FN)**

In [41]:
recall = true_positives / (true_positives + false_negatives)

### Calculating *F1 score*

**F1 score = 2 * (precision * recall) / (precision + recall)**

In [42]:
f1 = 2 * (precision * recall) / (precision + recall)

## Generating report

This library was used to verify that calculations statistic calculations are correct

In [43]:
print("Chosen classes: \t\t\t", classes)
print("\n")

print("Amount of predictions done: \t\t", amount_to_classify)
print("Images per class: \t\t\t", images_per_class)
print("\n")

print("TP: \t\t\t\t\t", true_positives)
print("FP: \t\t\t\t\t", false_positives)
print("TN: \t\t\t\t\t", true_negatives)
print("FN: \t\t\t\t\t", false_negatives)
print("\n")

print("Accuracy: \t\t\t\t", accuracy)
print("Precision: \t\t\t\t", precision)
print("Recall: \t\t\t\t", recall)
print("F1 score: \t\t\t\t", f1)

Chosen classes: 			 ['Hamster', 'Snail', 'Lemon']


Amount of predictions done: 		 30
Images per class: 			 10


TP: 					 28
FP: 					 0
TN: 					 60
FN: 					 2


Accuracy: 				 0.9777777777777777
Precision: 				 1.0
Recall: 				 0.9333333333333333
F1 score: 				 0.9655172413793104
