<a href="https://colab.research.google.com/github/kristupas-g/deep_learning_course/blob/main/resnet50_inference.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Deep learning course first task

Student: **Kristupas Gaidys** *(2015973)*

Model: **resnet50**



--- 

In [42]:
classes = ["Cucumber", "Artichoke", "Banana"]

## Configuration

In [43]:
!pip install openimages torchmetrics

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [44]:
import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## Downloading data

In [45]:
from os import path, makedirs
from math import ceil
from openimages.download import download_dataset

In [46]:
amount_to_classify = 1000
data_dir = "data"
images_per_class = ceil(amount_to_classify/3)

In [47]:
if not path.exists(data_dir):
    makedirs(data_dir)

In [48]:
download_dataset(data_dir, classes, limit=images_per_class)

100%|██████████| 334/334 [00:10<00:00, 32.89it/s]
100%|██████████| 71/71 [00:02<00:00, 25.81it/s]
100%|██████████| 334/334 [00:09<00:00, 35.07it/s]


{'cucumber': {'images_dir': 'data/cucumber/images'},
 'artichoke': {'images_dir': 'data/artichoke/images'},
 'banana': {'images_dir': 'data/banana/images'}}

## Custom Dataset class

In [49]:
from torch.utils.data.dataset import Dataset
from glob import glob
from PIL import Image

In [50]:
class ClassificationDataset(Dataset):
    def __init__(self,image_dir,transforms = None):
        self.transforms = transforms
        self.image_dir = image_dir

        self.files = glob(image_dir + "**/*.jpg", recursive=True) 
        
        
    def __getitem__(self, index):
        image_dir = self.files[index]
        image_tensor = Image.open(image_dir).convert('RGB')
        
        image_tensor_transformed = image_tensor
        if self.transforms is not None:
           image_tensor_transformed = self.transforms(image_tensor)

        class_name = image_dir.split("/")[1]
        label = torch.tensor([x.lower() for x in classes].index(class_name))

        image_tensor_transformed, label = image_tensor_transformed.to(device), label.to(device)
        
        return (image_tensor_transformed, label)    


    def __len__(self):
        return len(self.files)

## Model initialization


In [51]:
from torchvision.models import resnet50, ResNet50_Weights 

In [52]:
weights = ResNet50_Weights.DEFAULT

model = resnet50(weights = weights)
model.eval().to(device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

## Dataloader


In [53]:
from torch.utils.data import DataLoader
import torchvision.transforms as transforms

In [54]:
transform = transforms.Compose([transforms.PILToTensor(), weights.transforms()])

In [41]:
dataset = ClassificationDataset(data_dir, transforms = transform)
batchsize = 64

dataloader = DataLoader(
    dataset,
    batch_size = batchsize,
)

## Performing inference

*class_idx* is a list of indexes of our chosen classes in the model

In [55]:
models_classes = weights.meta["categories"]

class_idx = [models_classes.index(chosen_class.lower()) for chosen_class in classes]

In [56]:
results_as_probabilities_with_target = []

for _, (data, target) in enumerate(dataloader):
    prediction = model(data).sigmoid()
    print(prediction.shape)
    for image_idx, class_predictions in enumerate(prediction):
        chosen_class_predictions = [class_predictions[idx].item() for idx in class_idx]
        actual_class = target[image_idx].item()
        results_as_probabilities_with_target.append((chosen_class_predictions, actual_class))

*results_as_probabilities_with_target* variable holds a tuple where the first element is an array of our class probabilities and the second element of the tuple is the index of the actuall class.

## Thresholds

In [57]:
thresholds = [0.8, 0.8, 0.8]

## Result interpretation

### Comparing results to threshold values

In [58]:
results_as_probabilities = [x[0] for x in results_as_probabilities_with_target]    

*results_as_booleans* is a list which contains lists of boolean values that describe if the class was detected in the image

In [59]:
results_as_booleans = []

for class_probabilities in results_as_probabilities:
    image_booleans = []
    for class_idx, class_probability in enumerate(class_probabilities):
        verdict = class_probability > thresholds[class_idx]
        image_booleans.append(int(verdict))
    results_as_booleans.append(image_booleans)

print(results_as_booleans)

[]


*results_as_booleans_with_target* is a list of tuples where the first element is an array of booleans and the second element is the index of the class that we are expecting

In [60]:
results_as_booleans_with_target = []

for idx in range(len(results_as_probabilities_with_target)):
    booleans_with_target = (results_as_booleans[idx], results_as_probabilities_with_target[idx][1])
    results_as_booleans_with_target.append(booleans_with_target) 

### Calculating TP, FP, TN, FN

In [61]:
true_positives = 0
false_positives = 0
true_negatives = 0
false_negatives = 0

In [62]:
for predictions, target_idx in results_as_booleans_with_target:
    for idx, prediction in enumerate(predictions):
        # Positives
        if prediction == 1 and idx == target_idx:
            true_positives += 1
        if prediction == 1 and idx != target_idx:
            false_positives += 1
            
        # Negatives
        if prediction == 0 and idx != target_idx:
            true_negatives += 1
        if prediction == 0 and idx == target_idx:
            false_negatives += 1
print(true_positives)
print(false_positives)
print(false_positives)
print(true_negatives)

0
0
0
0


### Calculating *accuracy*

**Accuracy = (TP + TN) / (TP + TN + FP + FN)**

In [63]:
accuracy = (true_positives + true_negatives) / \
    (true_positives + true_negatives + false_positives + false_negatives)

ZeroDivisionError: ignored

### Calculating *precision*

**Precision = TP / (TP + FP)**

In [None]:
precision = true_positives / (true_positives + false_positives)

### Calculating *recall*

**Recall = TP / (TP + FN)**

In [None]:
recall = true_positives / (true_positives + false_negatives)

### Calculating *F1 score*

**F1 score = 2 * (precision * recall) / (precision + recall)**

In [None]:
f1 = 2 * (precision * recall) / (precision + recall)

## Generating report

This library was used to verify that calculations statistic calculations are correct

In [None]:
print("Chosen classes: \t\t\t", classes)
print("\n")

print("Amount of predictions done: \t\t", amount_to_classify)
print("Images per class: \t\t\t", images_per_class)
print("\n")

print("TP: \t\t\t\t\t", true_positives)
print("FP: \t\t\t\t\t", false_positives)
print("TN: \t\t\t\t\t", true_negatives)
print("FN: \t\t\t\t\t", false_negatives)
print("\n")

print("Accuracy: \t\t\t\t", accuracy)
print("Precision: \t\t\t\t", precision)
print("Recall: \t\t\t\t", recall)
print("F1 score: \t\t\t\t", f1)