# Paulius Minajevas, 2110599, Inception V3 ["Candle", "Jellyfish", "Pizza"]

In [None]:
pip install openimages

In [None]:
import torch
from torchvision.models import Inception_V3_Weights

For checking possible categories of the model

In [None]:
# Display the category names stored in the metadata of the default Inception V3 weights;
# the same list is used further down to look up the index of each evaluated class.
Inception_V3_Weights.DEFAULT.meta["categories"]

In [None]:
import os
from openimages.download import download_dataset

Main execution parameters

In [None]:
# Root folder the dataset is downloaded into and later read from.
data_dir = "./data"
# Passed as the per-class `limit` to the downloader (an upper bound: a class may yield fewer images).
number_for_samples = 500
# Open Images class names to evaluate; lower-cased later to find them among the model's categories.
classes = ["Candle", "Jellyfish", "Pizza"]

Downloading dataset

In [None]:
# Create the download target. `exist_ok=True` makes the cell safe to re-run and avoids the
# check-then-create race of testing for the directory first.
os.makedirs(data_dir, exist_ok=True)

# Download at most `number_for_samples` images per class; the returned dict (displayed as the
# cell output) maps each class to the directory its images were stored in.
download_dataset(data_dir, classes, limit=number_for_samples)

100%|██████████| 500/500 [00:09<00:00, 52.66it/s]
100%|██████████| 379/379 [00:06<00:00, 55.01it/s]
100%|██████████| 409/409 [00:07<00:00, 57.18it/s]


{'candle': {'images_dir': './data/candle/images'},
 'jellyfish': {'images_dir': './data/jellyfish/images'},
 'pizza': {'images_dir': './data/pizza/images'}}

Use the GPU as the runtime device if one is available

In [None]:
# Prefer the first CUDA GPU when one is visible to PyTorch; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print(device)

cuda:0


In [None]:
from torchvision.models import inception_v3
from torchvision.transforms import transforms

Pretrained model retrieval

In [None]:
# Load Inception V3 with its pretrained weights and move it to the selected device.
# `weights=` replaces the deprecated `pretrained=True` flag and uses the same weights enum
# that the notebook consults for the category names.
model = inception_v3(weights=Inception_V3_Weights.DEFAULT).to(device)
# Switch to evaluation mode so the network behaves deterministically for inference.
model.eval()

Image transformation settings required by the model ([reference](https://pytorch.org/hub/pytorch_vision_inception_v3/))

In [None]:
# Per-channel normalization constants, as given on the referenced PyTorch Hub page.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

# Steps applied to every image, in order: resize, crop to 299x299, convert to a tensor, normalize.
preprocessing_steps = [
    transforms.Resize(299),
    transforms.CenterCrop(299),
    transforms.ToTensor(),
    normalize,
]
preprocess = transforms.Compose(preprocessing_steps)

In [None]:
from torchvision import datasets
from torch.utils.data import DataLoader
from PIL import Image
import numpy as np

Setting up data loader

In [None]:
# Read the downloaded images from `data_dir`, applying the model's preprocessing to each one.
dataset = datasets.ImageFolder(root=data_dir, transform=preprocess)
# Serve the images in batches of 10, loaded by 4 worker processes.
dataloader = DataLoader(dataset=dataset, batch_size=10, num_workers=4)

Retrieval of class indexes in the model

In [None]:
# Look the category list up once, then map every evaluated class (lower-cased to match the
# spelling used by the model's metadata) to its position in that list.
model_categories = Inception_V3_Weights.DEFAULT.meta["categories"]
classIndexes = [model_categories.index(class_name.lower()) for class_name in classes]

print(classIndexes)

[470, 107, 963]


Running model predictions on the dataset

In [None]:
# Minimum softmax probability for a class to count as "predicted".
# It does not change between batches, so it is defined once instead of inside the loop.
threshold = 0.8

# Model output indexes of the evaluated classes, kept on the model's device so each batch can be
# scored with tensor operations instead of one `.item()` call per sample and class.
target_indexes = torch.tensor(classIndexes, dtype=torch.long, device=device)

# Per-class confusion counters, ordered like `classIndexes`.
tp_counts = torch.zeros(len(classIndexes), dtype=torch.long, device=device)
tn_counts = torch.zeros(len(classIndexes), dtype=torch.long, device=device)
fp_counts = torch.zeros(len(classIndexes), dtype=torch.long, device=device)
fn_counts = torch.zeros(len(classIndexes), dtype=torch.long, device=device)

with torch.no_grad():
  for images, labels in dataloader:
    images = images.to(device)
    labels = labels.to(device)
    outputs = model(images)

    probabilitiesForBatch = torch.softmax(outputs, dim=1)

    # NOTE: `labels` are ImageFolder folder indexes (folders are sorted alphabetically). They are
    # used to index `classIndexes`, which only lines up because `classes` is listed alphabetically.
    # (batch, n_classes): does the class reach the confidence threshold for this sample?
    predicted = probabilitiesForBatch[:, target_indexes] >= threshold
    # (batch, n_classes): is the class the ground-truth class of this sample?
    actual = target_indexes[labels].unsqueeze(1) == target_indexes.unsqueeze(0)

    # Calculate TP, TN, FP, FN for the whole batch at once
    tp_counts += (predicted & actual).sum(dim=0)
    fp_counts += (predicted & ~actual).sum(dim=0)
    tn_counts += (~predicted & ~actual).sum(dim=0)
    fn_counts += (~predicted & actual).sum(dim=0)

# Expose the results as plain Python lists of ints, one entry per evaluated class.
true_positives = tp_counts.tolist()
true_negatives = tn_counts.tolist()
false_positives = fp_counts.tolist()
false_negatives = fn_counts.tolist()

print(true_positives, true_negatives, false_positives, false_negatives)

[288, 261, 211] [788, 909, 879] [0, 0, 0] [212, 118, 198]


In [None]:
# Collapse the per-class counters into overall totals (micro-averaging over the classes).
TP = sum(true_positives)
TN = sum(true_negatives)
FP = sum(false_positives)
FN = sum(false_negatives)

print(TP, TN, FP, FN)

760 2576 0 528


**Calculation of required metrics:**

_Accuracy_ - number of examples correctly predicted / total number of examples

$Accuracy = \frac{TP + TN}{TP + FP + TN + FN}$

_Recall_ - number of samples actually and predicted as `Positive` / total number of samples actually `Positive`

$Recall = \frac{TP}{TP + FN}$

_Precision_ - number of samples actually and predicted as `Positive` / total number of samples predicted as `Positive`

$Precision = \frac{TP}{TP + FP}$

_F1_ - Harmonic Mean of Precision and Recall

$F1 = 2 \cdot \frac{Precision \cdot Recall}{Precision + Recall}$


In [None]:
def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is zero.

    Keeps the metric cell from raising ZeroDivisionError when, for example, no prediction
    reaches the confidence threshold (TP + FP == 0).
    """
    return numerator / denominator if denominator else 0.0


# Metrics computed from the totals, following the formulas given in the text above.
metrics = {}
metrics['accuracy'] = _safe_ratio(TP + TN, TP + FP + TN + FN)
metrics['recall'] = _safe_ratio(TP, TP + FN)
metrics['precision'] = _safe_ratio(TP, TP + FP)
# F1 is the harmonic mean of precision and recall; reported as 0.0 when both are zero.
metrics['F1'] = _safe_ratio(
    2 * (metrics['precision'] * metrics['recall']),
    metrics['precision'] + metrics['recall'],
)
print(metrics)

{'accuracy': 0.8633540372670807, 'recall': 0.5900621118012422, 'precision': 1.0, 'F1': 0.7421875}
