In [1]:
pip install openimages torchmetrics tabulate

Note: you may need to restart the kernel to use updated packages.


In [2]:
import torch
import torch.nn as nn
import torchmetrics
import torchvision.transforms as transforms
from PIL import Image
import os
from openimages.download import download_dataset
from torch.utils.data import Dataset, random_split, DataLoader
from glob import glob
import numpy as np
from tabulate import tabulate

batch_size = 100
num_classes = 3
learning_rate = 0.001
num_epochs = 40

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

  from .autonotebook import tqdm as notebook_tqdm


cuda


In [3]:
all_transforms = transforms.Compose([transforms.Resize((227,227)),
                                     transforms.ToTensor(),
                                     transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                          std=[0.485, 0.456, 0.406])
                                     ])

data_dir = "data"

classes = ["Person", "Food", "Animal"]

if not os.path.exists(data_dir):
  os.makedirs(data_dir)

print("Downloading is starting...")
download_dataset(data_dir, classes, limit=5000)

Downloading is starting...


In [4]:
class CustomDataset(Dataset):
  def __init__(self, images_dir, transform=None):
    self.images_dir = images_dir
    self.transform = transform

    self.class1_files = glob(self.images_dir + "/{}/images/*.jpg".format(classes[0].lower()))
    self.class2_files = glob(self.images_dir + "/{}/images/*.jpg".format(classes[1].lower()))
    self.class3_files = glob(self.images_dir + "/{}/images/*.jpg".format(classes[2].lower()))

    self.class1 = len(self.class1_files)
    self.class2 = len(self.class2_files)

    self.files = self.class1_files + self.class2_files + self.class3_files

    self.labels = np.zeros(len(self.files))
    self.labels[self.class1:] = 1
    self.labels[self.class1 + self.class2:] = 2 

    self.order =  [x for x in np.random.permutation(len(self.labels))]
    self.files = [self.files[x] for x in self.order]
    self.labels = [self.labels[x] for x in self.order]

  def __len__(self):
    return (len(self.labels))

  def __getitem__(self, i):
    img_path = self.files[i]

    img = Image.open(img_path).convert("RGB")

    if self.transform:
      img = self.transform(img)
            
    y = self.labels[i]
    return (img, y)

In [5]:
class ConvNeuralNet(nn.Module):
    def __init__(self, num_classes):
        super(ConvNeuralNet, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=0),
            nn.BatchNorm2d(96),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 3, stride = 2))
        self.layer2 = nn.Sequential(
            nn.Conv2d(96, 256, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 3, stride = 2))
        self.layer3 = nn.Sequential(
            nn.Conv2d(256, 384, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(384),
            nn.ReLU())
        self.layer4 = nn.Sequential(
            nn.Conv2d(384, 384, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(384),
            nn.ReLU())
        self.layer5 = nn.Sequential(
            nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 3, stride = 2))
        self.fc = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(9216, 4096),
            nn.ReLU())
        self.fc1 = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU())
        self.fc2= nn.Sequential(
            nn.Linear(4096, num_classes))
        
    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.layer5(out)
        out = out.reshape(out.size(0), -1)
        out = self.fc(out)
        out = self.fc1(out)
        out = self.fc2(out)
        return out

In [6]:
model = ConvNeuralNet(num_classes)
model.to(device)

full_dataset = CustomDataset("./data", all_transforms)
train_size = int(0.8 * len(full_dataset))
test_size = len(full_dataset) - train_size
train_dataset, test_dataset = random_split(full_dataset, [train_size, test_size])

train_dataloader = DataLoader(train_dataset, batch_size)
test_dataloader = DataLoader(test_dataset, batch_size)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay = 0.005, momentum = 0.9) 

In [7]:
model.train()

for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_dataloader):  
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        
        outputs = model(images)
        labels = labels.to(torch.int64)
        loss = criterion(outputs, labels)
    
        loss.backward()
        optimizer.step()

    print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, loss.item()))

model.eval()

torch.save(model.state_dict(), "checkpoint.pth")

Epoch [1/40], Loss: 0.7527
Epoch [2/40], Loss: 0.6625
Epoch [3/40], Loss: 0.6068
Epoch [4/40], Loss: 0.5730
Epoch [5/40], Loss: 0.5500
Epoch [6/40], Loss: 0.5357
Epoch [7/40], Loss: 0.5010
Epoch [8/40], Loss: 0.4945
Epoch [9/40], Loss: 0.4612
Epoch [10/40], Loss: 0.4190
Epoch [11/40], Loss: 0.4461
Epoch [12/40], Loss: 0.3615
Epoch [13/40], Loss: 0.3949
Epoch [14/40], Loss: 0.3114
Epoch [15/40], Loss: 0.3745
Epoch [16/40], Loss: 0.3153
Epoch [17/40], Loss: 0.2845
Epoch [18/40], Loss: 0.3220
Epoch [19/40], Loss: 0.2072
Epoch [20/40], Loss: 0.1803
Epoch [21/40], Loss: 0.4005
Epoch [22/40], Loss: 0.2287
Epoch [23/40], Loss: 0.2512
Epoch [24/40], Loss: 0.2558
Epoch [25/40], Loss: 0.1145
Epoch [26/40], Loss: 0.1332
Epoch [27/40], Loss: 0.1318
Epoch [28/40], Loss: 0.2004
Epoch [29/40], Loss: 0.1540
Epoch [30/40], Loss: 0.1638
Epoch [31/40], Loss: 0.1903
Epoch [32/40], Loss: 0.1048
Epoch [33/40], Loss: 0.1275
Epoch [34/40], Loss: 0.1434
Epoch [35/40], Loss: 0.1798
Epoch [36/40], Loss: 0.1387
E

ConvNeuralNet(
  (layer1): Sequential(
    (0): Conv2d(3, 96, kernel_size=(11, 11), stride=(4, 4))
    (1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (0): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer3): Sequential(
    (0): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (layer4): Sequential(
    (0): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReL

In [63]:
model.load_state_dict(torch.load('checkpoint.pth'))
model.eval()

ConvNeuralNet(
  (layer1): Sequential(
    (0): Conv2d(3, 96, kernel_size=(11, 11), stride=(4, 4))
    (1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (0): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer3): Sequential(
    (0): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (layer4): Sequential(
    (0): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReL

In [9]:
targets = []
predictions = []

with torch.no_grad():
    for images, labels in test_dataloader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        for i, prediction in enumerate(outputs):
            predictions.append(prediction.tolist())
            temp = np.zeros(3, dtype=int)
            temp[int(labels[i].item())] = 1
            targets.append(temp)

In [10]:
def calculateMetrics(predictions, targets):
    thresholds = (x * 0.1 for x in range(1, 10))
    metrics = []

    for t in thresholds:
        accuracy_metric = torchmetrics.classification.MultilabelAccuracy(num_labels = num_classes, threshold = t, average = "micro")
        accuracy = "{:.3f}".format(accuracy_metric(predictions, targets).item())

        precision_metric = torchmetrics.classification.MultilabelPrecision(num_labels = num_classes, threshold = t, average = "micro")
        precision = "{:.3f}".format(precision_metric(predictions, targets).item())

        recall_metric = torchmetrics.classification.MultilabelRecall(num_labels = num_classes, threshold = t, average = "micro")
        recall = "{:.3f}".format(recall_metric(predictions, targets).item())

        f1_metric = torchmetrics.classification.MultilabelF1Score(num_labels = num_classes, threshold = t, average = "micro")
        f1 = "{:.3f}".format(f1_metric(predictions, targets).item())

        metrics.append([t, accuracy, precision, recall, f1])

    return metrics

predictions_tensor = torch.tensor(predictions)
targets_tensor = torch.tensor(targets)

data = calculateMetrics(predictions_tensor, targets_tensor)
col_names = ["Threshold", "Accuracy", "Precision", "Recall", "F1"]

print(tabulate(data, headers=col_names))

  Threshold    Accuracy    Precision    Recall     F1
-----------  ----------  -----------  --------  -----
        0.1       0.642        0.48      0.911  0.629
        0.2       0.681        0.513     0.881  0.648
        0.3       0.706        0.537     0.858  0.661
        0.4       0.725        0.558     0.842  0.671
        0.5       0.737        0.574     0.819  0.675
        0.6       0.747        0.59      0.792  0.676
        0.7       0.76         0.613     0.761  0.679
        0.8       0.771        0.638     0.722  0.677
        0.9       0.782        0.676     0.664  0.67


In [11]:
def predict(image_path, model, labels):
  img = Image.open(image_path)
  img_tensor = all_transforms(img).to(device).unsqueeze(0)

  output = model(img_tensor)
  pred = output.data.cpu().numpy().argmax()  # Get predicted class number
 
  print('Predicted class: {}'.format(labels[pred]))