In [4]:
import os
import numpy as np
import json
from skmultilearn.model_selection import iterative_train_test_split
import torch

from torch.utils.data import Dataset
from torchvision import transforms
from torch.utils.data import DataLoader
from PIL import Image

import torch.nn as nn
import torch.nn.functional as F

In [5]:
dataset_directory = 'dataset'

images = []
labels = []

for recipe_folder in os.listdir(dataset_directory):
    recipe_path = os.path.join(dataset_directory, recipe_folder)
    allergens_file = os.path.join(recipe_path, 'allergens.json')

    if os.path.isfile(allergens_file):
        with open(allergens_file, 'r') as f:
            data = json.load(f)
            allergens = data['allergens']

            for image in data['images']:
                images.append(os.path.join(dataset_directory, recipe_folder, image))
                labels.append(allergens)

In [6]:
x = np.array(images).reshape(-1,1) # iterative_train_test_split expects 2d array
y = np.array(labels)

In [7]:
x_train, y_train, x_temp, y_temp = iterative_train_test_split(x, y, test_size=0.3) # http://scikit.ml/stratification.html

In [8]:
x_test, y_test, x_val, y_val = iterative_train_test_split(x_temp, y_temp, test_size=0.5)

In [9]:
print(x_train.shape, y_train.shape, x_test.shape, y_test.shape, x_val.shape, y_val.shape)

(43415, 1) (43415, 14) (9304, 1) (9304, 14) (9303, 1) (9303, 14)


In [10]:
x_train

array([['dataset\\$25_pumpkin_pie\\images/image_2.jpg'],
       ["dataset\\'get_up_&_go'_bars\\images/image_2.jpg"],
       ['dataset\\(panera_bread)_black_bean_soup\\images/image_2.jpg'],
       ...,
       ["dataset\\zurie's_overnight_no-knead_bread\\images/image_3.jpg"],
       ["dataset\\zurie's_overnight_no-knead_bread\\images/image_4.jpg"],
       ["dataset\\zurie's_overnight_no-knead_bread\\images/image_5.jpg"]],
      shape=(43415, 1), dtype='<U93')

In [11]:
# https://pytorch.org/tutorials/beginner/basics/data_tutorial.html#creating-a-custom-dataset-for-your-files

class FoodAllergenDataset(Dataset):
    def __init__(self, image_paths, labels, transform=None, target_transform=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx][0]
        image = Image.open(img_path).convert('RGB') # Convert all images to 3 channel RGB as dataset contains some 4 channel RGBA images 
        label = torch.tensor(self.labels[idx], dtype=torch.float32)

        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)

        return image, label

In [12]:
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Test more transforms later in training - cropping, rotation, centering etc.

In [13]:
train_dataset = FoodAllergenDataset(x_train, y_train, transform=transform)
val_dataset = FoodAllergenDataset(x_val, y_val, transform=transform)
test_dataset = FoodAllergenDataset(x_test, y_test, transform=transform)

In [14]:
# Hyperparameters
learning_rate = 1e-3
batch_size = 64
epochs = 5

In [15]:
# Create DataLoaders
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)   # Adjust batch size for training
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [16]:
for images, labels in train_dataloader:
    print(images.shape)  
    print(labels.shape)

torch.Size([64, 3, 128, 128])
torch.Size([64, 14])
torch.Size([64, 3, 128, 128])
torch.Size([64, 14])
torch.Size([64, 3, 128, 128])
torch.Size([64, 14])
torch.Size([64, 3, 128, 128])
torch.Size([64, 14])
torch.Size([64, 3, 128, 128])
torch.Size([64, 14])
torch.Size([64, 3, 128, 128])
torch.Size([64, 14])
torch.Size([64, 3, 128, 128])
torch.Size([64, 14])
torch.Size([64, 3, 128, 128])
torch.Size([64, 14])
torch.Size([64, 3, 128, 128])
torch.Size([64, 14])
torch.Size([64, 3, 128, 128])
torch.Size([64, 14])


KeyboardInterrupt: 

In [17]:
# Sample CNN model from pytorch

class Net(nn.Module):
    def __init__(self, num_labels=14):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 29 * 29, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_labels)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 29 * 29)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


net = Net()

In [18]:
device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
print(f"Using {device} device")

Using cpu device


In [19]:
model = Net().to(device)
print(model)

Net(
  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=13456, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=14, bias=True)
)


In [20]:
def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch, (images, labels) in enumerate(dataloader):
        prediction = model(images)
        loss = loss_fn(prediction, labels)

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if batch % 100 == 0:
            loss, current = loss.item(), batch * len(images)
            print(f"loss: {loss:.7f} [{current:5d}/{size:5d}]")


In [21]:
# Define loss function
loss_fn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [22]:
for t in range (epochs):
    print(f"Epoch {t+1}/{epochs}")
    train_loop(train_dataloader, model, loss_fn, optimizer)
print(f"Training Complete")

Epoch 1/5
loss: 0.7060986 [    0/43415]
loss: 0.7011027 [ 6400/43415]
loss: 0.6993517 [12800/43415]
loss: 0.6958669 [19200/43415]
loss: 0.6916252 [25600/43415]




loss: 0.6868683 [32000/43415]
loss: 0.6838775 [38400/43415]
Epoch 2/5
loss: 0.6802402 [    0/43415]
loss: 0.6766976 [ 6400/43415]
loss: 0.6713939 [12800/43415]
loss: 0.6615598 [19200/43415]
loss: 0.6507159 [25600/43415]
loss: 0.6365322 [32000/43415]
loss: 0.6158715 [38400/43415]
Epoch 3/5
loss: 0.5849535 [    0/43415]
loss: 0.5337242 [ 6400/43415]
loss: 0.4623036 [12800/43415]
loss: 0.3621225 [19200/43415]
loss: 0.3101289 [25600/43415]
loss: 0.2899132 [32000/43415]
loss: 0.3166189 [38400/43415]
Epoch 4/5
loss: 0.2799618 [    0/43415]
loss: 0.2659309 [ 6400/43415]
loss: 0.3247974 [12800/43415]
loss: 0.2780384 [19200/43415]
loss: 0.2901067 [25600/43415]
loss: 0.3075986 [32000/43415]
loss: 0.2735393 [38400/43415]
Epoch 5/5
loss: 0.2965017 [    0/43415]
loss: 0.2907738 [ 6400/43415]
loss: 0.2862725 [12800/43415]
loss: 0.2801693 [19200/43415]
loss: 0.2541184 [25600/43415]
loss: 0.2739074 [32000/43415]
loss: 0.3009950 [38400/43415]
Training Complete


In [28]:
correct = 0
total = 0

model.eval()  # Set the model to evaluation mode
with torch.no_grad():  # Disable gradient computation
    for images, labels in test_dataloader:
        images, labels = images.to(device), labels.to(device)

        # Forward pass
        outputs = model(images)
        predicted_probs = torch.sigmoid(outputs)  # Convert logits to probabilities
        predicted = (predicted_probs > 0.5).float()  # Apply threshold to get binary predictions

        # Count total labels
        total += labels.numel()  # Total number of labels in the dataset. i.e. 14 labels per image.

        # Count correctly predicted labels
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f'Accuracy of the network on the test dataset: {accuracy:.2f} %')


Accuracy of the network on the test dataset: 88.54 %


In [34]:
with_ones_count = np.sum(np.any(y_test == 1, axis=1))
no_ones_count = np.sum(np.all(y_test == 0, axis=1))

print(f"Number of entries in y_test without any '1's: {no_ones_count}")
print(f"Number of entries in y_test with at least one '1': {with_ones_count}")


Number of entries in y_test without any '1's: 1194
Number of entries in y_test with at least one '1': 8110
