In [54]:
import os
import numpy as np
import json
from skmultilearn.model_selection import iterative_train_test_split
import torch

from torch.utils.data import Dataset
from torchvision import transforms
from torch.utils.data import DataLoader
from PIL import Image

import torch.nn as nn
import torch.nn.functional as F

In [55]:
dataset_directory = 'dataset'

images = []
labels = []

for recipe_folder in os.listdir(dataset_directory):
    recipe_path = os.path.join(dataset_directory, recipe_folder)
    allergens_file = os.path.join(recipe_path, 'allergens.json')

    if os.path.isfile(allergens_file):
        with open(allergens_file, 'r') as f:
            data = json.load(f)
            allergens = data['allergens']

            for image in data['images']:
                images.append(os.path.join(dataset_directory, recipe_folder, image))
                labels.append(allergens)

In [56]:
x = np.array(images).reshape(-1,1) # iterative_train_test_split expects 2d array
y = np.array(labels)

In [57]:
x_train, y_train, x_temp, y_temp = iterative_train_test_split(x, y, test_size=0.3) # http://scikit.ml/stratification.html

In [58]:
x_test, y_test, x_val, y_val = iterative_train_test_split(x_temp, y_temp, test_size=0.5)

In [59]:
print(x_train.shape, y_train.shape, x_test.shape, y_test.shape, x_val.shape, y_val.shape)

(43415, 1) (43415, 14) (9304, 1) (9304, 14) (9303, 1) (9303, 14)


In [60]:
x_train

array([['dataset\\$25_pumpkin_pie\\images/image_2.jpg'],
       ["dataset\\'get_up_&_go'_bars\\images/image_2.jpg"],
       ['dataset\\(panera_bread)_black_bean_soup\\images/image_2.jpg'],
       ...,
       ["dataset\\zurie's_overnight_no-knead_bread\\images/image_3.jpg"],
       ["dataset\\zurie's_overnight_no-knead_bread\\images/image_4.jpg"],
       ["dataset\\zurie's_overnight_no-knead_bread\\images/image_5.jpg"]],
      shape=(43415, 1), dtype='<U93')

In [61]:
# https://pytorch.org/tutorials/beginner/basics/data_tutorial.html#creating-a-custom-dataset-for-your-files

class FoodAllergenDataset(Dataset):
    def __init__(self, image_paths, labels, transform=None, target_transform=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx][0]
        image = Image.open(img_path).convert('RGB') # Convert all images to 3 channel RGB as dataset contains some 4 channel RGBA images 
        label = torch.tensor(self.labels[idx], dtype=torch.float32)

        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)

        return image, label

In [62]:
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Test more transforms later in training - cropping, rotation, centering etc.

In [63]:
train_dataset = FoodAllergenDataset(x_train, y_train, transform=transform)
val_dataset = FoodAllergenDataset(x_val, y_val, transform=transform)
test_dataset = FoodAllergenDataset(x_test, y_test, transform=transform)

In [64]:
# Hyperparameters
learning_rate = 1e-3
batch_size = 64
epochs = 5

In [65]:
# Create DataLoaders
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)   # Adjust batch size for training
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [66]:
for images, labels in train_dataloader:
    print(images.shape)  
    print(labels.shape)

torch.Size([64, 3, 128, 128])
torch.Size([64, 14])
torch.Size([64, 3, 128, 128])
torch.Size([64, 14])
torch.Size([64, 3, 128, 128])
torch.Size([64, 14])
torch.Size([64, 3, 128, 128])
torch.Size([64, 14])
torch.Size([64, 3, 128, 128])
torch.Size([64, 14])
torch.Size([64, 3, 128, 128])
torch.Size([64, 14])
torch.Size([64, 3, 128, 128])
torch.Size([64, 14])
torch.Size([64, 3, 128, 128])
torch.Size([64, 14])
torch.Size([64, 3, 128, 128])
torch.Size([64, 14])
torch.Size([64, 3, 128, 128])
torch.Size([64, 14])
torch.Size([64, 3, 128, 128])
torch.Size([64, 14])
torch.Size([64, 3, 128, 128])
torch.Size([64, 14])
torch.Size([64, 3, 128, 128])
torch.Size([64, 14])
torch.Size([64, 3, 128, 128])
torch.Size([64, 14])
torch.Size([64, 3, 128, 128])
torch.Size([64, 14])
torch.Size([64, 3, 128, 128])
torch.Size([64, 14])
torch.Size([64, 3, 128, 128])
torch.Size([64, 14])
torch.Size([64, 3, 128, 128])
torch.Size([64, 14])
torch.Size([64, 3, 128, 128])
torch.Size([64, 14])
torch.Size([64, 3, 128, 128])
t

KeyboardInterrupt: 

In [75]:
# Sample CNN model from pytorch

class Net(nn.Module):
    def __init__(self, num_labels=14):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 29 * 29, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_labels)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 29 * 29)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


net = Net()

In [76]:
device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
print(f"Using {device} device")

Using cpu device


In [77]:
model = Net().to(device)
print(model)

Net(
  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=13456, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=14, bias=True)
)


In [78]:
def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch, (images, labels) in enumerate(dataloader):
        prediction = model(images)
        loss = loss_fn(prediction, labels)

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if batch % 100 == 0:
            loss, current = loss.item(), batch * len(images)
            print(f"loss: {loss:.7f} [{current:5d}/{size:5d}]")


In [79]:
# Define loss function
loss_fn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [80]:
for t in range (epochs):
    print(f"Epoch {t+1}/{epochs}")
    train_loop(train_dataloader, model, loss_fn, optimizer)
print(f"Training Complete")

Epoch 1/5
loss: 0.6980657 [    0/43415]
loss: 0.6939517 [ 6400/43415]
loss: 0.6902030 [12800/43415]
loss: 0.6857356 [19200/43415]
loss: 0.6842012 [25600/43415]
loss: 0.6797877 [32000/43415]
loss: 0.6758052 [38400/43415]
Epoch 2/5
loss: 0.6702064 [    0/43415]
loss: 0.6635653 [ 6400/43415]
loss: 0.6597129 [12800/43415]
loss: 0.6465059 [19200/43415]
loss: 0.6298174 [25600/43415]
loss: 0.6064492 [32000/43415]
loss: 0.5700330 [38400/43415]
Epoch 3/5
loss: 0.5189829 [    0/43415]
loss: 0.4488837 [ 6400/43415]
loss: 0.3732600 [12800/43415]
loss: 0.3209126 [19200/43415]
loss: 0.3018375 [25600/43415]
loss: 0.2802851 [32000/43415]
loss: 0.3072897 [38400/43415]
Epoch 4/5
loss: 0.2962787 [    0/43415]
loss: 0.2813539 [ 6400/43415]
loss: 0.2823711 [12800/43415]
loss: 0.3156386 [19200/43415]
loss: 0.3402604 [25600/43415]
loss: 0.2781127 [32000/43415]
loss: 0.2663131 [38400/43415]
Epoch 5/5
loss: 0.2976922 [    0/43415]
loss: 0.2451270 [ 6400/43415]
loss: 0.2877372 [12800/43415]
loss: 0.2799789 [192