In [81]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

import torch
import torchvision
import torchvision.transforms as transforms

from PIL import Image

# Locations of the training images and of the product metadata table.
img_dir = "images/train/"

csv_file = "products.csv"

# Read the metadata through `csv_file` so the path is defined in one place only.
products = pd.read_csv(csv_file)

# label list: sorted unique values of the 'GS1 Form' column.
# This definition must stay active: `num_classes` below is derived from it, and
# leaving it commented out makes the cell raise NameError on a fresh kernel.
classes = np.unique(products['GS1 Form'])

# Define relevant variables for the ML task
batch_size = 64
num_classes = len(classes)
learning_rate = 0.01
num_epochs = 10

# Preprocessing pipeline: resize every image to 32x32, convert it to a tensor,
# then normalize each of the three channels with the mean/std below.
# NOTE(review): these mean/std values look like the commonly quoted CIFAR-10
# channel statistics - confirm they are appropriate for the product images.
transform = transforms.Compose([transforms.Resize((32,32)),
                                     transforms.ToTensor(),
                                     transforms.Normalize(mean=[0.4914, 0.4822, 0.4465],
                                                          std=[0.2023, 0.1994, 0.2010])
                                     ])

# Device will determine whether to run the training on GPU or CPU.
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [97]:
"""Loading data with PyTorch"""

from torch.utils.data import DataLoader, Dataset
import random
import os

def one_hot_encode(labels, num_classes):
    """Turn a sequence of integer class indices into a one-hot matrix.

    Args:
        labels: Sequence of integer class indices, one per sample.
        num_classes: Width of the encoding (number of columns).

    Returns:
        A NumPy array of shape ``(len(labels), num_classes)`` filled with
        zeros except for a 1 at ``[i, labels[i]]`` in every row ``i``.
    """
    n_samples = len(labels)
    encoded = np.zeros((n_samples, num_classes))
    row_index = np.arange(n_samples)
    # Fancy indexing sets exactly one entry per row.
    encoded[row_index, labels] = 1
    return encoded

class MultiLabelDataset(Dataset):
    """Image dataset pairing each product image with three categorical labels.

    Every CSV row describes one product. The ``Barcode`` column names the image
    file (``<Barcode>.jpg`` inside ``img_dir``); the ``GS1 Form``, ``Material``
    and ``Colour`` columns hold the labels.

    Args:
        img_dir: Directory that contains the ``<Barcode>.jpg`` images.
        csv_file: Path of the CSV file with the columns listed above.
        transform: Optional callable applied to the loaded RGB PIL image.

    Attributes:
        gs1_form, material, colour: Dictionaries mapping each distinct column
            value to an integer index. Values are indexed in sorted order so
            the mapping is identical on every run.
    """

    def __init__(self, img_dir, csv_file, transform=None):
        self.dataframe = pd.read_csv(csv_file)
        self.img_dir = img_dir
        self.img_paths = [str(barcode) + '.jpg' for barcode in self.dataframe['Barcode'].values]
        self.labels = {
            'GS1 Form': self.dataframe['GS1 Form'].values,
            'Material': self.dataframe['Material'].values,
            'Colour': self.dataframe['Colour'].values,
        }

        # dictionary for each class
        self.gs1_form = self._build_index(self.labels['GS1 Form'])
        self.material = self._build_index(self.labels['Material'])
        self.colour = self._build_index(self.labels['Colour'])

        self.transform = transform

    @staticmethod
    def _build_index(values):
        """Map each distinct value to a stable integer index.

        Iterating a bare ``set`` of strings yields a different order from one
        interpreter run to the next (hash randomisation), which would silently
        change the meaning of every class index between kernel restarts.
        Sorting fixes the order; ``key=str`` keeps the sort well defined when a
        column mixes types (for example strings and missing values).
        """
        return {val: i for i, val in enumerate(sorted(set(values), key=str))}

    def __len__(self):
        """Return the number of samples (one per CSV row)."""
        return len(self.img_paths)

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Returns:
            ``(img, label)`` where ``img`` is the RGB image (after ``transform``
            when one was given) and ``label`` is an integer tensor of shape
            ``(3, num_classes)`` holding one one-hot row per attribute, in the
            order GS1 Form, Material, Colour.
        """
        img_path = os.path.join(self.img_dir, self.img_paths[idx])
        img = Image.open(img_path).convert('RGB')

        label1 = self.gs1_form[self.labels['GS1 Form'][idx]]
        label2 = self.material[self.labels['Material'][idx]]
        label3 = self.colour[self.labels['Colour'][idx]]

        label = torch.tensor([label1, label2, label3])
        # Width of every one-hot row: the combined size of the three vocabularies.
        num_classes = len(self.gs1_form) + len(self.material) + len(self.colour)

        # NOTE(review): the three indices are not offset per attribute, so all
        # rows share the same leading columns and a hot column is only
        # meaningful together with its row position - confirm this layout is
        # what the loss function expects.
        label = torch.nn.functional.one_hot(label, num_classes = num_classes)

        if self.transform:
            img = self.transform(img)
        return img, label

class MultiLabelDataLoader(DataLoader):
    """``DataLoader`` subclass that yields each batch as a ``(data, label)`` tuple.

    Args:
        dataset: Dataset whose samples are ``(data, label)`` pairs.
        batch_size: Samples per batch. The default is the value the
            module-level ``batch_size`` had when this class was defined.
        shuffle: Whether the sample order is reshuffled on every iteration.
        num_workers: Number of worker processes used for loading.
    """

    def __init__(self, dataset, batch_size=batch_size, shuffle=False, num_workers=0):
        super().__init__(dataset, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers)

    def __iter__(self):
        """Iterate over the batches, yielding ``(data, label)`` tuples."""
        for data, label in super().__iter__():
            yield (data, label)

    def __len__(self):
        """Return the number of batches per epoch.

        A loader's length is its batch count, not its sample count; progress
        bars and step-based schedulers rely on that. Use ``len(loader.dataset)``
        for the number of samples.
        """
        return super().__len__()

# Build the training dataset from the image directory and the CSV metadata;
# every image goes through the `transform` pipeline when it is loaded.
train_dataset = MultiLabelDataset(img_dir, csv_file, transform=transform)

# Wrap the dataset in the custom loader: batches of `batch_size` samples,
# drawn in shuffled order.
train_loader = MultiLabelDataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Pull a single batch as a sanity check of the loading pipeline.
data_iter = iter(train_loader)
images, labels = next(data_iter)

# Display the label tensor of that batch (three one-hot rows per sample).
labels

tensor([[[0, 1, 0,  ..., 0, 0, 0],
         [0, 1, 0,  ..., 0, 0, 0],
         [1, 0, 0,  ..., 0, 0, 0]],

        [[0, 0, 0,  ..., 0, 0, 0],
         [0, 1, 0,  ..., 0, 0, 0],
         [1, 0, 0,  ..., 0, 0, 0]],

        [[0, 0, 1,  ..., 0, 0, 0],
         [0, 0, 1,  ..., 0, 0, 0],
         [0, 1, 0,  ..., 0, 0, 0]],

        ...,

        [[0, 0, 1,  ..., 0, 0, 0],
         [0, 0, 1,  ..., 0, 0, 0],
         [0, 1, 0,  ..., 0, 0, 0]],

        [[0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0],
         [1, 0, 0,  ..., 0, 0, 0]],

        [[0, 0, 0,  ..., 0, 0, 0],
         [0, 1, 0,  ..., 0, 0, 0],
         [0, 0, 1,  ..., 0, 0, 0]]])