In [None]:
!pip install Pillow matplotlib


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
import os
from torch.utils.data import random_split
import torch.nn as nn

In [None]:
# CSV file that MultiLabelImageDataset loads its rows from.
dataset_path = "./dataset/dataset.csv"
# Directory prefix for the image files. The trailing slash matters: the preview
# cell appends a relative file name directly onto this string.
image_path = "./data/abo-images-small/images/small/"

In [None]:
from matplotlib import pyplot as plt
from PIL import Image

def show_image(image_url):
    """Display a single image inline with the axes hidden.

    Args:
        image_url: Despite the name, this is passed straight to
            ``PIL.Image.open``, so it must be a local file path (or an open
            binary file object), not a remote URL.
    """
    # Open through a context manager so the file handle is released as soon as
    # the pixel data has been handed over to matplotlib.
    with Image.open(image_url) as img:
        plt.imshow(img)

    plt.axis('off')
    plt.show()

In [None]:
# Sanity check: render one image located under image_path.
show_image(image_path + '6a/6a8d8df6.jpg')

In [None]:
class MultiLabelImageDataset(Dataset):
    """Image dataset that yields one integer class label per category level.

    The CSV at ``file_path`` must contain the columns ``category_1`` ...
    ``category_<num_categories>`` and, at column position 2, the image path
    relative to ``img_dir``. Missing category values are replaced by the
    string ``'UNK'``, which is then encoded like any other class.

    Attributes:
        data: DataFrame loaded from the CSV, extended with one integer
            ``category_<i>_idx`` column per category level.
        category_labels: ``{column_name: {class_index: category_string}}``,
            usable to map predicted indices back to category names.
        label_columns: Names of the ``*_idx`` columns, in level order.
        num_categories: Number of category levels.
    """

    # Positional index of the CSV column holding the relative image path.
    IMAGE_PATH_COLUMN = 2

    def __init__(self, file_path, img_dir, transform=None, num_categories=6):
        """Load the CSV and integer-encode every category column.

        Args:
            file_path: Path (or file-like object) readable by ``pd.read_csv``.
            img_dir: Directory that the CSV's image paths are relative to.
            transform: Optional callable applied to each loaded RGB PIL image.
            num_categories: Number of ``category_<i>`` columns to encode.
        """
        self.data = pd.read_csv(file_path)
        self.img_dir = img_dir
        self.transform = transform
        self.num_categories = num_categories

        # Convert category labels to unique indices.
        self.category_labels = {}
        self.label_columns = []
        for level in range(1, num_categories + 1):
            column_name = f'category_{level}'
            idx_column = f'{column_name}_idx'

            filled = self.data[column_name].fillna('UNK')
            # Encode once and reuse the result for both the integer codes and
            # the index -> name mapping, so the two can never disagree.
            categorical = filled.astype('category')

            self.data[column_name] = filled
            self.data[idx_column] = categorical.cat.codes
            self.category_labels[column_name] = dict(enumerate(categorical.cat.categories))
            self.label_columns.append(idx_column)

    def __len__(self):
        """Return the number of rows in the CSV."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, labels)`` for row ``idx``.

        ``image`` is the RGB image after ``transform`` (or the RGB PIL image
        itself when no transform was given). ``labels`` is a ``torch.long``
        tensor of shape ``(num_categories,)`` with one class index per level.
        """
        img_name = os.path.join(self.img_dir, self.data.iloc[idx, self.IMAGE_PATH_COLUMN])
        # Force three channels so grayscale / RGBA / CMYK files still batch
        # together and match a 3-channel network input; the context manager
        # closes the file once the converted copy exists.
        with Image.open(img_name) as img:
            image = img.convert('RGB')

        if self.transform:
            image = self.transform(image)

        # Look labels up by column name rather than by negative position, so
        # the result does not depend on the *_idx columns being last.
        labels = [int(self.data[column].iloc[idx]) for column in self.label_columns]

        # Class-index targets must be int64 for losses such as CrossEntropyLoss;
        # the raw category codes are int8/int16.
        return image, torch.tensor(labels, dtype=torch.long)

In [None]:
# Preprocessing for every image: resize to 224x224, then convert to a tensor.
# 224x224 yields 14x14 feature maps after MultiTaskCNN's four 2x2 poolings,
# which is what its 256 * 14 * 14 linear layer is sized for.
transform = transforms.Compose([transforms.Resize((224, 224)), transforms.ToTensor()])

# Number of samples per DataLoader batch.
batch_size = 32

# Use the GPU when torch can see one, otherwise stay on the CPU.
device = 'cpu'
if torch.cuda.is_available():
    device = 'cuda'

In [None]:
# Build the full dataset from the CSV and image directory configured above.
dataset = MultiLabelImageDataset(
    file_path=dataset_path,
    img_dir=image_path,
    transform=transform,
)

In [None]:
# Fixed seed so that train/dev/test membership is identical on every run.
# Without it, each kernel restart reshuffles samples between the splits, so
# dev/test scores cannot be reproduced.
SPLIT_SEED = 42

dataset_size = len(dataset)
train_size = int(0.7 * dataset_size)  # 70% training
dev_size = int(0.15 * dataset_size)   # 15% validation
test_size = dataset_size - train_size - dev_size  # remainder (~15%) test

# randomly (but reproducibly) split the dataset
train_dataset, dev_dataset, test_dataset = random_split(
    dataset,
    [train_size, dev_size, test_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

In [None]:
# One DataLoader per split. Only the training loader shuffles; the dev and
# test loaders iterate in a fixed order.
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
dev_dataloader, test_dataloader = (
    DataLoader(held_out_split, batch_size=batch_size, shuffle=False)
    for held_out_split in (dev_dataset, test_dataset)
)

In [None]:
class MultiTaskCNN(nn.Module):
    """CNN with a shared trunk and one linear classification head per task.

    The trunk is four ``Conv2d -> ReLU -> MaxPool2d(2)`` stages (3 -> 32 -> 64
    -> 128 -> 256 channels) followed by a 512-unit fully connected layer. Each
    entry of ``num_classes_list`` gets its own ``nn.Linear(512, n)`` head,
    registered as ``fc1``, ``fc2``, ... in list order.

    Args:
        num_classes_list: Sequence with the number of classes of each task.
            Must contain at least one entry.

    Raises:
        ValueError: If ``num_classes_list`` is empty.
    """

    def __init__(self, num_classes_list):
        super().__init__()

        num_classes_list = list(num_classes_list)
        if not num_classes_list:
            raise ValueError('num_classes_list must contain at least one entry')

        # feature extraction block: every stage halves the spatial resolution
        self.conv_block = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )

        # Bring the feature map to the 14x14 grid the linear layer expects.
        # For 224x224 inputs the map already is 14x14, so this is an exact
        # identity; other input sizes no longer fail at the flatten step.
        # The layer has no parameters, so state-dict keys are unaffected.
        self.spatial_pool = nn.AdaptiveAvgPool2d((14, 14))

        # fully connected layers
        self.fc_block = nn.Sequential(
            nn.Flatten(),
            nn.Linear(256 * 14 * 14, 512),
            nn.ReLU()
        )

        # One classifier head per category level, registered as fc1..fcN so
        # attribute names and state-dict keys match the six-head layout.
        self.num_heads = len(num_classes_list)
        for head_index, num_classes in enumerate(num_classes_list, start=1):
            setattr(self, f'fc{head_index}', nn.Linear(512, num_classes))

    def forward(self, x):
        """Compute the logits of every head for a batch of images.

        Args:
            x: Float tensor of shape ``(batch, 3, H, W)``; ``H`` and ``W`` must
                be at least 16 so the four poolings leave a non-empty map.

        Returns:
            Tuple with one ``(batch, num_classes_i)`` logits tensor per head,
            in the order of ``num_classes_list``.
        """
        # Shared features
        x = self.conv_block(x)
        x = self.spatial_pool(x)
        x = self.fc_block(x)

        # Separate classifier layer for each category level
        return tuple(
            getattr(self, f'fc{head_index}')(x)
            for head_index in range(1, self.num_heads + 1)
        )