In [1]:
# Integer class id -> human-readable category name (ids follow tuple order, starting at 0).
classification_names = dict(
    enumerate(('cloth', 'shoe', 'bag', 'pants', 'watch'))
)


In [3]:
import os
import re
import numpy as np
import pandas as pd

from PIL import Image

import torch
from torch.utils.data import Dataset, DataLoader, random_split



In [6]:
def sorted_alphanum(img_names):
    """Return ``img_names`` sorted in natural (human) order.

    Runs of ASCII digits are compared numerically and everything else is
    compared case-insensitively, so ``"2.jpg"`` sorts before ``"10.jpg"``.

    Args:
        img_names: Iterable of file-name strings.

    Returns:
        A new list containing the same names in natural order.
    """
    def alphanum_key(img_name):
        # re.split with a capturing group yields text, digits, text, digits, ...
        # so odd positions are exactly the ``[0-9]+`` runs. Choosing by position
        # (rather than ``str.isdigit``) keeps non-ASCII "digit" characters such
        # as a superscript two in the text chunks, where ``int()`` would fail or
        # an int would end up being compared against a str.
        chunks = re.split(r'([0-9]+)', img_name)
        return [
            int(chunk) if position % 2 else chunk.lower()
            for position, chunk in enumerate(chunks)
        ]

    return sorted(img_names, key=alphanum_key)

class ImageLabelDataSet(Dataset):
    """Dataset yielding ``(transformed_image, label)`` pairs.

    Images are the entries of ``image_dir`` in natural (alphanumeric) order.
    Labels come from a CSV with ``id`` and ``target`` columns, and the label
    for sample ``idx`` is looked up by using ``idx`` itself as the ``id``.

    Args:
        image_dir: Directory whose entries are the image files.
        transform: Callable applied to the RGB PIL image. It is required when
            a sample is fetched, although it may be omitted at construction.
        labels_path: Path of the labels CSV. Defaults to the location the
            notebook has always used.
    """

    def __init__(self, image_dir, transform=None, labels_path="../common/fashion-labels.csv"):
        super().__init__()
        self.main_dir = image_dir
        self.transform = transform
        self.image_names = sorted_alphanum(os.listdir(self.main_dir))
        self.labels = pd.read_csv(labels_path)
        self.label_dict = dict(zip(self.labels['id'], self.labels['target']))

    def __len__(self):
        """Return the number of directory entries treated as images."""
        return len(self.image_names)

    def __getitem__(self, idx):
        """Load, convert and transform the image at position ``idx``.

        Returns:
            Tuple ``(transformed_image, label)``.

        Raises:
            ValueError: If no transform was supplied.
            KeyError: If ``idx`` is not an ``id`` in the labels CSV.
        """
        # Fail before touching the disk when the dataset cannot produce a sample.
        if self.transform is None:
            raise ValueError("transform is not defined")

        image_loc = os.path.join(self.main_dir, self.image_names[idx])
        # The context manager closes the underlying file as soon as the pixel
        # data has been copied into the converted image.
        with Image.open(image_loc) as raw_image:
            image = raw_image.convert('RGB')
        tensor_img = self.transform(image)

        # NOTE(review): this assumes the naturally sorted position of a file
        # equals its CSV ``id`` (e.g. files named 0.jpg, 1.jpg, ...) - confirm
        # that the directory has no gaps or extra entries.
        label = self.label_dict[idx]
        return tensor_img, label

In [7]:
import torchvision.transforms as T

# Use the first CUDA device when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

# Preprocessing applied to every image: resize to 64x64, then convert to a tensor.
transform = T.Compose(
    [T.Resize(size=(64, 64)), T.ToTensor()]
)

# The complete labelled image collection, before any train/test partitioning.
full_dataset = ImageLabelDataSet("../common/dataset/", transform)

print(len(full_dataset))

24853


In [8]:
from torch.utils.data import random_split
# 75% / 25% train/test partition. The explicitly seeded generator makes the
# partition identical on every "Restart & Run All"; without it the membership
# of each subset changes from run to run.
train_set, test_set = random_split(
    full_dataset,
    [0.75, 0.25],
    generator=torch.Generator().manual_seed(42),
)

In [9]:
# Number of samples per mini-batch, shared by both loaders.
batch_size = 32

# Training loader: shuffled, and the final incomplete batch is discarded.
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, drop_last=True)

# Test loader: default ordering, and the final partial batch is kept.
test_loader = DataLoader(test_set, batch_size=batch_size)

In [10]:
import torch.nn as nn

class Classifier(nn.Module):
    """Small CNN: two conv + ReLU + 2x2 max-pool stages, then one linear layer.

    Args:
        num_classes: Number of output logits. Defaults to 5.
        image_size: Spatial size of the input images, either a single int for
            square inputs or a ``(height, width)`` pair. Defaults to 64.

    Raises:
        ValueError: If either side is smaller than 4 pixels, which would leave
            nothing after the two pooling stages.
    """

    def __init__(self, num_classes=5, image_size=64):
        super().__init__()
        if isinstance(image_size, int):
            height = width = image_size
        else:
            height, width = image_size

        # The padded 3x3 convolutions keep the spatial size; each 2x2/stride-2
        # pooling halves it (rounding down), so two stages divide it by four.
        pooled_h, pooled_w = height // 4, width // 4
        if pooled_h < 1 or pooled_w < 1:
            raise ValueError("image_size must be at least 4 pixels per side")

        self.conv1 = nn.Conv2d(in_channels=3, out_channels=8, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        # 16 feature maps of pooled_h x pooled_w each (16 * 16 * 16 for 64x64 input).
        self.linear = nn.Linear(in_features=16 * pooled_h * pooled_w, out_features=num_classes)

    def forward(self, x):
        """Map a ``(batch, 3, height, width)`` tensor to ``(batch, num_classes)`` logits."""
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        # Collapse channel and spatial dimensions, keeping the batch dimension.
        x = torch.flatten(x, start_dim=1)
        return self.linear(x)

In [11]:
# Instantiate the classifier with its default arguments and print its layer summary.
model = Classifier()
print(model)

Classifier(
  (conv1): Conv2d(3, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (linear): Linear(in_features=4096, out_features=5, bias=True)
)


In [13]:
# Pull a single (images, labels) batch from the training loader.
data_iter = iter(train_loader)
images, labels = next(data_iter)

In [15]:
# Run one forward pass on the sampled batch and print the shape of the result.
output = model(images)
print(output.shape)

torch.Size([32, 5])
