Installing dataset

In [None]:
#move kaggle.json into files first
! pip install kaggle
! mkdir ~/.kaggle
! cp kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json



In [None]:
!kaggle datasets download paramaggarwal/fashion-product-images-dataset -p .

Downloading fashion-product-images-dataset.zip to .
100% 23.1G/23.1G [07:21<00:00, 90.6MB/s]
100% 23.1G/23.1G [07:21<00:00, 56.0MB/s]


In [None]:
!unzip -q /content/fashion-product-images-dataset.zip

In [None]:
!rm /content/fashion-product-images-dataset.zip

CSV file processing

In [None]:
import csv
import numpy as np
np.random.seed(0)

# Build labels.csv (filename, integer class id) from styles.csv.
# Column 0 of styles.csv is used as the image file stem and column 3 as the class name.
image_names = []
class_names = []

# `with` guarantees the file is closed even if a malformed row makes the indexing below raise.
with open("/content/fashion-dataset/styles.csv") as f1:
  csv_f1 = csv.reader(f1)
  next(csv_f1, None)  # skip the header row
  for row in csv_f1:
    image_names.append(str(row[0]))
    class_names.append(str(row[3]))

# np.unique returns the distinct class names in sorted order, so the
# name -> id mapping is the same on every run.
unique_classes = np.unique(class_names)
class_dict = {class_name: class_id for class_id, class_name in enumerate(unique_classes)}
print("The classes are: ", class_dict)

##Some files were found to be missing in this Kaggle dataset
##These will be skipped when writing the CSV file
missing_files = {'39403', '39410', '39401', '39425', '12347'}

# newline='' lets the csv module control line endings itself.
with open("/content/fashion-dataset/labels.csv", mode='w', newline='') as f2:
  csv_f2 = csv.writer(f2, delimiter=',')
  csv_f2.writerow(['filename', 'class'])
  for image_name, class_name in zip(image_names, class_names):
    if image_name not in missing_files:
      csv_f2.writerow([image_name + '.jpg', class_dict[class_name]])

The classes are:  {'Accessories': 0, 'Apparel Set': 1, 'Bags': 2, 'Bath and Body': 3, 'Beauty Accessories': 4, 'Belts': 5, 'Bottomwear': 6, 'Cufflinks': 7, 'Dress': 8, 'Eyes': 9, 'Eyewear': 10, 'Flip Flops': 11, 'Fragrance': 12, 'Free Gifts': 13, 'Gloves': 14, 'Hair': 15, 'Headwear': 16, 'Home Furnishing': 17, 'Innerwear': 18, 'Jewellery': 19, 'Lips': 20, 'Loungewear and Nightwear': 21, 'Makeup': 22, 'Mufflers': 23, 'Nails': 24, 'Perfumes': 25, 'Sandal': 26, 'Saree': 27, 'Scarves': 28, 'Shoe Accessories': 29, 'Shoes': 30, 'Skin': 31, 'Skin Care': 32, 'Socks': 33, 'Sports Accessories': 34, 'Sports Equipment': 35, 'Stoles': 36, 'Ties': 37, 'Topwear': 38, 'Umbrellas': 39, 'Vouchers': 40, 'Wallets': 41, 'Watches': 42, 'Water Bottle': 43, 'Wristbands': 44}


Importing libraries + setting seed

In [None]:
import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import matplotlib.pyplot as plt
import math
import numpy as np
import datetime
import os
import pandas as pd
from PIL import Image
#from skimage import io

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = torch.device('cuda')
else:
  device = torch.device('cpu')

# One shared seed for both NumPy's and PyTorch's global random number generators.
seed = 0
np.random.seed(seed)
torch.manual_seed(seed)

<torch._C.Generator at 0x7feb33e1a9d0>

Image transformations, datasets and dataloaders

In [None]:
# Dataset/DataLoader pattern used in this cell:
# https://pytorch.org/tutorials/beginner/basics/data_tutorial.html
img_dir = "/content/fashion-dataset/fashion-dataset/images"
batch_size = 64  # TODO revisit; 128, 256 and 512 were also tried

#transformations
def normalize_transform():
  """Return the per-channel Normalize transform applied after ToTensor.

  NOTE(review): the constants look like the usual ImageNet channel statistics
  that torchvision's pretrained models expect -- confirm.
  """
  channel_mean = [0.485, 0.456, 0.406]
  channel_std = [0.229, 0.224, 0.225]
  return transforms.Normalize(mean=channel_mean, std=channel_std)

# Preprocessing pipeline: resize to 256, centre-crop to 224x224, randomly mirror
# half of the images, convert to a tensor, then normalise each channel.
data_transforms = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ToTensor(),
    normalize_transform(),
])

#custom datasets
class FashionProducts(torch.utils.data.Dataset):
  """Image-classification dataset driven by a CSV annotation file.

  Args:
    annotations_file: path to a CSV file with a header row; column 0 holds the
      image file name (relative to ``img_dir``) and column 1 holds the label.
    img_dir: directory that contains the image files.
    transform: optional callable applied to the loaded PIL image.
    target_transform: optional callable applied to the label.
  """

  def __init__(self, annotations_file, img_dir, transform=None, target_transform=None):
    self.img_labels = pd.read_csv(annotations_file)
    self.img_dir = img_dir
    self.transform = transform
    self.target_transform = target_transform

  def __len__(self):
    """Return the number of annotated images."""
    return len(self.img_labels)

  def __getitem__(self, idx):
    """Load sample ``idx`` and return it as an ``(image, label)`` pair.

    The image is always converted to RGB, so any non-RGB file (for example
    grayscale or RGBA) still yields three channels, which a 3-channel
    Normalize transform requires.
    """
    img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0])
    # Image.open is lazy and keeps the file open; convert() forces the pixel
    # data to load, and the context manager then releases the file handle.
    with Image.open(img_path) as img:
      image = img.convert("RGB")
    label = self.img_labels.iloc[idx, 1]
    if self.transform:
      image = self.transform(image)
    if self.target_transform:
      label = self.target_transform(label)
    return image, label

labels_csv = "/content/fashion-dataset/labels.csv"

# Training view of the data: uses data_transforms, which includes the random horizontal flip.
trainvaltest_dataset = FashionProducts(annotations_file = labels_csv,
                                img_dir = img_dir,
                                transform=data_transforms
                                )

# Evaluation view of the same files: identical preprocessing but WITHOUT the random
# flip, so validation/test accuracy is deterministic and not measured on augmented images.
eval_transforms = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    normalize_transform(),
])
eval_dataset = FashionProducts(annotations_file = labels_csv,
                               img_dir = img_dir,
                               transform=eval_transforms
                               )

#splitting dataset
train_size = 35000
test_size = 5000
val_size = len(trainvaltest_dataset) - train_size - test_size
if val_size < 0:
  # A negative length would still sum to len(dataset) and could yield overlapping splits.
  raise ValueError(f"Dataset has only {len(trainvaltest_dataset)} samples; "
                   f"cannot take {train_size} training and {test_size} test samples.")

# The seeded generator fixes which sample lands in which split.
train_dataset, val_split, test_split = torch.utils.data.random_split(trainvaltest_dataset, [train_size, val_size, test_size],
                                                                     generator=torch.Generator().manual_seed(seed))
# Reuse the exact same indices, but read them through the flip-free evaluation dataset.
val_dataset = torch.utils.data.Subset(eval_dataset, val_split.indices)
test_dataset = torch.utils.data.Subset(eval_dataset, test_split.indices)

#dataloaders
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, generator=torch.Generator().manual_seed(seed))
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

print("training images: ",len(train_dataset), "; validation images:", len(val_dataset), "; test images: ", len(test_dataset))

training images:  35000 ; validation images: 4441 ; test images:  5000


DON'T RUN FURTHER TRAINING/VALIDATION CELLS UNLESS YOU KNOW WHAT YOU'RE DOING. RISK OF OVERWRITING MODEL WEIGHTS.

Setting hyperparameters

In [None]:
# Fine-tuning setup, following
# https://pytorch.org/tutorials/beginner/transfer_learning_tutorial.html#finetuning-the-convnet
num_classes = 45
num_epochs = 2
learning_rate = 0.05  # other values tried earlier: 0.000025, 0.256
momentum = 0.9

# Earlier experiments, kept as notes only (none of this runs):
#   * EfficientNet-B0 (torchvision.models.efficientnet_b0) with classifier[1] replaced by a
#     num_classes-way nn.Linear initialised with nn.init.kaiming_normal_; see
#     https://pytorch.org/vision/stable/_modules/torchvision/models/efficientnet.html#efficientnet_b0
#   * rebuilding model.features[-1] with torchvision.ops.misc.ConvNormActivation (SiLU activation)
#   * freezing every backbone parameter (requires_grad = False)
#   * ResNet-50 instead of ResNet-152
#   * RMSprop (alpha=0.99, eps=1e-08, weight_decay=0.9) and StepLR / MultiStepLR schedulers

# Pretrained ResNet-152 whose final fully connected layer is swapped for a
# freshly initialised num_classes-way head.
model = torchvision.models.resnet152(pretrained=True)
num_features = model.fc.in_features
model.fc = nn.Linear(in_features=num_features, out_features=num_classes)
model = model.to(device)

# TODO (original author): check params in original paper
criterion = nn.CrossEntropyLoss()

# All parameters are passed to the optimizer, so the whole network is fine-tuned.
optimizer = optim.SGD(
    params=model.parameters(),
    lr=learning_rate,
    momentum=momentum,
)

Begin training

In [None]:
# Explicit confirmation guards against accidentally retraining and overwriting saved weights.
# Declining simply skips the cell; calling exit() here would tear down the notebook kernel.
if input("Are you sure you want to begin training and saving models? Select (Y/N): ") == "Y":
  save_dir = '/content/drive/MyDrive'
  # Fail before training starts rather than after, when the weights could no longer be saved.
  if not os.path.isdir(save_dir):
    raise FileNotFoundError(f"{save_dir} does not exist; mount Google Drive before training.")

  model.train()
  for epoch in range(num_epochs):
    for j, data in enumerate(train_loader, start=0):

      inputs, labels = data
      inputs, labels = inputs.to(device), labels.to(device)

      optimizer.zero_grad() #zero the parameter gradients
      outputs = model(inputs) #forwards
      loss = criterion(outputs, labels) #calculate loss
      loss.backward() #backwards
      optimizer.step() #optimize

      print(f'Batch loss: {loss.item()}')

  # Date-stamped checkpoint name:
  #https://stackoverflow.com/questions/32490629/getting-todays-date-in-yyyy-mm-dd-in-python
  today = datetime.datetime.today().strftime('%Y-%m-%d')
  path = f'{save_dir}/{today}_model_2.pth'
  torch.save(model.state_dict(), path)
  print('Model saved.')
else:
  print('Training skipped.')

Are you sure you want to begin training and saving models? Select (Y/N): Y
Batch loss: 3.8393478393554688
Batch loss: 4.127566814422607
Batch loss: 3.9593968391418457
Batch loss: 2.37223482131958
Batch loss: 1.9328553676605225
Batch loss: 0.9573944211006165
Batch loss: 1.9604616165161133
Batch loss: 1.4972474575042725
Batch loss: 0.8654681444168091
Batch loss: 1.5320483446121216
Batch loss: 1.5422035455703735
Batch loss: 1.0336294174194336
Batch loss: 1.0199122428894043
Batch loss: 0.7696935534477234
Batch loss: 1.0358219146728516
Batch loss: 0.9249795079231262
Batch loss: 0.8556882739067078
Batch loss: 1.250800609588623
Batch loss: 0.8107422590255737
Batch loss: 1.5717309713363647
Batch loss: 1.5552647113800049
Batch loss: 1.074620246887207
Batch loss: 1.2444936037063599
Batch loss: 2.721358060836792
Batch loss: 1.2774097919464111
Batch loss: 1.5310537815093994
Batch loss: 2.0231130123138428
Batch loss: 2.0269765853881836
Batch loss: 1.6906955242156982
Batch loss: 1.4342987537384033
B

Continue training

In [None]:
# Resume training from a saved checkpoint after explicit confirmation.
# Declining simply skips the cell; calling exit() here would tear down the notebook kernel.
if input("Are you sure you want to continue training and saving models? Select (Y/N): ") == "Y":
  num_epochs = int(input("How many more epochs?: "))
  # Learning rates are fractional (e.g. 0.001); int() would reject or truncate them.
  learning_rate = float(input("Learning rate: "))

  save_dir = '/content/drive/MyDrive'
  # Fail before training starts rather than after, when the weights could no longer be saved.
  if not os.path.isdir(save_dir):
    raise FileNotFoundError(f"{save_dir} does not exist; mount Google Drive before training.")

  # Rebuild the architecture; the checkpoint below overwrites every weight, so
  # there is no need to load the ImageNet weights first.
  num_classes = 45
  model = torchvision.models.resnet152(pretrained=False)
  num_features = model.fc.in_features
  model.fc = nn.Linear(num_features, num_classes)
  model = model.to(device)

  PATH = "/content/drive/MyDrive/2022-04-26_model_10.pth"

  # `device` is a torch.device, so the former `device == 'cuda'` string comparison
  # never matched; passing the device itself covers both the GPU and CPU cases.
  model.load_state_dict(torch.load(PATH, map_location=device))

  # Relies on `momentum` and `criterion` from the hyperparameter cell.
  optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)

  model.train()
  for epoch in range(num_epochs):
    print("Start of epoch")
    for j, (inputs, labels) in enumerate(train_loader, start=0):

      inputs, labels = inputs.to(device), labels.to(device)

      optimizer.zero_grad() #zero the parameter gradients
      outputs = model(inputs) #forwards
      loss = criterion(outputs, labels) #calculate loss
      loss.backward() #backwards
      optimizer.step() #optimize

      print(f'Batch loss: {loss.item()}')

    print("End of epoch")

  # Date-stamped checkpoint name:
  #https://stackoverflow.com/questions/32490629/getting-todays-date-in-yyyy-mm-dd-in-python
  today = datetime.datetime.today().strftime('%Y-%m-%d')
  path = f'{save_dir}/{today}_model_13.pth'
  torch.save(model.state_dict(), path)
  print('Model saved.')
else:
  print('Training skipped.')

Are you sure you want to continue training and saving models? Select (Y/N): Y
How many more epochs?: 2
Start of epoch
Batch loss: 0.03317996859550476
Batch loss: 0.0019977388437837362
Batch loss: 0.0057356394827365875
Batch loss: 0.009362767450511456
Batch loss: 0.07775802165269852
Batch loss: 0.00397285632789135
Batch loss: 0.0021896350663155317
Batch loss: 0.003714280901476741
Batch loss: 0.029969472438097
Batch loss: 0.0021305298432707787
Batch loss: 0.11610826104879379
Batch loss: 0.01308840699493885
Batch loss: 0.006084202788770199
Batch loss: 0.0012797724921256304
Batch loss: 0.07253751903772354
Batch loss: 0.006484930869191885
Batch loss: 0.008894371800124645
Batch loss: 0.05145562067627907
Batch loss: 0.03641821816563606
Batch loss: 0.01646299473941326
Batch loss: 0.016747137531638145
Batch loss: 0.0015456556575372815
Batch loss: 0.0010858927853405476
Batch loss: 0.028108375146985054
Batch loss: 0.04307341203093529
Batch loss: 0.012176466174423695
Batch loss: 0.1545790731906891

Validation accuracy

In [None]:
# Rebuild the ResNet-152 architecture with a num_classes-way head. The checkpoint
# loaded below overwrites every weight, so the ImageNet weights are not needed.
num_classes = 45
model = torchvision.models.resnet152(pretrained=False)
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, num_classes)
model = model.to(device)

PATH = "/content/drive/MyDrive/2022-04-26_model_12.pth"

# `device` is a torch.device, so the former `device == 'cuda'` string comparison
# never matched; passing the device itself covers both the GPU and CPU cases.
model.load_state_dict(torch.load(PATH, map_location=device))

# Evaluation mode: disables dropout and uses the stored batch-norm statistics.
model.eval()

accuracy_denom = len(val_dataset)
top1_accuracy_numer = 0
with torch.no_grad():
  for inputs, labels in val_loader:
    inputs, labels = inputs.to(device), labels.to(device)
    outputs = model(inputs)
    # predicted holds, for each sample in the batch, the index of the highest-scoring class
    _, predicted = torch.max(outputs,1)
    # top-1 accuracy: count matches for the whole batch at once instead of element by element
    top1_accuracy_numer += (predicted == labels).sum().item()
print(f"Top-1 accuracy is {top1_accuracy_numer*100/accuracy_denom}%")

Top-1 accuracy is 95.11371312767395%


Test accuracy

In [None]:
# Rebuild the ResNet-152 architecture with a num_classes-way head. The checkpoint
# loaded below overwrites every weight, so the ImageNet weights are not needed.
num_classes = 45
model = torchvision.models.resnet152(pretrained=False)
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, num_classes)
model = model.to(device)

# NOTE(review): this evaluates checkpoint model_8 while the validation cell uses
# model_12 -- confirm this is the checkpoint intended for the final test score.
PATH = "/content/drive/MyDrive/2022-04-26_model_8.pth"

# `device` is a torch.device, so the former `device == 'cuda'` string comparison
# never matched; passing the device itself covers both the GPU and CPU cases.
model.load_state_dict(torch.load(PATH, map_location=device))

# Evaluation mode: disables dropout and uses the stored batch-norm statistics.
model.eval()

accuracy_denom = len(test_dataset)
top1_accuracy_numer = 0
with torch.no_grad():
  for inputs, labels in test_loader:
    inputs, labels = inputs.to(device), labels.to(device)
    outputs = model(inputs)
    # predicted holds, for each sample in the batch, the index of the highest-scoring class
    _, predicted = torch.max(outputs,1)
    # top-1 accuracy: count matches for the whole batch at once instead of element by element
    top1_accuracy_numer += (predicted == labels).sum().item()
print(f"Top-1 accuracy is {top1_accuracy_numer*100/accuracy_denom}%")

Top-1 accuracy is 93.88%
