In [1]:
import os
import numpy as np
import cv2
import torch, torchvision
import copy
from torchsummary import summary

# Run on the GPU when PyTorch can see one, otherwise stay on the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

cuda


In [2]:
# Load the training and validation label arrays from disk.
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects,
# which can execute code -- only use it on files from a trusted source.
train_data = np.load('./train_data.npy', allow_pickle=True)
val_data = np.load('./validation_data.npy', allow_pickle=True)

# One shape per line: training split first, then validation split.
print(train_data.shape, val_data.shape, sep='\n')

(38519, 19)
(4024, 19)


In [3]:
# Loading Data
import pandas as pd
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

from utils.customDataset import FashionDataset

# Settings shared by the training and validation pipelines.
IMAGE_DIR = '../data/images_224x329'
BATCH_SIZE = 64

train_dataset = FashionDataset(train_data, IMAGE_DIR, mode='train')
val_dataset = FashionDataset(val_data, IMAGE_DIR, mode='val')

# Training batches are shuffled and loaded by 8 worker processes into pinned
# memory; the validation loader keeps the DataLoader defaults and a fixed order.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=8,
    pin_memory=True,
)
val_loader = DataLoader(dataset=val_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Sanity check: pull a single training batch and show the tensor shapes.
images, labels = next(iter(train_loader))
print(images.shape, labels.shape, sep='\n')

torch.Size([64, 3, 329, 224])
torch.Size([64, 18])


In [4]:
# Swin-T backbone initialised with the ImageNet-1k (V1) pretrained weights,
# selected through the weights enum (same style as the ResNet-34 baseline cell).
backbone = torchvision.models.swin_t(
    weights=torchvision.models.Swin_T_Weights.IMAGENET1K_V1
)

# Replace the classification head with a pass-through so the backbone's
# forward pass returns features instead of ImageNet logits.
backbone.head = torch.nn.Identity()

In [5]:
# Defining the classifier (Swin Transformer model)
# Number of classes for each of the 18 attribute categories.
# NOTE(review): presumably ordered like the 18 label columns -- confirm against FashionDataset.
attribute_classes = (
    [6, 5, 4, 3, 5, 3, 3, 3, 5, 8, 3, 3]  # 12 shape attributes
    + [8] * 3  # 3 fabric attributes
    + [8] * 3  # 3 color attributes
)

# Classifier with 18 forks (For each of the 18 attribute categories)
class AttributeClassifier(torch.nn.Module):
    """Multi-head linear classifier with one independent ``Linear`` fork per attribute.

    Args:
        in_features: Size of the feature vector fed to every fork.
        class_counts: Optional iterable giving the number of output classes
            of each fork. When omitted, the notebook-level
            ``attribute_classes`` list is used, so the original one-argument
            constructor call keeps working unchanged.
    """

    def __init__(self, in_features, class_counts=None) -> None:
        super().__init__()
        if class_counts is None:
            # Backward-compatible default: resolved at call time from the
            # module-level list rather than being hard-wired into the class.
            class_counts = attribute_classes
        # Forks are created in the order given, so fork i scores attribute i.
        self.forks = torch.nn.ModuleList(
            [
                torch.nn.Linear(in_features=in_features, out_features=class_count)
                for class_count in class_counts
            ]
        )

    def forward(self, x):
        """Apply every fork to ``x``.

        Returns:
            A list with one output tensor per fork, in fork order.
        """
        return [fork(x) for fork in self.forks]

# Model definition
class ClassifierModel(torch.nn.Module):
    """Feature-extracting backbone followed by the multi-fork attribute classifier.

    Args:
        backbone: Module applied to the input batch first.
        backbone_out_features: Width of the backbone's output; passed to
            ``AttributeClassifier`` as its input feature size.
    """

    def __init__(self, backbone, backbone_out_features) -> None:
        super().__init__()
        self.backbone = backbone
        self.classifier = AttributeClassifier(backbone_out_features)

    def forward(self, x):
        """Run ``x`` through the backbone and return the classifier's output."""
        features = self.backbone(x)
        return self.classifier(features)

# Wrap the Swin backbone (768 output features) with the multi-fork classifier
# and move the whole model to the selected device.
model = ClassifierModel(backbone, 768).to(device)

# Freeze every backbone parameter so only the classifier forks are trainable.
# NOTE(review): this does not put dropout / stochastic-depth layers in eval
# mode -- confirm how fit_classifier toggles train()/eval().
model.backbone.requires_grad_(False)

summary(model, (3, 329, 224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 96, 82, 56]           4,704
           Permute-2           [-1, 82, 56, 96]               0
         LayerNorm-3           [-1, 82, 56, 96]             192
         LayerNorm-4           [-1, 82, 56, 96]             192
ShiftedWindowAttention-5           [-1, 82, 56, 96]               0
   StochasticDepth-6           [-1, 82, 56, 96]               0
         LayerNorm-7           [-1, 82, 56, 96]             192
            Linear-8          [-1, 82, 56, 384]          37,248
              GELU-9          [-1, 82, 56, 384]               0
          Dropout-10          [-1, 82, 56, 384]               0
           Linear-11           [-1, 82, 56, 96]          36,960
          Dropout-12           [-1, 82, 56, 96]               0
  StochasticDepth-13           [-1, 82, 56, 96]               0
SwinTransformerBlock-14           [

In [None]:
%%capture temp_output
# Training the model
from utils.train_funcs import fit_classifier

# Hyperparameters for training the classifier forks on the frozen Swin backbone.
epochs = 5
learning_rate = 1e-3

# Only the classifier's parameters are handed to the optimizer.
optimizer = torch.optim.AdamW(model.classifier.parameters(), lr=learning_rate)
loss_func = torch.nn.CrossEntropyLoss()

# fit_classifier returns four histories: train loss/accuracy, then validation loss/accuracy.
(
    train_loss_history,
    train_acc_history,
    val_loss_history,
    val_acc_history,
) = fit_classifier(
    model,
    train_loader=train_loader,
    val_loader=val_loader,
    attributes=attribute_classes,
    optimizer=optimizer,
    loss_func=loss_func,
    epochs=epochs,
    device=device,
)



In [None]:
# Print what the preceding `%%capture temp_output` cell stored while the
# Swin model was training.
print(temp_output)

In [None]:
# Defining the baseline model (ResNet-34 CNN)
# ResNet-34 pretrained on ImageNet-1k (V1) serves as the CNN baseline backbone.
resnet_backbone = torchvision.models.resnet34(
    weights=torchvision.models.ResNet34_Weights.IMAGENET1K_V1
)

# Replace the final fully connected layer with a pass-through so the
# backbone returns features rather than ImageNet logits.
resnet_backbone.fc = torch.nn.Identity()

# Same multi-fork classifier as the Swin model, fed 512 backbone features.
baseline_model = ClassifierModel(resnet_backbone, 512)

# Freeze the backbone so only the classifier forks are trainable, then move
# the model to the selected device.
baseline_model.backbone.requires_grad_(False)
baseline_model.to(device)

summary(baseline_model, (3, 329, 224))


In [None]:
%%capture temp_output
# Training baseline model

# Same hyperparameters as the Swin run so the two models are comparable.
epochs = 5
learning_rate = 1e-3
loss_func = torch.nn.CrossEntropyLoss()

# The optimizer must own the parameters of the model being trained. It was
# previously built from model.classifier.parameters() (the Swin model's head),
# so the baseline classifier was never updated.
optimizer = torch.optim.AdamW(baseline_model.classifier.parameters(), lr=learning_rate)

# fit_classifier returns four histories: train loss/accuracy, then validation loss/accuracy.
train_loss_history_baseline, train_acc_history_baseline, val_loss_history_baseline, val_acc_history_baseline = fit_classifier(
    baseline_model,
    train_loader=train_loader,
    val_loader=val_loader,
    attributes=attribute_classes,
    optimizer=optimizer,
    loss_func=loss_func,
    epochs=epochs,
    device=device
)

In [None]:
# Print what the preceding `%%capture temp_output` cell stored while the
# baseline model was training (that cell overwrites the earlier temp_output).
print(temp_output)