In [None]:
import os
import cv2
import random
import numpy as np
import pandas as pd

import matplotlib
import matplotlib.pyplot as plt
matplotlib.use("Agg")

import tensorflow as tf
from tqdm import tqdm
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (Input, Flatten)
from tensorflow.keras.layers import (Dense, Lambda, Dropout, Activation)

from tensorflow.keras.layers import (Conv2D, MaxPooling2D)
from tensorflow.keras.layers import BatchNormalization

from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import img_to_array

from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split

In [None]:
# Root of the Kaggle fashion dataset; both paths below are derived from it.
DATASET_ROOT = "/kaggle/input/fashion-product-images-dataset/fashion-dataset"

# CSV holding one metadata row per product.
STYLES_CSV_PATH = DATASET_ROOT + "/styles.csv"
# Image directory; the trailing slash is required because file names are
# appended to it by plain string concatenation.
IMAGES_PATH = DATASET_ROOT + "/images/"

In [None]:
# Read the product metadata, silently skipping malformed rows.
# `on_bad_lines="skip"` replaces the `error_bad_lines=False, warn_bad_lines=False`
# pair, which was deprecated in pandas 1.3 and removed in pandas 2.0.
df = pd.read_csv(STYLES_CSV_PATH, on_bad_lines="skip")
print(f"Total Rows: {df.shape[0]}\nTotal Columns: {df.shape[1]}")
df.head()

In [None]:
# Per-column count of missing values, shown as the cell output.
print('NaN Count:')
df.isnull().sum()

In [None]:
# Keep only complete rows and drop the columns that are not used as targets.
df = df.dropna()
df = df.drop(['year', 'productDisplayName', 'masterCategory', 'subCategory'], axis=1)

# Work on a random subset of at most 10000 rows. The seed makes the subset
# reproducible across runs, and the min() guards against DataFrame.sample
# raising when fewer than 10000 rows are left.
df = df.sample(min(10000, len(df)), random_state=42)

# Keep only products whose image file exists. File names whose stem is not
# purely numeric are ignored instead of crashing int(); a set makes the
# membership test cheap.
available_ids = {
    int(stem)
    for stem in (name.split('.')[0] for name in os.listdir(IMAGES_PATH))
    if stem.isdigit()
}
df = df[df['id'].isin(available_ids)]

# Pre-processing

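Turn each `id` into the full path of its image file (`IMAGES_PATH` + id + `.jpg`) and move the paths out of `df` into `image_ids`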

In [None]:
def _image_path(product_id):
    """Return the image file path for one product id."""
    return f"{IMAGES_PATH}{product_id}.jpg"


# Replace each id by its image path, then move that column out of `df`
# so that only the label columns remain in the frame.
df['id'] = df['id'].map(_image_path)
image_ids = df.pop('id')

Bin values that occur in fewer than 50 samples into a single `Other` class

In [None]:
def bin_values(x):
    """Relabel rare values of column ``x`` in the global ``df`` as 'Other'.

    A value is rare when it occurs in fewer than 50 rows. The frame is
    modified in place; nothing is returned.
    """
    counts = df[x].value_counts()
    rare_values = counts.index[counts < 50]
    is_rare = df[x].isin(rare_values)
    df.loc[is_rare, x] = 'Other'


# Apply the binning to every column still present in the frame.
for col in df.columns:
    bin_values(col)

Load Images

In [None]:
# Use the tf.keras copy of the ResNet preprocessing so it matches the
# `tensorflow.keras` ResNet50 model used elsewhere in this notebook.
from tensorflow.keras.applications.resnet import preprocess_input

# (height, width, channels) of the images fed to the network.
IMAGE_DIMS = (60, 60, 3)

def load_image(imagePath):
    """Read one image from disk and prepare it as network input.

    The image is resized to the height/width in ``IMAGE_DIMS``, converted
    from OpenCV's BGR channel order to RGB, and passed through the ResNet
    ``preprocess_input`` transform.

    Args:
        imagePath: path of the image file to read.

    Returns:
        The preprocessed image array.

    Raises:
        FileNotFoundError: if OpenCV cannot read the file.
    """
    image = cv2.imread(imagePath)
    if image is None:
        # cv2.imread signals failure by returning None; fail with a clear
        # message instead of an opaque error inside cv2.resize.
        raise FileNotFoundError(f"Could not read image: {imagePath}")
    # cv2.resize expects the target size as (width, height).
    image = cv2.resize(image, (IMAGE_DIMS[1], IMAGE_DIMS[0]))
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = preprocess_input(image)
    return image

image_data = [load_image(img_path) for img_path in tqdm(image_ids)]

# float32 is what Keras trains on; storing float64 would only double the
# memory needed for the image array.
image_data = np.array(image_data, dtype="float32")

Encode the label columns and split the data into train/test sets

In [None]:
# One binarizer per target column; they are kept so predictions can be
# mapped back to class names through `classes_`.
# NOTE(review): LabelBinarizer emits a single 0/1 column for a two-class
# target - confirm every column has three or more classes, since the model
# heads are trained with categorical cross-entropy on these arrays.
articleTypeLB, genderLB, baseColourLB, seasonLB, usageLB = (
    LabelBinarizer() for _ in range(5)
)

articleTypeLabels = articleTypeLB.fit_transform(df['articleType'].to_numpy())
genderLabels = genderLB.fit_transform(df['gender'].to_numpy())
baseColourLabels = baseColourLB.fit_transform(df['baseColour'].to_numpy())
seasonLabels = seasonLB.fit_transform(df['season'].to_numpy())
usageLabels = usageLB.fit_transform(df['usage'].to_numpy())

# Split images and all five label arrays together so rows stay aligned:
# 80% train / 20% test with a fixed seed.
split = train_test_split(
    image_data,
    articleTypeLabels,
    genderLabels,
    baseColourLabels,
    seasonLabels,
    usageLabels,
    test_size=0.2,
    random_state=42,
)

# train_test_split returns a (train, test) pair per input, in input order.
(
    trainX, testX,
    trainArticleTypeY, testArticleTypeY,
    trainGenderY, testGenderY,
    trainBaseColourY, testBaseColourY,
    trainSeasonY, testSeasonY,
    trainUsageY, testUsageY,
) = split

In [None]:
from tensorflow.keras.applications import ResNet50

In [None]:
def make_branch(res_input, n_out, act_type, name):
    """Build one classification head on top of the shared features.

    The head is three ReLU dense layers (512, 256, 128 units), a linear
    dense layer with ``n_out`` units, and a final activation layer.

    Args:
        res_input: tensor the head is attached to.
        n_out: number of units in the last dense layer.
        act_type: activation applied to the last dense layer's output.
        name: head name; the activation layer is named ``<name>_output``.

    Returns:
        The output tensor of the head's activation layer.
    """
    hidden = res_input
    for units in (512, 256, 128):
        hidden = Dense(units, activation='relu')(hidden)
    logits = Dense(n_out)(hidden)
    return Activation(act_type, name=f"{name}_output")(logits)

In [None]:
def build_model(width, height):
    """Build the multi-output classifier on a frozen ResNet50 backbone.

    Args:
        width: input image width in pixels.
        height: input image height in pixels.

    Returns:
        A Keras ``Model`` with one image input and five softmax outputs, in
        this order: article, gender, color, season, usage. The output
        layers are named ``<head>_output``.
    """
    # Honour the width/height arguments (previously ignored in favour of the
    # global IMAGE_DIMS); the channel count still comes from IMAGE_DIMS.
    input_shape = (height, width, IMAGE_DIMS[2])

    # Shared trunk: ImageNet-pretrained ResNet50 without its classifier.
    # trainable=False together with training=False keeps the backbone frozen.
    res50 = ResNet50(weights='imagenet', include_top=False, input_shape=input_shape)
    res50.trainable = False
    inputs = Input(shape=input_shape)
    x = res50(inputs, training=False)
    x = Flatten()(x)
    x = Dense(1024, activation='relu')(x)

    # One head per target; each head's width is the class count of the
    # matching fitted binarizer. The list order fixes the model output order.
    heads = [
        ('article', articleTypeLB),
        ('gender', genderLB),
        ('color', baseColourLB),
        ('season', seasonLB),
        ('usage', usageLB),
    ]
    outputs = [
        make_branch(x, len(binarizer.classes_), 'softmax', head_name)
        for head_name, binarizer in heads
    ]

    return Model(inputs=inputs, outputs=outputs)

In [None]:
model = build_model(60, 60)

# Every head is a softmax classifier trained with categorical cross-entropy;
# the keys are the names of the model's output layers.
losses = {
    "article_output": "categorical_crossentropy",
    "gender_output": "categorical_crossentropy",
    "color_output": "categorical_crossentropy",
    "season_output": "categorical_crossentropy",
    "usage_output": "categorical_crossentropy",
}

EPOCHS = 25
INIT_LR = 1e-5
BS = 32

# The `lr=` and `decay=` arguments of Adam are deprecated and rejected by
# current Keras optimizers. InverseTimeDecay with decay_steps=1 gives the
# same schedule as the old `decay` argument:
#     lr = INIT_LR / (1 + decay_rate * step)
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=INIT_LR,
    decay_steps=1,
    decay_rate=INIT_LR / EPOCHS,
)
opt = Adam(learning_rate=lr_schedule)

# Name the metric per output explicitly; a bare list is ambiguous for a
# multi-output model.
metrics = {output_name: ["accuracy"] for output_name in losses}
model.compile(optimizer=opt, loss=losses, metrics=metrics)

In [None]:
# Draw the layer graph of the compiled model; the result of plot_model is
# the cell's displayed output.
from tensorflow.keras.utils import plot_model
plot_model(model)

In [None]:
# Targets keyed by output-layer name, for the training and validation sets.
train_targets = {
    "article_output": trainArticleTypeY,
    "gender_output": trainGenderY,
    "color_output": trainBaseColourY,
    "season_output": trainSeasonY,
    "usage_output": trainUsageY,
}
val_targets = {
    "article_output": testArticleTypeY,
    "gender_output": testGenderY,
    "color_output": testBaseColourY,
    "season_output": testSeasonY,
    "usage_output": testUsageY,
}

# Train all heads jointly; the held-out split is used as validation data.
H = model.fit(
    trainX,
    train_targets,
    validation_data=(testX, val_targets),
    epochs=EPOCHS,
    batch_size=BS,
    verbose=1,
)

In [None]:
# %matplotlib inline
# NOTE(review): the import cell calls matplotlib.use("Agg"); with that
# backend these figures may not render inline - confirm.
h_df = pd.DataFrame(H.history)


def _history_columns(metric, validation):
    """Return history columns containing ``metric``, split by 'val' presence."""
    return [
        name for name in h_df.columns
        if metric in name and ('val' in name) == validation
    ]


# Four figures: training accuracy, validation accuracy, training loss,
# validation loss - one line per model output in each.
h_df.loc[:, _history_columns('accuracy', False)].plot()
h_df.loc[:, _history_columns('accuracy', True)].plot()
h_df.loc[:, _history_columns('loss', False)].plot()
h_df.loc[:, _history_columns('loss', True)].plot()

In [None]:
# Evaluate on the held-out split, with targets keyed by output-layer name.
eval_targets = {
    "article_output": testArticleTypeY,
    "gender_output": testGenderY,
    "color_output": testBaseColourY,
    "season_output": testSeasonY,
    "usage_output": testUsageY,
}
res = model.evaluate(testX, eval_targets, batch_size=32, verbose=0)

# The first six entries are printed as losses and the remaining ones as
# accuracies, converted to percentages rounded to two decimals.
print('loss', res[:6])
print('acc', [round(value * 100, 2) for value in res[6:]])

In [None]:
# Index of the test sample to inspect.
idx = 3

# Heads in the order the model returns them (article, gender, color, season,
# usage), each paired with its fitted binarizer and its test labels. The
# display names now match the encoders: previously the colour head was
# printed as "Age" and the season head as "Color".
heads = [
    ("Article", articleTypeLB, testArticleTypeY),
    ("Gender", genderLB, testGenderY),
    ("Color", baseColourLB, testBaseColourY),
    ("Season", seasonLB, testSeasonY),
    ("Usage", usageLB, testUsageY),
]

# predict expects a batch, so add a leading batch axis to the single image.
probas = model.predict(np.expand_dims(testX[idx], axis=0))

# Per head: predicted class with its probability, then the true class.
for (title, binarizer, true_labels), proba in zip(heads, probas):
    best = proba[0].argmax()
    predicted_text = "{}: {} ({:.2f}%)".format(
        title, binarizer.classes_[best], proba[0][best] * 100
    )
    print(predicted_text, '-----', binarizer.classes_[true_labels[idx].argmax()])