In [None]:

# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

# Number of bytes requested per read() call while streaming each download.
CHUNK_SIZE = 40960
# Comma-separated list of "<target directory>:<URL-encoded download URL>" entries;
# the download loop below splits on ',' and ':' and URL-decodes the second part.
# NOTE(review): the URLs carry X-Goog-Date=20240628T203447Z and X-Goog-Expires=259200
# query parameters, so they look like time-limited signed links that must be
# regenerated once they expire — confirm before relying on this cell.
DATA_SOURCE_MAPPING = 'fashion-product-images-dataset:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F139630%2F329006%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240628%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240628T203447Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D0f17450769d54defbd574cbfa6c35ea5c69b69545e041fb19dae864d189c557070ff1047f4bc217a486bfc5a405e219ff3c5d276d3c2e753a225a8a6ade1c569d4912a42e0190ea9512fdfc0e3ab5d35c196ca6f9be9b0ce7129830186d772c0d3a79be64ea4bcdbdb643b13f072cec24e90ab1c0c84e03fcd1d89f0f501ee56f739b58468b87b13ddb5cde47d944c0d91a3c1ec6c5aa81bbad1d2a4d520a8b63736be4ece1a0f8dc70150703b9f3b35db079118f4a25e44146e09317ff51e553c628448983b45f5a251fd57d2ec5869135894ab6ee074214cf88852b85388132da8e9e3ce76d023a893e60eaa580410875da83e7b8f18b3d0f9c1119650a977,model-fashion/tensorflow2/model-fashion/1:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-models-data%2F58768%2F70394%2Fbundle%2Farchive.tar.gz%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240628%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240628T203447Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D7f77158db7fdb3c277fe4ad1d00b5304e2d43917ad998760d5315ca82d47e6e644d9eb9e1af7b572370d415603522eebfae0c7a6f00cc3785641cfdfda3273affc77f6579248af5e3ea00858f972171cadb7aa581b5dfa95fac64fa132279233249fe5256a4693fc1ccc78322150cecfbd3baf8c1620ed4444aae55450c4421e30fbf78ec8469df524233af705a34d4dca8bcc56ea1dbe418eb215b5ad138d6aeaa8c058c5533d533a11eb3127b97364ba613f6b8835f7d747598b31a29a4d9552d3025e95974985b3aaf97109b92a770dccbf9e21020176eaeddd6554c07317aef7c235882c3d14711d288f7a1b1251e766252b804ddd7613f589f14a2dba5f'

KAGGLE_INPUT_PATH='/kaggle/input'
KAGGLE_WORKING_PATH='/kaggle/working'
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null
shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

try:
  os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
  pass
try:
  os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
  pass

# Download every configured data source and unpack it below KAGGLE_INPUT_PATH.
for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    # Each entry is "<directory>:<url-encoded URL>"; split on the first colon only.
    directory, download_url_encoded = data_source_mapping.split(':', 1)
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            total_length = fileres.headers['content-length']
            print(f'Downloading {directory}, {total_length} bytes compressed')
            # The header may be absent; in that case the bar stays empty and
            # only the running byte count is shown.
            total_bytes = int(total_length) if total_length else 0
            dl = 0
            data = fileres.read(CHUNK_SIZE)
            while len(data) > 0:
                dl += len(data)
                tfile.write(data)
                done = int(50 * dl / total_bytes) if total_bytes else 0
                sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                sys.stdout.flush()
                data = fileres.read(CHUNK_SIZE)
            # The tar branch reopens the temp file by name, so buffered bytes
            # must reach the disk before extraction starts.
            tfile.flush()
            if filename.endswith('.zip'):
                with ZipFile(tfile) as zfile:
                    zfile.extractall(destination_path)
            else:
                # Use a distinct name so the `tarfile` module is not rebound
                # to the opened archive.
                with tarfile.open(tfile.name) as tar_archive:
                    tar_archive.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError:
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
        continue
    except OSError:
        print(f'Failed to load {download_url} to path {destination_path}')
        continue

print('Data source import complete.')


In [None]:
import os
import cv2
import random
import numpy as np
import pandas as pd

import matplotlib
import matplotlib.pyplot as plt
# Select matplotlib's non-interactive "Agg" backend.
# NOTE(review): with Agg active, figures may not render inline in the notebook —
# confirm this is intended.
matplotlib.use("Agg")

import tensorflow as tf
from tqdm import tqdm
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (Input, Flatten)
from tensorflow.keras.layers import (Dense, Lambda, Dropout, Activation)

from tensorflow.keras.layers import (Conv2D, MaxPooling2D)
from tensorflow.keras.layers import BatchNormalization

from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import img_to_array

from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split

In [None]:
# Both inputs live under the same extracted dataset folder.
DATASET_DIR = "/kaggle/input/fashion-product-images-dataset/fashion-dataset/"
STYLES_CSV_PATH = DATASET_DIR + "styles.csv"
# The trailing slash is required: image paths are built by string concatenation.
IMAGES_PATH = DATASET_DIR + "images/"

In [None]:
# Load the product metadata. Malformed rows are skipped silently:
# `on_bad_lines="skip"` replaces the `error_bad_lines=False, warn_bad_lines=False`
# pair, which no longer exists in pandas 2.x.
df = pd.read_csv(STYLES_CSV_PATH, on_bad_lines="skip")
print(f"Total Rows: {df.shape[0]}\nTotal Columns: {df.shape[1]}")
df.head()

In [None]:
# Number of missing values in each column of the metadata table.
print('NaN Count:')
nan_counts = df.isna().sum()
nan_counts

In [None]:
# Drop incomplete rows, then the columns that are not used as labels.
df = df.dropna()
df = df.drop(['year', 'productDisplayName', 'masterCategory', 'subCategory','usage'], axis=1)

# Keep only rows whose image file exists. The id is the part of the file name
# before the first '.'; entries whose stem is not an integer (e.g. stray system
# files) are ignored instead of aborting the cell with a ValueError.
available_image_ids = set()
for image_file in os.listdir(IMAGES_PATH):
    try:
        available_image_ids.add(int(image_file.split('.')[0]))
    except ValueError:
        continue

df = df[df['id'].isin(available_image_ids)]

I have grouped similar colours and similar article types into broader categories. This reduces the number of classes and helps mitigate class imbalance.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import cv2
import matplotlib.image as mpimg

import os
import PIL
import PIL.Image
import pathlib


# Remove the rows labelled 'Boys' or 'Girls' in the gender column.
children_rows = df.index[df['gender'].isin(['Boys', 'Girls'])]
df = df.drop(index=children_rows)
# Coarse article group -> the original articleType values it absorbs.
article_mapping = {
    'Clothing': ['Tshirts', 'Shirts', 'Kurtas', 'Tops', 'Briefs', 'Jeans', 'Trousers', 'Bra', 'Shorts', 'Sarees', 'Dresses', 'Track Pants', 'Sweatshirts', 'Sweaters', 'Jackets', 'Kurtis', 'Innerwear Vests', 'Tunics', 'Nightdress', 'Leggings', 'Night suits', 'Trunk', 'Capris', 'Skirts', 'Kurta Sets', 'Lounge Pants', 'Boxers'],
    'Footwear': ['Casual Shoes', 'Sports Shoes', 'Heels', 'Flip Flops', 'Sandals', 'Formal Shoes', 'Flats', 'Sports Sandals'],
    'Accessories': ['Watches', 'Handbags', 'Sunglasses', 'Wallets', 'Belts', 'Backpacks', 'Socks', 'Earrings', 'Clutches', 'Caps', 'Pendant', 'Necklace and Chains', 'Scarves', 'Ring', 'Dupatta', 'Cufflinks', 'Accessory Gift Set', 'Stoles', 'Duffel Bag', 'Bangle', 'Laptop Bag', 'Bracelet', 'Jewellery Set'],
    'Personal Care': ['Perfume and Body Mist', 'Deodorant', 'Nail Polish', 'Lipstick', 'Lip Gloss', 'Kajal and Eyeliner', 'Foundation and Primer', 'Fragrance Gift Set'],
    'Other': ['Other', 'Free Gifts']
}

# Coarse colour group -> the original baseColour values it absorbs.
# Every colour must belong to exactly one group. 'Purple' and 'Lavender' used to
# be listed under both 'Pink' and 'Purple'; when the mapping is inverted the
# later group wins, so they always ended up as 'Purple'. The redundant entries
# under 'Pink' are removed so the table states that outcome explicitly.
color_mapping = {
    'Black': ['Black', 'Charcoal'],
    'Grey': ['Grey', 'Steel', 'Grey Melange'],
    'White': ['White', 'Off White', 'Cream', 'Beige', 'Tan'],
    'Blue': ['Blue', 'Navy Blue', 'Turquoise Blue', 'Teal'],
    'Red': ['Red', 'Maroon', 'Rust'],
    'Pink': ['Pink', 'Magenta'],
    'Purple': ['Purple', 'Lavender'],
    'Green': ['Green', 'Olive'],
    'Yellow': ['Yellow', 'Mustard', 'Gold'],
    'Orange': ['Orange'],
    'Brown': ['Brown', 'Bronze', 'Copper'],
    'Other Colors': ['Silver', 'Multi', 'Other', 'Peach', 'Skin', 'Khaki']
}

def _invert_groups(groups):
    """Turn {group: [members]} into {member: group}; a later group wins on duplicates."""
    member_to_group = {}
    for group_name, members in groups.items():
        for member in members:
            member_to_group[member] = group_name
    return member_to_group

# Lookup tables from an original label to its coarse group.
color_reverse_mapping = _invert_groups(color_mapping)
article_reverse_mapping = _invert_groups(article_mapping)

# Replace each label by its group; labels without a group keep their value.
df['baseColour'] = df['baseColour'].map(color_reverse_mapping).fillna(df['baseColour'])
df['articleType'] = df['articleType'].map(article_reverse_mapping).fillna(df['articleType'])

# Class frequencies after grouping. Series.value_counts() is used because the
# top-level pd.value_counts() helper is deprecated in recent pandas releases.
catcounts = df['articleType'].value_counts()
print(catcounts)

catcounts = df['baseColour'].value_counts()
print(catcounts)


In [None]:
# Take the id column out of the frame and turn every id into its image path
# (IMAGES_PATH + id + '.jpg').
image_ids = df.pop('id').apply(lambda image_id: IMAGES_PATH + str(image_id) + '.jpg')


In [None]:
# Inspect the image paths built in the previous cell.
print(image_ids)

In [None]:
from keras.applications.resnet import preprocess_input


# Network input size as (height, width, channels).
IMAGE_DIMS = (60, 60, 3)

def load_image(imagePath):
    """Read one image from disk and prepare it as model input.

    Args:
        imagePath: Path of the image file to read.

    Returns:
        The image resized to IMAGE_DIMS, converted from OpenCV's BGR channel
        order to RGB and passed through ``preprocess_input``.

    Raises:
        FileNotFoundError: If OpenCV cannot read the file (``cv2.imread``
            returns ``None`` for missing or undecodable files).
    """
    image = cv2.imread(imagePath)
    if image is None:
        # Fail with the offending path instead of a cryptic cv2.resize error.
        raise FileNotFoundError(f"Could not read image: {imagePath}")
    # cv2.resize expects the target size as (width, height).
    image = cv2.resize(image, (IMAGE_DIMS[1], IMAGE_DIMS[0]))
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = preprocess_input(image)
    return image

# Load every image into one array. float32 is used because dtype="float" means
# float64, which doubles the memory of the array.
# NOTE(review): assumes the model consumes float32 inputs (the Keras default), so
# the narrower dtype does not change training — confirm.
image_data = np.array(
    [load_image(img_path) for img_path in tqdm(image_ids)],
    dtype="float32",
)

In [None]:
# One binarizer per predicted attribute; each is reused later to decode
# predictions back into label strings.
articleTypeLB, genderLB, baseColourLB, seasonLB = (LabelBinarizer() for _ in range(4))

# One-hot style label matrices, row-aligned with `image_data`.
articleTypeLabels = articleTypeLB.fit_transform(df['articleType'].values)
genderLabels = genderLB.fit_transform(df['gender'].values)
baseColourLabels = baseColourLB.fit_transform(df['baseColour'].values)
seasonLabels = seasonLB.fit_transform(df['season'].values)

# Split images and all four label sets together (70% train / 30% test) so the
# rows stay aligned; the fixed seed makes the split repeatable.
split = train_test_split(
    image_data,
    articleTypeLabels, genderLabels, baseColourLabels, seasonLabels,
    test_size=0.3,
    random_state=42,
)

# train_test_split returns a (train, test) pair for every input, in input order.
trainX, testX = split[0], split[1]
trainArticleTypeY, testArticleTypeY = split[2], split[3]
trainGenderY, testGenderY = split[4], split[5]
trainBaseColourY, testBaseColourY = split[6], split[7]
trainSeasonY, testSeasonY = split[8], split[9]

I use class weights to address the class imbalance. The class-weight method assigns a different weight to each class to counteract the imbalance. During training, these weights scale the loss function so that misclassifying a minority-class instance incurs a higher penalty than misclassifying a majority-class instance. This encourages the model to pay more attention to the minority classes, improving their classification performance.

In [None]:

from sklearn.utils.class_weight import compute_class_weight
def _balanced_class_weights(labels):
    """Return {class position: 'balanced' weight} for one label column.

    Positions follow the sorted unique labels returned by ``np.unique``.
    """
    weights = compute_class_weight('balanced', classes=np.unique(labels), y=labels)
    return dict(enumerate(weights))

# Per-attribute weights computed from the label columns of `df`.
color_class_weights = _balanced_class_weights(df['baseColour'])
type_class_weights = _balanced_class_weights(df['articleType'])
season_class_weights = _balanced_class_weights(df['season'])
gender_class_weights = _balanced_class_weights(df['gender'])


In [None]:
from tensorflow.keras.applications import ResNet50

In [None]:
def make_branch(res_input, n_out, act_type, name):
    """Build one classification head on top of the shared features.

    Args:
        res_input: Tensor the head is attached to.
        n_out: Number of units of the final Dense layer.
        act_type: Activation applied to the final layer.
        name: Head name; the output layer is named ``<name>_output``.

    Returns:
        The output tensor of the head.
    """
    hidden = res_input
    # Three ReLU layers of decreasing width.
    for units in (512, 256, 128):
        hidden = Dense(units, activation='relu')(hidden)
    logits = Dense(n_out)(hidden)
    return Activation(act_type, name=name + '_output')(logits)

In [None]:
def build_model(width, height):
    """Build the four-headed classifier on a frozen ResNet50 backbone.

    Args:
        width: Input image width in pixels.
        height: Input image height in pixels.

    Returns:
        A Keras ``Model`` whose outputs are, in order, the article, gender,
        color and season heads (layers ``article_output``, ``gender_output``,
        ``color_output`` and ``season_output``).
    """
    # Honour the arguments instead of silently using the global IMAGE_DIMS;
    # three channels are fixed because the backbone loads ImageNet weights.
    input_shape = (height, width, 3)

    # Shared feature extractor: the backbone is frozen and called in inference
    # mode; a trainable Dense layer follows it.
    res50 = ResNet50(weights='imagenet', include_top=False, input_shape=input_shape)
    res50.trainable = False
    inputs = Input(shape=input_shape)
    x = res50(inputs, training=False)
    x = Flatten()(x)
    x = Dense(1024, activation='relu')(x)

    # One softmax head per attribute, sized by the fitted label binarizers.
    article_branch = make_branch(x, len(articleTypeLB.classes_), 'softmax', 'article')
    gender_branch = make_branch(x, len(genderLB.classes_), 'softmax', 'gender')
    color_branch = make_branch(x, len(baseColourLB.classes_), 'softmax', 'color')
    season_branch = make_branch(x, len(seasonLB.classes_), 'softmax', 'season')

    model = Model(inputs=inputs,
                  outputs=[article_branch, gender_branch, color_branch,
                           season_branch])
    return model

In [None]:
# build_model takes (width, height); IMAGE_DIMS is (height, width, channels).
model = build_model(IMAGE_DIMS[1], IMAGE_DIMS[0])

# Every head is a softmax trained with categorical cross-entropy.
losses = {
    "article_output": "categorical_crossentropy",
    "gender_output": "categorical_crossentropy",
    "color_output": "categorical_crossentropy",
    "season_output": "categorical_crossentropy"
}

# NOTE(review): EPOCHS only sets the decay rate here; the fit call passes its own
# epoch count (100) — confirm which value is intended.
EPOCHS = 25
INIT_LR = 1e-5
BS = 32

# Current Keras optimizers reject the legacy `lr=` and `decay=` arguments.
# The legacy decay computed lr / (1 + decay * step); InverseTimeDecay with
# decay_steps=1 is the same formula expressed as a schedule.
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=INIT_LR,
    decay_steps=1,
    decay_rate=INIT_LR / EPOCHS,
)
opt = Adam(learning_rate=lr_schedule)
model.compile(optimizer=opt, loss=losses, metrics=["accuracy"])

In [None]:
from tensorflow.keras.utils import plot_model
# Render the architecture diagram.
plot_model(model)

# Class weights of every head, keyed by the name of its output layer.
class_weight = dict(
    color_output=color_class_weights,
    article_output=type_class_weights,
    season_output=season_class_weights,
    gender_output=gender_class_weights,
)

In [None]:
# Targets keyed by output-layer name.
train_targets = {
    "article_output": trainArticleTypeY,
    "gender_output": trainGenderY,
    "color_output": trainBaseColourY,
    "season_output": trainSeasonY,
}
test_targets = {
    "article_output": testArticleTypeY,
    "gender_output": testGenderY,
    "color_output": testBaseColourY,
    "season_output": testSeasonY,
}

def _class_to_sample_weights(one_hot_labels, weights_by_class):
    """Look up the class weight of every row of a one-hot label matrix."""
    lookup = np.array([weights_by_class[i] for i in range(len(weights_by_class))])
    return lookup[one_hot_labels.argmax(axis=1)]

# Keras accepts `class_weight` only for single-output models, so the per-head
# class weights are expanded into per-sample weights, one array per output.
# Assumes each label matrix has one column per class (3+ classes) — TODO confirm.
train_sample_weights = {
    output_name: _class_to_sample_weights(labels, class_weight[output_name])
    for output_name, labels in train_targets.items()
}

# NOTE(review): 100 epochs is hard-coded here while EPOCHS = 25 is defined in the
# compile cell — confirm which value is intended.
H = model.fit(trainX,
    train_targets,
    validation_data=(testX, test_targets),
    epochs=100,
    batch_size=BS,
    sample_weight=train_sample_weights,
    verbose=1)

In [None]:
# Evaluate on the held-out split. Results are requested as a name -> value dict:
# with four heads the list form holds 5 losses followed by 4 accuracies, so the
# previous fixed split at index 6 put one accuracy into the loss list and
# dropped it from the accuracy list.
res = model.evaluate(testX,
    {"article_output": testArticleTypeY,
    "gender_output": testGenderY,
    "color_output": testBaseColourY,
    "season_output": testSeasonY}, batch_size=BS, verbose=0, return_dict=True)

loss_values = {name: value for name, value in res.items() if name.endswith('loss')}
# Accuracies are reported as percentages rounded to two decimals.
accuracy_values = {name: round(value * 100, 2) for name, value in res.items() if 'acc' in name}
print('loss', loss_values)
print('acc', accuracy_values)

In [None]:
# Inference on a single sample of the held-out split.
idx=130

# The model expects a batch, so a leading axis of size 1 is added.
sample_batch = testX[idx][np.newaxis, ...]
(categoryProba, genderProba, colorProba, seasonProba) = model.predict(sample_batch)

# Position of the most probable class of every head.
categoryIdx = np.argmax(categoryProba[0])
genderIdx = np.argmax(genderProba[0])
colorIdx = np.argmax(colorProba[0])
seasonIdx = np.argmax(seasonProba[0])

# Decode the positions with the binarizer that encoded each attribute.
categoryLabel = articleTypeLB.classes_[categoryIdx]
genderLabel = genderLB.classes_[genderIdx]
colorLabel = baseColourLB.classes_[colorIdx]
seasonLabel = seasonLB.classes_[seasonIdx]

categoryText = f"Category: {categoryLabel} ({categoryProba[0][categoryIdx] * 100:.2f}%)"
genderText = f"Gender: {genderLabel} ({genderProba[0][genderIdx] * 100:.2f}%)"
colorText = f"Color: {colorLabel} ({colorProba[0][colorIdx] * 100:.2f}%)"
seasonText = f"Season: {seasonLabel} ({seasonProba[0][seasonIdx] * 100:.2f}%)"

# Print every prediction next to the true label of the same sample.
for predicted_text, binarizer, true_labels in (
    (categoryText, articleTypeLB, testArticleTypeY),
    (genderText, genderLB, testGenderY),
    (colorText, baseColourLB, testBaseColourY),
    (seasonText, seasonLB, testSeasonY),
):
    print(predicted_text, '-----', binarizer.classes_[true_labels[idx].argmax()])


In [None]:
# Save the trained model into the Kaggle working directory.
# NOTE(review): the external-inference cell loads its model from
# /kaggle/input/model-fashion/..., not from this path — confirm which file is intended.
model.save('/kaggle/working/multi_output_model.h5')

In [None]:
#inference on external data
from keras.applications.resnet import preprocess_input
# Network input size as (height, width, channels).
IMAGE_DIMS = (60, 60, 3)

def load_image_inference(imagePath):
    """Read an external image and prepare it as model input.

    Args:
        imagePath: Path of the image file to read.

    Returns:
        The image resized to IMAGE_DIMS, converted from OpenCV's BGR channel
        order to RGB and passed through ``preprocess_input``.

    Raises:
        FileNotFoundError: If OpenCV cannot read the file (``cv2.imread``
            returns ``None`` for missing or undecodable files).
    """
    image = cv2.imread(imagePath)
    if image is None:
        # Fail with the offending path instead of a cryptic cv2.resize error.
        raise FileNotFoundError(f"Could not read image: {imagePath}")
    # cv2.resize expects the target size as (width, height).
    image = cv2.resize(image, (IMAGE_DIMS[1], IMAGE_DIMS[0]))
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = preprocess_input(image)
    return image

#model loading
model = tf.keras.models.load_model('/kaggle/input/model-fashion/tensorflow2/model-fashion/1/multi_output_model.h5')

#image loading
external_image = load_image_inference("/kaggle/input/test-image/image_1.jpeg")

# The heads are decoded as article, gender, color, season: the third output uses
# the base-colour binarizer and the fourth the season binarizer. They were
# previously named and printed as "Age" and "Color".
# NOTE(review): assumes the loaded file has the same head order as build_model — confirm.
(categoryProba, genderProba, colorProba, seasonProba) = model.predict(np.expand_dims(external_image, axis=0))

categoryIdx = categoryProba[0].argmax()
genderIdx = genderProba[0].argmax()
colorIdx = colorProba[0].argmax()
seasonIdx = seasonProba[0].argmax()

categoryLabel = articleTypeLB.classes_[categoryIdx]
genderLabel = genderLB.classes_[genderIdx]
colorLabel = baseColourLB.classes_[colorIdx]
seasonLabel = seasonLB.classes_[seasonIdx]

categoryText = "Category: {} ({:.2f}%)".format(categoryLabel, categoryProba[0][categoryIdx] * 100)
genderText = "Gender: {} ({:.2f}%)".format(genderLabel, genderProba[0][genderIdx] * 100)
colorText = "Color: {} ({:.2f}%)".format(colorLabel, colorProba[0][colorIdx] * 100)
seasonText = "Season: {} ({:.2f}%)".format(seasonLabel, seasonProba[0][seasonIdx] * 100)

# Only predictions are printed: the image is not part of the test split, so the
# labels of test sample `idx` are not its ground truth.
print(categoryText)
print(genderText)
print(colorText)
print(seasonText)