In [None]:
import numpy as np
import pandas as pd
import os
import cv2

# Locations of the raw dataset: metadata CSV and the folder of source images.
PATH = "/kaggle/input/fashion-product-images-dataset/fashion-dataset"
CSV_PATH = os.path.join(PATH,'styles.csv')
IMG_PATH = os.path.join(PATH,'images')

# Directory that receives the resized copies of the images.
PP_IMG_PATH = "/kaggle/working/preprocessed/"
# exist_ok=True keeps the cell idempotent on re-run and removes the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(PP_IMG_PATH, exist_ok=True)

from tqdm import tqdm
import tensorflow as tf
import multiprocessing as mp
from tensorflow.keras import Model
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing.image import ImageDataGenerator
# Print True when TensorFlow can see at least one physical GPU.
# tf.test.is_gpu_available() is deprecated in TF 2.x; listing the physical
# GPU devices is the documented replacement.
print(len(tf.config.list_physical_devices('GPU')) > 0)

In [None]:
# Show the GPU device name reported by TensorFlow as the cell output.
tf.test.gpu_device_name()

In [None]:
# Load the product metadata, silently skipping malformed CSV rows.
# on_bad_lines='skip' replaces error_bad_lines=False / warn_bad_lines=False,
# which were deprecated in pandas 1.3 and removed in pandas 2.0
# (requires pandas >= 1.3).
df = pd.read_csv(CSV_PATH, on_bad_lines='skip')
print('DF shape:', df.shape)

# Keep only rows that carry a colour label. The explicit .copy() makes the
# column assignment below act on an independent frame, which avoids pandas'
# SettingWithCopyWarning.
df = df[df['baseColour'].notna()].copy()
# Turn the product id into the matching image file name, e.g. 1234 -> '1234.jpg'.
df['id'] = df['id'].astype(str) + '.jpg'
# Drop rows whose image file does not exist in the image folder.
df = df[df['id'].isin(os.listdir(IMG_PATH))]

# Reproducible 80/20 split: sample 80% for training, the remaining rows form
# the held-out test set.
train_df = df.sample(frac=0.8,random_state=43)
test_df = df.drop(train_df.index)
print('Train-DF shape:', train_df.shape)
print('Test-DF shape:', test_df.shape)

In [None]:
def preprocess_image(sub_df):
    """Resize every image listed in ``sub_df`` to 224x224 and save it to ``PP_IMG_PATH``.

    Parameters
    ----------
    sub_df : pandas.DataFrame
        Frame with an ``id`` column holding image file names; each name is
        resolved against ``IMG_PATH`` for reading and against ``PP_IMG_PATH``
        for writing.

    Returns
    -------
    None
        The resized images are written to disk under their original file
        names. Files that OpenCV cannot read are skipped and their count is
        printed once the loop finishes.
    """
    skipped = 0
    for file_name in tqdm(sub_df['id'], total=sub_df.shape[0]):
        img = cv2.imread(os.path.join(IMG_PATH, file_name))
        if img is None:
            # cv2.imread returns None for a missing or undecodable file;
            # passing that to cv2.resize would raise and abort the whole batch.
            skipped += 1
            continue
        resized = cv2.resize(img, (224, 224))
        cv2.imwrite(os.path.join(PP_IMG_PATH, file_name), resized)
    if skipped:
        print('Skipped unreadable images:', skipped)

In [None]:
def multiprocess(subset, n_workers=None):
    """Run ``preprocess_image`` over ``subset`` in parallel worker processes.

    The frame is cut into contiguous row chunks, one per worker, and each
    chunk is handed to ``preprocess_image`` through a ``multiprocessing.Pool``.

    Parameters
    ----------
    subset : pandas.DataFrame
        Rows to process; forwarded chunk by chunk to ``preprocess_image``.
    n_workers : int, optional
        Number of worker processes. Defaults to ``mp.cpu_count()``. It is
        capped at the number of rows so no worker receives an empty chunk.

    Returns
    -------
    None
    """
    n_rows = subset.shape[0]
    if n_rows == 0:
        # Nothing to do; avoid spawning a pool for an empty frame.
        return
    if n_workers is None:
        n_workers = mp.cpu_count()
    n_workers = max(1, min(n_workers, n_rows))

    # Ceiling division so that all rows are covered by at most n_workers chunks.
    chunk_size = -(-n_rows // n_workers)
    chunks = [subset.iloc[start:start + chunk_size]
              for start in range(0, n_rows, chunk_size)]

    # The context manager tears the pool down even when a worker raises,
    # so no processes are left behind.
    with mp.Pool(n_workers) as pool:
        pool.map(preprocess_image, chunks)

In [None]:
# Resize the training images in parallel; results are written under PP_IMG_PATH.
multiprocess(train_df)

In [None]:
# Resize the held-out test images in parallel; results are written under PP_IMG_PATH.
multiprocess(test_df)

In [None]:
image_data_generator = ImageDataGenerator(rotation_range=0.2,
                                          width_shift_range=0.2,
                                          height_shift_range=0.2,
                                          shear_range=0.1,
                                          zoom_range=0.3,
                                          horizontal_flip=True,
                                          preprocessing_function=preprocess_input,
                                          validation_split=0.2)

image_data_generator_config = {
    'x_col':'id',
    'y_col':'baseColour',
    'directory':PP_IMG_PATH, 
    'target_size':(224,224), 
    "batch_size":128, 
    "class_mode":'categorical'}

# train_generator = image_data_generator.flow_from_dataframe(subset = 'training',
#                                                            dataframe = train_df,
#                                                            **image_data_generator_config)

# val_generator = image_data_generator.flow_from_dataframe(subset = 'validation', 
#                                                           dataframe = train_df,
#                                                           **image_data_generator_config)

test_generator = image_data_generator.flow_from_dataframe(dataframe = test_df,
                                                          **image_data_generator_config)

In [None]:
num_classes = len(set(test_generator.classes))

vgg_model = VGG16(include_top=False, weights='imagenet', input_shape=test_generator.image_shape)

for layer in vgg_model.layers:
    layer.trainable = False

x = Flatten()(vgg_model.output)
x = Dense(256, activation='relu')(x)
x = Dropout(0.5)(x)
x = Dense(num_classes, activation='softmax')(x)

model = Model(vgg_model.input, x)

# model.compile(optimizer=SGD(lr=0.0001, momentum=0.9), loss='categorical_crossentropy', metrics = ['accuracy'])
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics = ['accuracy'])

In [None]:
es = EarlyStopping(monitor='val_loss', min_delta=0.001, patience=7, verbose=0, mode='min', restore_best_weights=True)

In [None]:
train_ss = test_generator.n//test_generator.batch_size
# val_ss = val_generator.n//val_generator.batch_size

H = model.fit(test_generator,
                     steps_per_epoch = train_ss,
                     epochs = 5,
                     callbacks = [es])
#                      validation_data = val_data,
#                      validation_steps = val_ss)