# Data pre-processing

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os # accessing directory structure
from sklearn.model_selection import train_test_split

In [None]:
# Root folder of the fashion product dataset. The trailing slash matters:
# later cells build file paths by plain string concatenation.
DATASET_PATH = "/kaggle/input/fashion-product-images-dataset/fashion-dataset/fashion-dataset/"
# Show what the dataset folder contains.
_dataset_entries = os.listdir(DATASET_PATH)
print(_dataset_entries)

In [None]:
# Load the product metadata table. Malformed rows are skipped with a warning
# instead of aborting the load.
try:
    # pandas >= 1.3 spelling; `error_bad_lines` was removed in pandas 2.0.
    # 'warn' matches the old error_bad_lines=False default (skip + warn).
    image_meta = pd.read_csv(DATASET_PATH + 'styles.csv', on_bad_lines='warn')
except TypeError:
    # Older pandas does not know `on_bad_lines`; fall back to the legacy flag.
    image_meta = pd.read_csv(DATASET_PATH + 'styles.csv', error_bad_lines=False)

In [None]:
# Dimensions of the metadata table as (rows, columns).
image_meta.shape

In [None]:
# Preview the first rows of the metadata table.
image_meta.head()

In [None]:
# File name of each product image, built from its id: "<id>.jpg".
image_meta['image'] = image_meta['id'].astype(str) + ".jpg"

In [None]:
image_meta.masterCategory.value_counts()

In [None]:
train_image_meta = image_meta.groupby('masterCategory').filter(lambda x: len(x) > 2000);

In [None]:
#train_image_meta = train_image_meta.groupby('masterCategory').head(2400)

In [None]:
#train_image_meta.masterCategory.value_counts()

In [None]:
# Hold out 10% of the filtered rows as the test set.
# NOTE(review): no random_state or stratify is passed, so the split (and the
# class balance of each part) changes on every run.
train_image_meta, test_image_meta = train_test_split(train_image_meta, test_size=0.1)

In [None]:
# Split 20% of the remaining rows off as the validation set (same caveat).
train_image_meta, val_image_meta = train_test_split(train_image_meta, test_size=0.2)

In [None]:
# Peek at the first five training rows.
train_image_meta.head(5)

In [None]:
from keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array, array_to_img

In [None]:
# Product ids excluded from the experiment (presumably rows whose image file
# is missing from the dataset -- confirm).
EXCLUDED_IDS = [12347, 39403, 39410, 39425, 39401]

# The random split above can place any of these ids in the validation or test
# frame as well, so filter all three frames rather than only the training one.
train_image_meta = train_image_meta[~train_image_meta["id"].isin(EXCLUDED_IDS)]
val_image_meta = val_image_meta[~val_image_meta["id"].isin(EXCLUDED_IDS)]
test_image_meta = test_image_meta[~test_image_meta["id"].isin(EXCLUDED_IDS)]

In [None]:
# Target image size; load_img takes target_size as (height, width).
IMG_WIDTH = 100
IMG_HEIGHT = 75
IMG_DIM = (IMG_HEIGHT, IMG_WIDTH)
IMG_PATH = DATASET_PATH + 'images/'


def _load_split(split_meta):
    """Return (pixel array, label list) for every row of *split_meta*, in row order."""
    pixels = np.array([
        img_to_array(load_img(IMG_PATH + file_name, target_size=IMG_DIM))
        for file_name in split_meta['image']
    ])
    labels = split_meta['masterCategory'].tolist()
    return pixels, labels


# Load every split fully into memory: validation, then training, then test.
val_img, val_labels = _load_split(val_image_meta)
train_imgs, train_labels = _load_split(train_image_meta)
test_imgs, test_labels = _load_split(test_image_meta)

In [None]:
test_imgs_class = [img.masterCategory for i, img in test_image_meta.iterrows()]

In [None]:
test_imgs_class

In [None]:
# encode text category labels
from sklearn.preprocessing import LabelEncoder

# Fit the encoder once, on the training labels only, and reuse that single
# mapping for every split. Re-fitting on the validation or test labels could
# assign different integer ids to the same category whenever a split lacks a
# class. With one mapping, an unseen label raises a ValueError in transform()
# instead of being silently mis-encoded.
le = LabelEncoder()
le.fit(train_labels)
train_labels_enc = le.transform(train_labels)
val_labels_enc = le.transform(val_labels)
test_labels_enc = le.transform(test_labels)

In [None]:
train_labels_enc

In [None]:
name_labels = [(train_labels[i], train_labels_enc[i]) for i in range(0, len(train_labels_enc))]

In [None]:
from collections import Counter

# Height/width pair only (no channel axis); the model below is built with an
# explicit (IMG_HEIGHT, IMG_WIDTH, 3) shape instead of this variable.
input_shape = (IMG_HEIGHT, IMG_WIDTH)
# Number of distinct encoded labels present in the training set.
_label_histogram = Counter(train_labels_enc)
NUM_CLASSES = len(_label_histogram)

In [None]:
# Number of distinct classes found in the training labels.
NUM_CLASSES

In [None]:
# Shape of the integer-encoded training labels (before one-hot encoding).
train_labels_enc.shape

In [None]:
from keras.utils import to_categorical

# One-hot encode the integer labels. The width is pinned to NUM_CLASSES so all
# three matrices match the model's output layer; without it the width is
# inferred from the largest id present in each split and can come out narrower.
train_labels_enc = to_categorical(train_labels_enc, num_classes=NUM_CLASSES)
val_labels_enc = to_categorical(val_labels_enc, num_classes=NUM_CLASSES)
test_labels_enc = to_categorical(test_labels_enc, num_classes=NUM_CLASSES)

In [None]:
# Shapes of the one-hot label matrices for the training, validation and test
# splits, one cell each.
train_labels_enc.shape

In [None]:
val_labels_enc.shape

In [None]:
test_labels_enc.shape

In [None]:
from keras.applications.xception import preprocess_input

# The images are loaded as raw 0-255 pixel values, while the ImageNet-pretrained
# Xception backbone expects inputs scaled to [-1, 1]. Apply Xception's own
# preprocess_input in every generator so training, validation and test images
# are all scaled the same way.
# Only the training generator augments (rotation, zoom, shifts, shear, flips).
train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input,
                                   rotation_range=30,
                                   zoom_range=0.15,
                                   width_shift_range=0.2,
                                   height_shift_range=0.2,
                                   shear_range=0.15,
                                   horizontal_flip=True,
                                   fill_mode="nearest")
val_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

In [None]:
train_generator = train_datagen.flow(train_imgs, 
                                     train_labels_enc,
                                     batch_size=25, 
                                     shuffle=True)

In [None]:
validation_generator = val_datagen.flow(val_img, 
                                     val_labels_enc,
                                     batch_size=25, 
                                     shuffle=False)

In [None]:
test_generator = test_datagen.flow(test_imgs, 
                                test_labels_enc,
                                batch_size=1, 
                                shuffle=False)

In [None]:
from keras.applications.xception import Xception
from keras.models import Model
import keras

# ImageNet-pretrained Xception without its classification top, sized for the
# (height, width, RGB) images loaded earlier.
xception = Xception(
    weights='imagenet',
    include_top=False,
    input_shape=(IMG_HEIGHT, IMG_WIDTH, 3),
)
# xception layers number:132
# Freeze every backbone layer so only layers added on top of it get trained.
for backbone_layer in xception.layers:
    backbone_layer.trainable = False
xception.summary()

In [None]:
from keras.layers import Conv2D, GlobalAveragePooling2D, Flatten, Dense, Dropout, InputLayer
# from keras.models import Sequential
from keras import optimizers
# model_finetuned = Sequential()
# model_finetuned.add(restnet)
# model_finetuned.add(Dense(512, activation='relu'))
# model_finetuned.add(Dropout(0.3))
# model_finetuned.add(Dense(NUM_CLASSES, activation='softmax'))
# model_finetuned.compile(loss='categorical_crossentropy',
#               optimizer=optimizers.RMSprop(lr=1e-5),
#               metrics=['accuracy'])
# model_finetuned.summary()

In [None]:
# Classification head on top of the Xception backbone built above.
base_model = xception
x = base_model.output
# Collapse the spatial feature map to one vector per image.
x = GlobalAveragePooling2D()(x)
x = Dense(512, activation='relu')(x)
# Softmax output with one unit per category found in the training labels.
predictions = Dense(NUM_CLASSES, activation='softmax')(x)
model_finetuned = Model(inputs=base_model.input, outputs=predictions)
# `learning_rate` is the supported argument name; the old `lr` alias is
# deprecated and rejected by newer Keras releases.
# NOTE(review): 1e-5 is a very small step size for plain SGD -- confirm that it
# is intentional.
model_finetuned.compile(loss='categorical_crossentropy',
              optimizer=optimizers.SGD(learning_rate=0.00001),
              metrics=['accuracy'])
model_finetuned.summary()

In [None]:
# Restrict CUDA to GPU 0.
# NOTE(review): this is set after the model has already been built, so it may
# come too late to influence device selection -- confirm.
os.environ["CUDA_VISIBLE_DEVICES"]="0"

In [None]:
history_1 = model_finetuned.fit_generator(train_generator, 
                                          steps_per_epoch=None, 
                                          epochs=50, 
                                          verbose=1, 
                                          use_multiprocessing=True,
                                          validation_data = validation_generator,
                                          workers=10)

In [None]:
history_1.history

In [None]:
import matplotlib.pyplot as plt

# Per-epoch curves recorded during training.
acc = history_1.history['accuracy']
val_acc = history_1.history['val_accuracy']

loss = history_1.history['loss']
val_loss = history_1.history['val_loss']

# Two stacked panels: accuracy on top, loss underneath.
fig, (acc_ax, loss_ax) = plt.subplots(2, 1, figsize=(8, 8))

acc_ax.plot(acc, label='Training Accuracy')
acc_ax.plot(val_acc, label='Validation Accuracy')
acc_ax.legend(loc='center right')
acc_ax.set_ylabel('Accuracy')
# Keep the automatic lower bound but cap the axis at 1.
acc_ax.set_ylim([min(acc_ax.get_ylim()), 1])
acc_ax.set_title('Training and Validation Accuracy')

loss_ax.plot(loss, label='Training Loss')
loss_ax.plot(val_loss, label='Validation Loss')
loss_ax.legend(loc='upper right')
loss_ax.set_ylabel('Cross Entropy')
loss_ax.set_ylim([0, 15.0])
loss_ax.set_title('Training and Validation Loss')
loss_ax.set_xlabel('epoch')
plt.show()

In [None]:
print(sum(val_loss)/len(val_loss))

In [None]:
nb_samples = len(test_imgs_class)

In [None]:
predict = model_finetuned.predict_generator(test_generator,steps=nb_samples, verbose=1)

In [None]:
len(predict)

In [None]:
predict

In [None]:
predicted_class_indices=np.argmax(predict,axis=1)

In [None]:
predicted_class_indices

In [None]:
len(predicted_class_indices)

In [None]:
test_imgs_class

In [None]:
test_labels_enc

In [None]:
test_class_indices=np.argmax(test_labels_enc,axis=1)

In [None]:
test_class_indices

In [None]:
correct = 0
for i in range(len(test_class_indices)):
    if predicted_class_indices[i] == test_class_indices[i]:
        correct = correct + 1
print(correct)

In [None]:
test_accuracy = (correct*100.00)/len(test_class_indices)
print(test_accuracy)