**Large Shoe Dataset (UT Zappos50k)**

* Data Importing
* Defining Labels (Classes)
* Train, Validation and Test
* Augmenting the Image Dataset
* Building a Model
* Testing Model

In [None]:
import numpy as np 
import pandas as pd
import os
from pathlib import Path
from sklearn.model_selection import train_test_split 
from keras.preprocessing.image import ImageDataGenerator
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import SparseCategoricalCrossentropy
import tensorflow as tf
import matplotlib.pyplot as plt

**Data Importing**

In [None]:
# Collect every JPEG below the dataset root. Each image is labelled with the
# name of the folder that directly contains it.
direc = Path('../input/large-shoe-dataset-ut-zappos50k/ut-zap50k-images/ut-zap50k-images')
filepaths = list(direc.glob(r'**/**/**/*.jpg'))
Labels = [image_path.parent.name for image_path in filepaths]

# Turn both lists into named string Series so they can be joined on their
# shared positional index.
filepaths = pd.Series(filepaths, name='FilePaths').astype(str)
Labels = pd.Series(Labels, name='Labels').astype(str)

# One row per image: its file path and its parent-folder label.
img_df = pd.merge(filepaths, Labels, left_index=True, right_index=True)

# Preview the first rows.
img_df.head()

**Defining Labels (Classes)**

In [None]:
# Build the class label "<folder 1> - <folder 2>" from the two directory
# levels directly below the dataset root.
#
# The column positions are derived from the depth of `direc` rather than being
# hard-coded (previously 5 and 6), so the labels stay correct if the dataset
# root is moved to a path with a different number of components.
# NOTE: the split assumes "/" as the path separator (POSIX-style paths).
root_depth = len(direc.parts)
fp = filepaths.str.split(pat="/", expand=True)

Label1 = fp[root_depth] + " - " + fp[root_depth + 1]
Label1 = Label1.to_frame(name='Label1')

# Replace the single-folder 'Labels' column with the combined two-level label.
img_df2 = pd.concat([img_df, Label1], axis=1).drop('Labels', axis=1)
img_df2

In [None]:
# Show a 5x8 grid of the first 40 images, each titled with its class label.
f, a = plt.subplots(nrows=5, ncols=8, figsize=(13, 7),
                    subplot_kw={'xticks': [], 'yticks': []})

for i, ax in enumerate(a.flat):
    # Positional access: the index labels are not guaranteed to be 0..39
    # (e.g. when this cell is re-run after rows have been filtered out of
    # img_df2), and a label-based lookup by `i` would raise KeyError then.
    sample = img_df2.iloc[i]
    ax.imshow(plt.imread(sample['FilePaths']))
    ax.set_title(sample['Label1'])

plt.tight_layout()
plt.show()

In [None]:
# Report the size of the labelled image table.
n_rows, n_cols = img_df2.shape
print(f" Count of Rows : {n_rows} \n Count of Columns : {n_cols} ")

In [None]:
# Number of images per class, smallest classes first.
label_counts = img_df2['Label1'].value_counts(ascending=True)
label_counts

**Each class (label) needs enough images to be learned reliably. Around 100 images per class is usually sufficient,
so classes with fewer than 100 images are removed.**

In [None]:
# Keep only the rows whose class has at least 100 images.
images_per_class = img_df2['Label1'].value_counts()
img_df2 = img_df2[img_df2['Label1'].map(images_per_class) >= 100]

In [None]:
# Images per class in the current img_df2, smallest classes first.
filtered_label_counts = img_df2['Label1'].value_counts(ascending=True)
filtered_label_counts

**The pie chart below shows the share of images in each class that will be used for training and testing.**

In [None]:
# Share of images per class.
# Wedge sizes and wedge labels must come from the same Series: value_counts()
# orders classes by count, whereas unique() orders them by first appearance,
# so pairing the two would attach class names to the wrong wedges.
class_counts = img_df2['Label1'].value_counts(ascending=True)

plt.figure(figsize=(10, 8))
plt.pie(class_counts, labels=class_counts.index, autopct='%1.2f',
        textprops=dict(color="w"))
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.title("Shoe Types")
plt.show()


**Defining Train, Validation and Test**

In [None]:
# Target share of the full dataset for each subset.
train_ratio = 0.70
validation_ratio = 0.15
test_ratio = 0.15

# Fixed seed so the same images land in the same subset on every run.
split_seed = 42

# Step 1: carve off the training set; the remainder is shared by validation
# and test. Stratifying on the label keeps class proportions in each part.
x_train, x_holdout = train_test_split(
    img_df2,
    test_size=1 - train_ratio,
    stratify=img_df2['Label1'],
    random_state=split_seed,
)

# Step 2: divide the remainder between validation and test in proportion to
# their target ratios.
x_val, x_test = train_test_split(
    x_holdout,
    test_size=test_ratio / (test_ratio + validation_ratio),
    stratify=x_holdout['Label1'],
    random_state=split_seed,
)

print('Shape of Training Data : ', x_train.shape)
print('Shape of Testing Data : ', x_test.shape)
print('Shape of Validation Data : ', x_val.shape)

# Explicit copy kept from the original cell; presumably a guard against
# SettingWithCopyWarning should x_test be modified later.
x_test = x_test.copy()

**Augmenting the Image Dataset**

In [None]:
# Shared generator: runs MobileNetV2's input preprocessing on every image.
img_datagen = ImageDataGenerator(
    preprocessing_function=tf.keras.applications.mobilenet_v2.preprocess_input
)

img_size = (224, 224)

# Settings common to all three splits.
flow_options = dict(
    x_col='FilePaths',
    y_col='Label1',
    target_size=img_size,
    color_mode='rgb',
    class_mode='categorical',
    batch_size=32,
    seed=42,
)

# Each split DataFrame is replaced by a batch iterator over its images
# (created in the order train, test, validation).
# NOTE: the names are rebound from DataFrames to iterators, so the split cell
# must be re-run before this cell is run a second time.
x_train = img_datagen.flow_from_dataframe(dataframe=x_train, **flow_options)
x_test = img_datagen.flow_from_dataframe(dataframe=x_test, **flow_options)
x_val = img_datagen.flow_from_dataframe(dataframe=x_val, **flow_options)

**Building a Model**

In [None]:
# Size the output layer from the classes the training iterator actually
# emits, instead of a hard-coded 15 that goes stale if the class filter or
# the dataset changes.
num_classes = len(x_train.class_indices)

model = keras.Sequential([

    # First Convolutional Block
    tf.keras.layers.Conv2D(filters=32, kernel_size=5, activation="relu", padding='same',
                  # give the input dimensions in the first layer
                  # [height, width, color channels(RGB)]
                 input_shape=[224, 224, 3]),
    tf.keras.layers.MaxPool2D(),

    # Second Convolutional Block
    tf.keras.layers.Conv2D(filters=64, kernel_size=3, activation="relu", padding='same'),
    tf.keras.layers.MaxPool2D(),

    # Third Convolutional Block
    tf.keras.layers.Conv2D(filters=128, kernel_size=3, activation="relu", padding='same'),
    tf.keras.layers.MaxPool2D(),

    # Classifier Head
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(740, activation="relu"),
    # One softmax probability per class.
    tf.keras.layers.Dense(num_classes, activation="softmax"),
])
model.summary()

# Single-label, multi-class problem: the iterators yield one-hot targets
# (class_mode='categorical') and the head is a softmax, so the matching loss
# is categorical cross-entropy. Binary cross-entropy would score each output
# unit as an independent yes/no problem and optimise the wrong objective.
model.compile(optimizer="adam",
              loss="categorical_crossentropy",
              metrics=["accuracy"])

In [None]:
# Early stopping driven by validation loss: training accuracy keeps improving
# while a model overfits, so it is not a useful stopping signal when
# validation data is available. The best weights seen are restored at the end.
Callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3,
                                            restore_best_weights=True)

# `callbacks` is documented as a list of callback instances.
model_fit = model.fit(x_train,
                      validation_data=x_val,
                      epochs=10,
                      callbacks=[Callback])

In [None]:
# Heading followed by one blank line, then the layer-by-layer summary.
print('Model summary :', end='\n\n')
model.summary()

**Testing Model**

In [None]:
# evaluate() returns the loss followed by the compiled metrics; index 1 is
# the accuracy, converted here from a fraction to a percentage.
evaluation = model.evaluate(x_test)
test_accuracy = evaluation[1] * 100
print('Test accuracy is : ', test_accuracy, '%')