# PHASE_5_CAPSTONE_PROJECT

# MODELING

### Data source:

The data source consists of over 44 thousand fashion images from Kaggle.

Source link: https://www.kaggle.com/datasets/paramaggarwal/fashion-product-images-dataset

### Target:

Leverage a dataset comprising more than 24k images across 10 distinct fashion product types, including t-shirts, shirts, casual shoes, watches, sports shoes, kurtas, handbags, heels, sunglasses, and wallets, for image labeling in customer search.

In [1]:
import pandas as pd
import numpy as np
import csv
import json
import glob
import os
import shutil
import random
import pickle
import h5py
import hdf5storage
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import seaborn as sns
from sklearn.preprocessing import MultiLabelBinarizer
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import array_to_img, img_to_array, load_img
from IPython.display import display
from tensorflow.keras import datasets
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from keras import layers
from keras.models import Sequential
from keras.layers import Reshape

from tensorflow.keras.layers import Dense # creates densely connected layer object
from tensorflow.keras.layers import Flatten # takes 2D input and turns into 1D array
from tensorflow.keras.layers import Conv2D # convolution layer
from tensorflow.keras.layers import MaxPooling2D # max pooling layer
from tensorflow.keras.regularizers import l2
from tensorflow.keras.regularizers import L2
from tensorflow.keras.layers import Dropout
from keras.models import load_model


from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay, classification_report

# BatchNormalization: a layer that rescales/normalizes the activations of the previous layer.
# It has a weak regularization effect and also allows training with higher learning rates.

from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Activation
from tensorflow.keras import layers, regularizers
from tensorflow.keras.regularizers import l2
from tensorflow.keras.layers import Dropout
from tensorflow.keras.applications import VGG16, VGG19
from tensorflow.keras import Input
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Resizing, RandomFlip, RandomRotation
from tensorflow.keras.layers import GlobalAveragePooling2D
import tensorflow as tf
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input

%matplotlib inline


In [2]:
# !pip install tensorflow

In [3]:
# Dataset locations. The root folder can be overridden with the
# FASHION_DATA_ROOT environment variable so the notebook is not tied to one
# machine; the default keeps the original local path.
data_root_dir = os.environ.get(
    'FASHION_DATA_ROOT',
    '/Users/seanluong/Downloads/TransferX/Train_Test_Validation')
train_data_dir_sample = os.path.join(data_root_dir, 'Train')
val_data_dir_sample = os.path.join(data_root_dir, 'Validation')
test_data_dir_sample = os.path.join(data_root_dir, 'Test')

# Define the image size and number of classes
image_size_224 = (224, 224)
num_classes = 10

# Class sub-folders are expected to be named '0' .. str(num_classes - 1).
class_folder_names = [str(i) for i in range(num_classes)]

# Specialized MobileNetV2 preprocessor: maps raw [0, 255] pixels to [-1, 1].
mobilenetv2_processor = tf.keras.applications.mobilenet_v2.preprocess_input

# NOTE: `rescale=1./255` is intentionally NOT passed to the generators below.
# ImageDataGenerator applies `preprocessing_function` first and `rescale`
# afterwards, so combining both would shrink the already-normalized [-1, 1]
# pixels to roughly [-0.004, 0.004] and starve the pretrained backbone of signal.

# Training generator: random augmentation + MobileNetV2 preprocessing.
train_datagen_mobilenetv2 = ImageDataGenerator(rotation_range=40,
                                   width_shift_range=0.2,
                                   height_shift_range=0.2,
                                   shear_range=0.3,
                                   zoom_range=0.1,
                                   horizontal_flip=False,
                                   preprocessing_function=mobilenetv2_processor)

# Validation generator: MobileNetV2 preprocessing only (no augmentation).
val_datagen_mobilenetv2 = ImageDataGenerator(preprocessing_function=mobilenetv2_processor)

# Test generator: MobileNetV2 preprocessing only (no augmentation).
test_datagen_mobilenetv2 = ImageDataGenerator(preprocessing_function=mobilenetv2_processor)

# Flow data from the directories using the data generators.
train_generator_mobilenetv2 = train_datagen_mobilenetv2.flow_from_directory(directory=train_data_dir_sample,
                                                    target_size=image_size_224,
                                                    class_mode='categorical',
                                                    batch_size=64,
                                                    shuffle=True,
                                                    classes=class_folder_names)

val_generator_mobilenetv2 = val_datagen_mobilenetv2.flow_from_directory(directory=val_data_dir_sample,
                                                target_size=image_size_224,
                                                class_mode='categorical',
                                                batch_size=64,
                                                shuffle=True,
                                                classes=class_folder_names)

# shuffle=False keeps test batches in file order so predictions can be
# aligned with the generator's labels.
test_generator_mobilenetv2 = test_datagen_mobilenetv2.flow_from_directory(directory=test_data_dir_sample,
                                                  target_size=image_size_224,
                                                  class_mode='categorical',
                                                  batch_size=64,
                                                  shuffle=False,
                                                  classes=class_folder_names)


Found 14782 images belonging to 10 classes.
Found 4935 images belonging to 10 classes.
Found 4926 images belonging to 10 classes.


### MobileNetV2 Model

In [4]:
# ImageNet-pretrained MobileNetV2 without its classification head; used as a
# fixed feature extractor for the baseline model.
base_mobilenetv2_model = tf.keras.applications.MobileNetV2(
    include_top=False,
    weights='imagenet',
)

# Freeze every backbone layer so only the custom head is trained.
for backbone_layer in base_mobilenetv2_model.layers:
    backbone_layer.trainable = False




In [5]:
# Number of layers in the headless MobileNetV2 backbone.
len(base_mobilenetv2_model.layers)

154

In [6]:
# Print the backbone's layer-by-layer architecture and parameter counts.
base_mobilenetv2_model.summary()

Model: "mobilenetv2_1.00_224"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, None, None, 3)]      0         []                            
                                                                                                  
 Conv1 (Conv2D)              (None, None, None, 32)       864       ['input_1[0][0]']             
                                                                                                  
 bn_Conv1 (BatchNormalizati  (None, None, None, 32)       128       ['Conv1[0][0]']               
 on)                                                                                              
                                                                                                  
 Conv1_relu (ReLU)           (None, None, None, 32)       0         ['bn_Conv1[

In [7]:
def custom_classifier(base_model, num_classes):
    """Attach a pooled dense softmax head to `base_model`.

    Head: GlobalAveragePooling2D -> Dense(128, relu) -> Dense(num_classes, softmax).

    Args:
        base_model: Keras model whose `.input` / `.output` tensors are reused.
        num_classes: number of units in the final softmax layer.

    Returns:
        A Keras `Model` mapping `base_model.input` to the softmax output.
    """
    pooled_features = GlobalAveragePooling2D()(base_model.output)
    hidden = Dense(128, activation='relu')(pooled_features)
    class_probabilities = Dense(num_classes, activation='softmax')(hidden)
    return Model(inputs=base_model.input, outputs=class_probabilities)

In [8]:
# Build the baseline model: frozen MobileNetV2 backbone + custom classification head.
mobilenetv2_model = custom_classifier(base_mobilenetv2_model, num_classes)

In [9]:
# Multi-class setup: categorical cross-entropy on one-hot labels, Adam
# optimizer, and accuracy / AUC / precision / recall tracked per epoch.
mobilenetv2_model.compile(
    optimizer='Adam',
    loss='categorical_crossentropy',
    metrics=['accuracy', 'AUC', 'Precision', 'Recall'],
)

In [33]:
# Train the baseline head for 50 epochs; one epoch is a full pass over each
# generator (steps equal the number of batches).
history_mobilenetv2_model = mobilenetv2_model.fit(
    train_generator_mobilenetv2,
    validation_data=val_generator_mobilenetv2,
    epochs=50,
    steps_per_epoch=len(train_generator_mobilenetv2),
    validation_steps=len(val_generator_mobilenetv2),
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


#### Save model & history

In [92]:
# Persist the trained baseline model (architecture + weights) to an HDF5 file.
mobilenetv2_model.save('mobilenetv2_base_model_50epoch.h5')

In [93]:
# Persist the per-epoch metrics dict from training so the curves can be
# re-plotted without retraining.
# NOTE(review): savemat is given a ".h5" file name — confirm this extension is intended.
hdf5storage.savemat("history_mobilenetv2_base_model_epoch.h5", history_mobilenetv2_model.history)

#### Evaluate the model

In [94]:
# Reload the saved baseline model from disk (this is the full model with its
# classification head, despite the "base_" prefix in the variable name).
base_mobilenetv2_model_loaded = load_model('mobilenetv2_base_model_50epoch.h5')

In [None]:
# Evaluate on the training generator. This generator applies random
# augmentation and shuffling, so the reported metrics are for augmented
# batches and may vary between runs.
base_mobilenetv2_model_loaded.evaluate(train_generator_mobilenetv2)



In [None]:
# Evaluate on the held-out test generator (no augmentation, fixed order).
base_mobilenetv2_model_loaded.evaluate(test_generator_mobilenetv2)

In [None]:
# Reload the saved training history written by hdf5storage.savemat.
history_base_mobilenetv2_model_loaded = hdf5storage.loadmat("history_mobilenetv2_base_model_epoch.h5")

In [None]:
# List the metric names available in the reloaded history.
print(history_base_mobilenetv2_model_loaded.keys())

In [None]:
# Training vs. validation loss per epoch for the baseline model.
# NOTE(review): values come from hdf5storage.loadmat; confirm they are 1-D
# sequences with one entry per epoch before trusting len() below.
base_mobilenev2_loss_values = history_base_mobilenetv2_model_loaded['loss']
base_mobilenev2__val_loss_values = history_base_mobilenetv2_model_loaded['val_loss']

# Epochs are numbered from 1 on the x-axis.
epochs = range(1, len(base_mobilenev2_loss_values) + 1)

fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(epochs, base_mobilenev2_loss_values, label='Train loss vs epochs')
ax.plot(epochs, base_mobilenev2__val_loss_values, label='Validation loss vs epochs')
ax.legend()
ax.grid()
ax.set(title='Train & validation loss', xlabel='Epochs', ylabel='Loss');

In [43]:
# Print the full baseline model (backbone + custom head) architecture.
mobilenetv2_model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_2 (InputLayer)        [(None, None, None, 3)]      0         []                            
                                                                                                  
 Conv1 (Conv2D)              (None, None, None, 32)       864       ['input_2[0][0]']             
                                                                                                  
 bn_Conv1 (BatchNormalizati  (None, None, None, 32)       128       ['Conv1[0][0]']               
 on)                                                                                              
                                                                                                  
 Conv1_relu (ReLU)           (None, None, None, 32)       0         ['bn_Conv1[0][0]']        

In [44]:
# Layer count of the full model: the backbone layers plus the three head
# layers added by custom_classifier (pooling, dense, softmax).
len(mobilenetv2_model.layers)

157

### Model tuning

Unfreeze last 2 layers

In [11]:
# ImageNet-pretrained MobileNetV2 feature extractor (no classification head).
mobilenetv2_model_tune1 = tf.keras.applications.MobileNetV2(weights='imagenet', include_top=False)

# Keras layers are trainable by default, so only setting the last two to True
# left the entire backbone trainable. Freeze everything except the last two
# layers first, then explicitly mark the last two as trainable.
for layer in mobilenetv2_model_tune1.layers[:-2]:
    layer.trainable = False
for layer in mobilenetv2_model_tune1.layers[-2:]:
    layer.trainable = True



In [12]:
def custom_classifier(base_model, num_classes):
    """Build a classifier by stacking a small dense head on `base_model`.

    The head is global average pooling, a 128-unit ReLU dense layer, and a
    `num_classes`-way softmax output.

    Returns:
        A Keras `Model` from `base_model.input` to the softmax output.
    """
    features = base_model.output
    features = GlobalAveragePooling2D()(features)
    features = Dense(128, activation='relu')(features)
    softmax_out = Dense(num_classes, activation='softmax')(features)
    return Model(inputs=base_model.input, outputs=softmax_out)

In [13]:
# Wrap the tune-1 backbone with the classification head.
# NOTE(review): this rebinds the backbone's own variable name, so the cell is
# not idempotent — re-running it alone passes the already-wrapped model back in.
mobilenetv2_model_tune1 = custom_classifier(mobilenetv2_model_tune1, num_classes)

In [14]:
# Same training configuration as the baseline: Adam + categorical
# cross-entropy, tracking accuracy / AUC / precision / recall.
mobilenetv2_model_tune1.compile(
    optimizer='Adam',
    loss='categorical_crossentropy',
    metrics=['accuracy', 'AUC', 'Precision', 'Recall'],
)

In [67]:
# Train tuning run 1.
# NOTE(review): epochs is 10 here, but the recorded output below shows a
# 5-epoch run — confirm which setting produced the reported results.
history_mobilenetv2_model_tune1 = mobilenetv2_model_tune1.fit(
    train_generator_mobilenetv2,
    validation_data=val_generator_mobilenetv2,
    epochs=10,
    steps_per_epoch=len(train_generator_mobilenetv2),
    validation_steps=len(val_generator_mobilenetv2),
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


### Model tuning 2

In [15]:
# ImageNet-pretrained MobileNetV2 feature extractor (no classification head).
mobilenetv2_model_tune2 = tf.keras.applications.MobileNetV2(weights='imagenet', include_top=False)

# Keras layers are trainable by default, so only setting the last two to True
# left the entire backbone trainable. Freeze everything except the last two
# layers first, then explicitly mark the last two as trainable.
for layer in mobilenetv2_model_tune2.layers[:-2]:
    layer.trainable = False
for layer in mobilenetv2_model_tune2.layers[-2:]:
    layer.trainable = True



In [16]:
def custom_classifier(base_model, num_classes):
    """Attach an L2-regularized dense softmax head to `base_model`.

    Head: GlobalAveragePooling2D -> Dense(128, relu, L2 kernel penalty 0.001)
    -> Dense(num_classes, softmax).

    Returns:
        A Keras `Model` from `base_model.input` to the softmax output.
    """
    pooled = GlobalAveragePooling2D()(base_model.output)
    regularized_hidden = Dense(
        128,
        activation='relu',
        kernel_regularizer=l2(0.001),
    )(pooled)
    predictions = Dense(num_classes, activation='softmax')(regularized_hidden)
    return Model(inputs=base_model.input, outputs=predictions)

In [17]:
# Wrap the tune-2 backbone with the L2-regularized classification head.
# NOTE(review): this rebinds the backbone's own variable name, so the cell is
# not idempotent — re-running it alone passes the already-wrapped model back in.
mobilenetv2_model_tune2 = custom_classifier(mobilenetv2_model_tune2, num_classes)

In [18]:
# Adam + categorical cross-entropy, tracking accuracy / AUC / precision / recall.
mobilenetv2_model_tune2.compile(
    optimizer='Adam',
    loss='categorical_crossentropy',
    metrics=['accuracy', 'AUC', 'Precision', 'Recall'],
)

In [19]:
# Short 5-epoch run to compare tuning run 2 against the other variants.
history_mobilenetv2_model_tune2 = mobilenetv2_model_tune2.fit(
    train_generator_mobilenetv2,
    validation_data=val_generator_mobilenetv2,
    epochs=5,
    steps_per_epoch=len(train_generator_mobilenetv2),
    validation_steps=len(val_generator_mobilenetv2),
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


### Model tuning 3

In [38]:
# Fully frozen ImageNet MobileNetV2 backbone for tuning run 3.
mobilenetv2_tune3 = tf.keras.applications.MobileNetV2(
    include_top=False,
    weights='imagenet',
)

for backbone_layer in mobilenetv2_tune3.layers:
    backbone_layer.trainable = False



In [39]:
def custom_classifier(base_model, num_classes):
    """Attach a two-stage convolutional head followed by softmax.

    Head: [Conv2D(64) -> MaxPool] -> [Conv2D(128) -> MaxPool]
    -> GlobalAveragePooling2D -> Dense(num_classes, softmax).
    All convolutions are 3x3, ReLU, 'same' padding; pools are 2x2.

    Returns:
        A Keras `Model` from `base_model.input` to the softmax output.
    """
    feature_map = base_model.output
    for n_filters in (64, 128):
        feature_map = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(feature_map)
        feature_map = MaxPooling2D((2, 2))(feature_map)
    pooled = GlobalAveragePooling2D()(feature_map)
    probabilities = Dense(num_classes, activation='softmax')(pooled)
    return Model(inputs=base_model.input, outputs=probabilities)

In [40]:
# Build tuning model 3: frozen backbone + the convolutional head defined above.
mobilenetv2_model_tune3 = custom_classifier(mobilenetv2_tune3, num_classes)

In [41]:
# Adam + categorical cross-entropy, tracking accuracy / AUC / precision / recall.
mobilenetv2_model_tune3.compile(
    optimizer='Adam',
    loss='categorical_crossentropy',
    metrics=['accuracy', 'AUC', 'Precision', 'Recall'],
)

In [42]:
# Short 5-epoch run for tuning model 3.
history_mobilenetv2_model_tune3 = mobilenetv2_model_tune3.fit(
    train_generator_mobilenetv2,
    validation_data=val_generator_mobilenetv2,
    epochs=5,
    steps_per_epoch=len(train_generator_mobilenetv2),
    validation_steps=len(val_generator_mobilenetv2),
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


### Model tuning 4

In [48]:
# Fully frozen ImageNet MobileNetV2 backbone for tuning run 4.
mobilenetv2_tune4 = tf.keras.applications.MobileNetV2(
    include_top=False,
    weights='imagenet',
)

for backbone_layer in mobilenetv2_tune4.layers:
    backbone_layer.trainable = False



In [49]:
def custom_classifier(base_model, num_classes):
    """Attach a two-stage convolutional head plus a dense layer and softmax.

    Head: [Conv2D(64) -> MaxPool] -> [Conv2D(128) -> MaxPool]
    -> GlobalAveragePooling2D -> Dense(128, relu) -> Dense(num_classes, softmax).
    All convolutions are 3x3, ReLU, 'same' padding; pools are 2x2.

    Returns:
        A Keras `Model` from `base_model.input` to the softmax output.
    """
    feature_map = base_model.output
    for n_filters in (64, 128):
        feature_map = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(feature_map)
        feature_map = MaxPooling2D((2, 2))(feature_map)
    pooled = GlobalAveragePooling2D()(feature_map)
    hidden = Dense(128, activation='relu')(pooled)
    probabilities = Dense(num_classes, activation='softmax')(hidden)
    return Model(inputs=base_model.input, outputs=probabilities)

In [50]:
# Build tuning model 4: frozen backbone + the convolutional/dense head defined above.
mobilenetv2_model_tune4 = custom_classifier(mobilenetv2_tune4, num_classes)

In [51]:
# Adam + categorical cross-entropy, tracking accuracy / AUC / precision / recall.
mobilenetv2_model_tune4.compile(
    optimizer='Adam',
    loss='categorical_crossentropy',
    metrics=['accuracy', 'AUC', 'Precision', 'Recall'],
)

In [52]:
# Short 5-epoch run for tuning model 4.
history_mobilenetv2_model_tune4 = mobilenetv2_model_tune4.fit(
    train_generator_mobilenetv2,
    validation_data=val_generator_mobilenetv2,
    epochs=5,
    steps_per_epoch=len(train_generator_mobilenetv2),
    validation_steps=len(val_generator_mobilenetv2),
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


### Model tuning 5

In [58]:
# Fully frozen ImageNet MobileNetV2 backbone for tuning run 5.
mobilenetv2_tune5 = tf.keras.applications.MobileNetV2(
    include_top=False,
    weights='imagenet',
)

for backbone_layer in mobilenetv2_tune5.layers:
    backbone_layer.trainable = False



In [59]:
def custom_classifier(base_model, num_classes):
    """Attach a three-stage convolutional head plus a dense layer and softmax.

    Head: [Conv2D(64) -> MaxPool] -> [Conv2D(128) -> MaxPool]
    -> [Conv2D(256) -> MaxPool] -> GlobalAveragePooling2D
    -> Dense(128, relu) -> Dense(num_classes, softmax).

    The pooling layers use padding='same'. With 224x224 inputs the MobileNetV2
    backbone emits a 7x7 feature map; three 2x2 pools with the default 'valid'
    padding would shrink it 7 -> 3 -> 1 -> 0, leaving an empty tensor for the
    global average (NaN activations). With 'same' padding the sizes go
    7 -> 4 -> 2 -> 1, so the head always has at least one spatial position.

    Args:
        base_model: Keras model whose `.input` / `.output` tensors are reused.
        num_classes: number of units in the final softmax layer.

    Returns:
        A Keras `Model` from `base_model.input` to the softmax output.
    """
    x = base_model.output
    for n_filters in (64, 128, 256):
        x = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(x)
        # 'same' padding rounds odd sizes up instead of dropping the border.
        x = MaxPooling2D((2, 2), padding='same')(x)
    x = GlobalAveragePooling2D()(x)
    x = Dense(128, activation='relu')(x)
    output = Dense(num_classes, activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=output)
    return model

In [60]:
# Build tuning model 5: frozen backbone + the three-stage convolutional head defined above.
mobilenetv2_model_tune5 = custom_classifier(mobilenetv2_tune5, num_classes)

In [61]:
# Adam + categorical cross-entropy, tracking accuracy / AUC / precision / recall.
mobilenetv2_model_tune5.compile(
    optimizer='Adam',
    loss='categorical_crossentropy',
    metrics=['accuracy', 'AUC', 'Precision', 'Recall'],
)

In [62]:
# Short 5-epoch run for tuning model 5.
history_mobilenetv2_model_tune5 = mobilenetv2_model_tune5.fit(
    train_generator_mobilenetv2,
    validation_data=val_generator_mobilenetv2,
    epochs=5,
    steps_per_epoch=len(train_generator_mobilenetv2),
    validation_steps=len(val_generator_mobilenetv2),
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


### Model tuning 6

In [63]:
# Fully frozen ImageNet MobileNetV2 backbone for tuning run 6.
mobilenetv2_tune6 = tf.keras.applications.MobileNetV2(
    include_top=False,
    weights='imagenet',
)

for backbone_layer in mobilenetv2_tune6.layers:
    backbone_layer.trainable = False



In [64]:
def custom_classifier(base_model, num_classes):
    """Attach a wider two-stage convolutional head plus dense layer and softmax.

    Head: [Conv2D(128) -> MaxPool] -> [Conv2D(256) -> MaxPool]
    -> GlobalAveragePooling2D -> Dense(128, relu) -> Dense(num_classes, softmax).
    All convolutions are 3x3, ReLU, 'same' padding; pools are 2x2.

    Returns:
        A Keras `Model` from `base_model.input` to the softmax output.
    """
    feature_map = base_model.output
    for n_filters in (128, 256):
        feature_map = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(feature_map)
        feature_map = MaxPooling2D((2, 2))(feature_map)
    pooled = GlobalAveragePooling2D()(feature_map)
    hidden = Dense(128, activation='relu')(pooled)
    probabilities = Dense(num_classes, activation='softmax')(hidden)
    return Model(inputs=base_model.input, outputs=probabilities)

In [65]:
# Build tuning model 6: frozen backbone + the wider convolutional head defined above.
mobilenetv2_model_tune6 = custom_classifier(mobilenetv2_tune6, num_classes)

In [66]:
# Adam + categorical cross-entropy, tracking accuracy / AUC / precision / recall.
mobilenetv2_model_tune6.compile(
    optimizer='Adam',
    loss='categorical_crossentropy',
    metrics=['accuracy', 'AUC', 'Precision', 'Recall'],
)

In [67]:
# Short 5-epoch run for tuning model 6.
history_mobilenetv2_model_tune6 = mobilenetv2_model_tune6.fit(
    train_generator_mobilenetv2,
    validation_data=val_generator_mobilenetv2,
    epochs=5,
    steps_per_epoch=len(train_generator_mobilenetv2),
    validation_steps=len(val_generator_mobilenetv2),
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


### Model tuning 7

In [74]:
# Fully frozen ImageNet MobileNetV2 backbone for tuning run 7.
mobilenetv2_tune7 = tf.keras.applications.MobileNetV2(
    include_top=False,
    weights='imagenet',
)

for backbone_layer in mobilenetv2_tune7.layers:
    backbone_layer.trainable = False



In [75]:
def custom_classifier(base_model, num_classes):
    """Attach a two-stage convolutional head with a 256-unit dense layer.

    Head: [Conv2D(128) -> MaxPool] -> [Conv2D(256) -> MaxPool]
    -> GlobalAveragePooling2D -> Dense(256, relu) -> Dense(num_classes, softmax).
    All convolutions are 3x3, ReLU, 'same' padding; pools are 2x2.

    Returns:
        A Keras `Model` from `base_model.input` to the softmax output.
    """
    feature_map = base_model.output
    for n_filters in (128, 256):
        feature_map = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(feature_map)
        feature_map = MaxPooling2D((2, 2))(feature_map)
    pooled = GlobalAveragePooling2D()(feature_map)
    hidden = Dense(256, activation='relu')(pooled)
    probabilities = Dense(num_classes, activation='softmax')(hidden)
    return Model(inputs=base_model.input, outputs=probabilities)

In [76]:
# Build tuning model 7: frozen backbone + the convolutional head with a 256-unit dense layer.
mobilenetv2_model_tune7 = custom_classifier(mobilenetv2_tune7, num_classes)

In [77]:
# Adam + categorical cross-entropy, tracking accuracy / AUC / precision / recall.
mobilenetv2_model_tune7.compile(
    optimizer='Adam',
    loss='categorical_crossentropy',
    metrics=['accuracy', 'AUC', 'Precision', 'Recall'],
)

In [78]:
# Full 50-epoch run for tuning model 7 (same length as the baseline run).
history_mobilenetv2_model_tune7 = mobilenetv2_model_tune7.fit(
    train_generator_mobilenetv2,
    validation_data=val_generator_mobilenetv2,
    epochs=50,
    steps_per_epoch=len(train_generator_mobilenetv2),
    validation_steps=len(val_generator_mobilenetv2),
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


### Model tuning 8

In [88]:
# ImageNet-pretrained MobileNetV2 feature extractor (no classification head).
mobilenetv2_tune8 = tf.keras.applications.MobileNetV2(weights='imagenet', include_top=False)

# Keras layers are trainable by default, so only setting the last two to True
# left the entire backbone trainable. Freeze everything except the last two
# layers first, then explicitly mark the last two as trainable.
for layer in mobilenetv2_tune8.layers[:-2]:
    layer.trainable = False
for layer in mobilenetv2_tune8.layers[-2:]:
    layer.trainable = True



In [89]:
def custom_classifier(base_model, num_classes):
    """Attach a two-stage convolutional head with a 256-unit dense layer.

    Head: [Conv2D(128) -> MaxPool] -> [Conv2D(256) -> MaxPool]
    -> GlobalAveragePooling2D -> Dense(256, relu) -> Dense(num_classes, softmax).
    All convolutions are 3x3, ReLU, 'same' padding; pools are 2x2.

    Returns:
        A Keras `Model` from `base_model.input` to the softmax output.
    """
    conv_stage_filters = (128, 256)
    tensor = base_model.output
    for stage_filters in conv_stage_filters:
        tensor = Conv2D(stage_filters, (3, 3), activation='relu', padding='same')(tensor)
        tensor = MaxPooling2D((2, 2))(tensor)
    tensor = GlobalAveragePooling2D()(tensor)
    tensor = Dense(256, activation='relu')(tensor)
    class_scores = Dense(num_classes, activation='softmax')(tensor)
    return Model(inputs=base_model.input, outputs=class_scores)

In [90]:
# Build tuning model 8: tune-8 backbone + the convolutional head defined above.
mobilenetv2_model_tune8 = custom_classifier(mobilenetv2_tune8, num_classes)

In [91]:
# Adam + categorical cross-entropy, tracking accuracy / AUC / precision / recall.
mobilenetv2_model_tune8.compile(
    optimizer='Adam',
    loss='categorical_crossentropy',
    metrics=['accuracy', 'AUC', 'Precision', 'Recall'],
)

In [92]:
# Short 5-epoch run for tuning model 8.
history_mobilenetv2_model_tune8 = mobilenetv2_model_tune8.fit(
    train_generator_mobilenetv2,
    validation_data=val_generator_mobilenetv2,
    epochs=5,
    steps_per_epoch=len(train_generator_mobilenetv2),
    validation_steps=len(val_generator_mobilenetv2),
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
