# Multi-Class Image Classification


#### Import Libraries

In [None]:
import os
import tensorflow as tf
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
%matplotlib inline

#### Exploratory Data Analysis

In [None]:
# Root folder of the raw dataset; the later cells join class names
# ('jeans', 'sofa', 'tshirt', 'tv') onto this path.
base_dir = '../input/ecommerce-products-image-dataset/ecommerce products'

# Show what sits directly under the dataset root.
dataset_entries = os.listdir(base_dir)
print(dataset_entries)

> There are 4 types of products (classes)

In [None]:
# One directory per product class, all located directly under base_dir.
jeans_dir, sofa_dir, tshirt_dir, tv_dir = (
    os.path.join(base_dir, class_name)
    for class_name in ('jeans', 'sofa', 'tshirt', 'tv')
)

# File names found in each class directory (order is whatever the OS returns).
jeans_fnames, sofa_fnames, tshirt_fnames, tv_fnames = (
    os.listdir(class_dir)
    for class_dir in (jeans_dir, sofa_dir, tshirt_dir, tv_dir)
)

# Report how many files each class contains.
for label, fnames in (
    ("count jeans :", jeans_fnames),
    ("count sofa :", sofa_fnames),
    ("count tshirt :", tshirt_fnames),
    ("count tv :", tv_fnames),
):
    print(label, len(fnames))

> The data is already balanced across the classes

In [None]:
# Grid layout: one row per class, eight sample images per row.
nrows, ncols = 4, 8
pic_index = 0

# Size the current figure so every cell is 4x4 inches.
fig = plt.gcf()
fig.set_size_inches(ncols * 4, nrows * 4)

# Advance the window by eight files and take that slice from every class.
pic_index += 8
window = slice(pic_index - 8, pic_index)

next_jeans_pix = [os.path.join(jeans_dir, fname) for fname in jeans_fnames[window]]
next_sofa_pix = [os.path.join(sofa_dir, fname) for fname in sofa_fnames[window]]
next_tshirt_pix = [os.path.join(tshirt_dir, fname) for fname in tshirt_fnames[window]]
next_tv_pix = [os.path.join(tv_dir, fname) for fname in tv_fnames[window]]

# Classes are concatenated in order, so each class fills one row of the grid.
preview_paths = next_jeans_pix + next_sofa_pix + next_tshirt_pix + next_tv_pix

# Subplot positions are 1-based, hence start=1.
for position, img_path in enumerate(preview_paths, start=1):
    sp = plt.subplot(nrows, ncols, position)
    sp.axis('Off')  # hide axes and gridlines

    img = mpimg.imread(img_path)
    plt.imshow(img)

plt.show()

> We can see that the tv dataset contains some noisy images, so we must remove that noise

#### Data Preprocessing
I removed the noisy images from the dataset manually, and this is the result:

In [None]:
# Root folder of the manually cleaned copy of the dataset; the generators
# below read images from here instead of from base_dir.
base_dir_cleaned ='../input/ecommerceproductcleaned/ecommerce products_cleaned'

In [None]:
# Count the tv images that remain in the cleaned dataset.
tv_dir_cleaned = os.path.join(base_dir_cleaned, 'tv')
tv_fnames_cleaned = os.listdir(tv_dir_cleaned)

remaining_tv_count = len(tv_fnames_cleaned)
print("count tv :", remaining_tv_count)

> 146 of the original 199 tv images remain after cleaning

#### Split Data

In [None]:
# Input resolution and batch size shared by both generators.
img_height = 150
img_width = 150
batch_size = 50

# Fraction of every class held out for validation. Both generators must use
# the same value so that the 'training' and 'validation' subsets stay disjoint.
val_split = 0.2

# Training pipeline: rescale pixels to [0, 1] and apply random augmentation.
datagen = ImageDataGenerator(rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    validation_split=val_split) # set validation split

# Validation pipeline: rescale only. Augmenting the validation images (as a
# shared generator would) makes the validation metrics noisy and not
# representative of performance on unmodified images.
val_datagen = ImageDataGenerator(rescale=1./255,
    validation_split=val_split)

train_generator = datagen.flow_from_directory(
    base_dir_cleaned,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    subset='training') # set as training data

validation_generator = val_datagen.flow_from_directory(
    base_dir_cleaned, # same directory as training data
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    subset='validation') # held-out, un-augmented images

#### Build Model

In [None]:
# Small CNN: three Conv2D + MaxPooling2D stages followed by a dense classifier.
# The input shape is derived from img_height / img_width so it always matches
# the target_size used by the data generators.
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(8, (3,3), activation='relu',
                           input_shape=(img_height, img_width, 3)),
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Conv2D(16, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Conv2D(32, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Flatten(),
    # Dropout on the flattened features, before the dense layers.
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(512, activation='relu'),
    # One softmax output per product class (jeans, sofa, tshirt, tv).
    tf.keras.layers.Dense(4, activation='softmax')
])

In [None]:
# Print the layer-by-layer summary of the model defined above.
model.summary()

In [None]:
# Configure training: categorical cross-entropy matches the one-hot labels
# produced by class_mode='categorical'; accuracy is tracked as the metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer=RMSprop(learning_rate=0.001),
    metrics=['accuracy'],
)

In [None]:
# Train for 15 epochs on the training generator, evaluating on the validation
# generator after each epoch; the returned History is used for plotting below.
history = model.fit(
    x=train_generator,
    validation_data=validation_generator,
    epochs=15,
    verbose=2,
)

> The final model reaches both a training accuracy and a validation accuracy greater than 96%

#### Model Evaluation

In [None]:
# Per-epoch metrics recorded by model.fit.
acc      = history.history[     'accuracy' ]
val_acc  = history.history[ 'val_accuracy' ]
loss     = history.history[    'loss' ]
val_loss = history.history['val_loss' ]

epochs   = range(len(acc))

#------------------------------------------------
# Plot training and validation accuracy per epoch
#------------------------------------------------
# Label each curve so the legend can tell training from validation.
plt.plot  ( epochs,     acc, label='Training accuracy' )
plt.plot  ( epochs, val_acc, label='Validation accuracy' )
plt.title ('Training and validation accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()

# Start a new figure so the loss curves are not drawn on the accuracy axes.
plt.figure()

#------------------------------------------------
# Plot training and validation loss per epoch
#------------------------------------------------
plt.plot  ( epochs,     loss, label='Training loss' )
plt.plot  ( epochs, val_loss, label='Validation loss' )
plt.title ('Training and validation loss'   )
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

plt.show()

> From the graphs, we can see that the accuracy and loss of the model are already quite balanced