# Use a CNN and a pre-trained model for image classification

---



In [None]:
import numpy as np
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
import tensorflow as tf
import os
from pathlib import Path

# # load the dataset
# from google.colab import drive
# drive.mount('/content/gdrive')
# data_path = Path("/content/gdrive/My Drive/data/vegitable_fruit")

# filepaths = list(data_path.glob(r'**/*.jpg'))

/Users/ramihuunguyen/Downloads/vegitable_fruit


In [None]:
# NOTE(review): `filepaths` must already hold the image paths when this cell
# runs; the glob that builds it in the loading cell is commented out in this
# file -- confirm it is defined before running.

# Each image is stored in a folder named after its class, so the name of the
# parent directory is the label.  Path splits on the separator of the current
# OS, whereas splitting the string on "/" only works for POSIX-style paths.
labels = [Path(fp).parent.name for fp in filepaths]

filepaths = pd.Series(filepaths, name='Filepath').astype(str)
labels = pd.Series(labels, name='Label')

# Concatenate filepaths and labels into a two-column DataFrame
df = pd.concat([filepaths, labels], axis=1)

# Shuffle the rows and renumber them 0..n-1
df = df.sample(frac=1).reset_index(drop=True)

# Distinct class names (in order of first appearance) and how many there are
Lable_list = df.Label.unique()
no_classes = len(Lable_list)

In [None]:
# Report the dataset size, the number of classes, and the class names.
# Each entry is printed with its own print() call, so the trailing "\n" in an
# entry yields a blank line after it.
summary_lines = (
    '-- The data set information --\n',
    f'Number of pictures: {df.shape[0]}\n',
    f'Number of different labels: {no_classes}\n',
    f'Labels: {Lable_list}',
)
for summary_line in summary_lines:
    print(summary_line)

In [None]:
# Show the first five rows of the shuffled DataFrame.
df.head(n=5)

In [None]:
# Keep the first row seen for every label: one sample image per class.
df_unique = df.drop_duplicates(subset=["Label"]).reset_index()
K = df_unique.shape[0]  ## K is the number of classes (20 for this dataset)

# Display some pictures of the dataset, one for each class.
# The grid is sized from K instead of being fixed at 4x5, so the cell neither
# fails with a KeyError when there are fewer than 20 classes nor silently
# drops classes when there are more.
n_cols = 5
n_rows = max(1, -(-K // n_cols))  # ceiling division, at least one row
fig, axes = plt.subplots(nrows=n_rows, ncols=n_cols,
                         figsize=(15, 1.75 * n_rows),  # (15, 7) when K = 20
                         squeeze=False,  # always a 2-D array of axes
                         subplot_kw={'xticks': [], 'yticks': []})

grid_cells = axes.ravel()
for ax, image_path, label in zip(grid_cells, df_unique.Filepath, df_unique.Label):
    ax.imshow(plt.imread(image_path))
    ax.set_title(label, fontsize=12)

# Hide the cells left over when K is not a multiple of n_cols.
for ax in grid_cells[K:]:
    ax.axis('off')

plt.tight_layout(pad=0.15)
plt.show()


In [None]:
### Preview a single image: the one in the second row of the shuffled DataFrame
sample_path = df['Filepath'].iloc[1]
sample_image = plt.imread(sample_path)
plt.imshow(sample_image)
plt.show()

In [None]:
### The rows are already shuffled, so three consecutive slices of 200 rows
### (0-199, 200-399, 400-599) serve as the training, validation, and testing sets.
df_train, df_validation, df_test = (
    df.iloc[start:start + 200] for start in (0, 200, 400)
)

### Number of training images per class
df_train.Label.value_counts()

In [None]:
### Number of testing images per class
df_test.Label.value_counts()

In [None]:
### prepare the images for training, validation, and testing

# Augmentation options belong to the ImageDataGenerator constructor.  Passed to
# flow_from_dataframe() they are not applied, so the training images would not
# be augmented at all.  Only the training generator augments; validation and
# test images are only run through the MobileNetV2 preprocessing function.
train_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    preprocessing_function=tf.keras.applications.mobilenet_v2.preprocess_input,
    rotation_range=30,
    zoom_range=0.15,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.15,
    horizontal_flip=True,
    fill_mode="nearest"
)

validation_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    preprocessing_function=tf.keras.applications.mobilenet_v2.preprocess_input
)
test_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    preprocessing_function=tf.keras.applications.mobilenet_v2.preprocess_input
)

# Use one explicit class list for all three iterators.  Without it each
# iterator infers its classes from its own 200-row subset, so a class missing
# from one subset would shift the label indices and change the one-hot width.
class_names = sorted(df['Label'].unique())

train_images = train_generator.flow_from_dataframe(
    dataframe=df_train,
    x_col='Filepath',
    y_col='Label',
    target_size=(224, 224),
    color_mode='rgb',
    classes=class_names,
    class_mode='categorical',
    batch_size=32,
    shuffle=True,
    seed=0
)

# Validation and test batches keep the DataFrame order (shuffle=False).
validation_images = validation_generator.flow_from_dataframe(
    dataframe=df_validation,
    x_col='Filepath',
    y_col='Label',
    target_size=(224, 224),
    color_mode='rgb',
    classes=class_names,
    class_mode='categorical',
    batch_size=32,
    shuffle=False
)

test_images = test_generator.flow_from_dataframe(
    dataframe=df_test,
    x_col='Filepath',
    y_col='Label',
    target_size=(224, 224),
    color_mode='rgb',
    classes=class_names,
    class_mode='categorical',
    batch_size=32,
    shuffle=False
)

# Build a CNN model for image classification

In [None]:
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense, Flatten, Conv2D, MaxPooling2D
from keras.losses import categorical_crossentropy
from keras.optimizers import Adam


# Use a pre-trained model for image classification



In [None]:
### Get the pre-trained model, here we use MobileNetV2
### There are many other pre-trained models to use. Check them at https://keras.io/api/applications/

# include_top=False drops the original ImageNet classification head, and
# pooling='avg' applies global average pooling to the last feature map.
pretrained_model = tf.keras.applications.MobileNetV2(
    input_shape=(224, 224, 3),
    include_top=False,
    weights='imagenet',
    pooling='avg'
)
## freeze the pre-trained model so its weights are not updated while the classifier is trained
pretrained_model.trainable = False

## define the size of input to the classifier, 224*224*3.
## Each image has 224*224 pixels. Each pixel is represented by a combination of three colors: Red, Green, Blue
## This size is the same as the input of the pre-trained model
inputs = pretrained_model.input

## define the classifier: two hidden layers, each with 128 hidden units
x = tf.keras.layers.Dense(128, activation='relu')(pretrained_model.output)
x = tf.keras.layers.Dense(128, activation='relu')(x)

## define the output layer, with K output units (one per class) and softmax activation
outputs = tf.keras.layers.Dense(K, activation='softmax')(x)  ## K is the number of classes

## specify the classification model, training loss and training optimizer
model_with_pre_trained = tf.keras.Model(inputs=inputs, outputs=outputs)

model_with_pre_trained.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

## train the classifier.
## NOTE: here only the two-layer classifier is trained. The pre-trained model is not trained.
## batch_size is not passed to fit(): train_images already yields batches of 32,
## and Keras says not to specify batch_size when the data is a generator.
history = model_with_pre_trained.fit(
    train_images,
    validation_data=validation_images,
    epochs=5
)