
# Convolutional Neural Network - Fruit Classification


In [1]:
#########################################################################
# Import required packages
#########################################################################

In [5]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Activation, Flatten, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint
from keras_tuner.tuners import RandomSearch
from keras_tuner.engine.hyperparameters import HyperParameters
import os

ModuleNotFoundError: No module named 'keras_tuner'

In [None]:
#########################################################################
# Set Up flow For Training & Validation data
#########################################################################

In [None]:
# Data flow parameters: where the images live and the shape of the batches
# that are fed to the network.
training_data_dir, validation_data_dir = 'data/training', 'data/validation'
batch_size = 32                     # images per generated batch
image_width = image_height = 128    # every image is resized to 128 x 128
num_channels = 3                    # last dimension of the network input shape
num_classes = 6                     # size of the final softmax layer

In [None]:
# Image generators. Both rescale pixel values by 1/255; the random
# augmentation settings are applied to the training generator only, so the
# validation images are left unaltered apart from the rescale.
augmentation_settings = dict(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    brightness_range=(0.5, 1.5),
    fill_mode='nearest',
)

training_generator = ImageDataGenerator(rescale=1./255, **augmentation_settings)

validation_generator = ImageDataGenerator(rescale=1./255)

In [None]:
# Image flows: read the training and validation images from their directories
# using identical size / batch / label settings.
# NOTE(review): Keras documents target_size as (height, width); the
# (width, height) order used here is harmless while both are 128 and it is
# consistent with the model's input_shape - confirm before going non-square.
flow_settings = dict(
    target_size=(image_width, image_height),
    batch_size=batch_size,
    class_mode='categorical',
)
training_set = training_generator.flow_from_directory(
    directory=training_data_dir, **flow_settings)
validation_set = validation_generator.flow_from_directory(
    directory=validation_data_dir, **flow_settings)
# class mode would be binary if we had only two classes of images

In [None]:
#########################################################################
# Network Architecture
#########################################################################

In [None]:
# network architecture (hyperparameter Random Search to find the best model)
def build_model(hp):
    """Build and compile a CNN whose layout is drawn from the hyperparameters.

    Args:
        hp: hyperparameter container exposing ``Int``, ``Boolean`` and
            ``Choice`` (the object a tuner passes to its hypermodel).

    Returns:
        A compiled ``Sequential`` model: an input convolution stage, 1-5
        further convolution stages, 1-4 dense layers (optionally followed by
        dropout) and a softmax output of ``num_classes`` units.
    """
    model = Sequential()

    # Input stage: the only layer that declares the input shape.
    model.add(Conv2D(filters=hp.Int("input_conv_filters", min_value=32, max_value=256, step=32),
                     kernel_size=(3, 3),
                     padding='same',
                     input_shape=(image_width, image_height, num_channels)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D())

    # Additional Conv -> ReLU -> MaxPool stages; each has its own filter count.
    for i in range(hp.Int("n_Conv_Layers", min_value=1, max_value=5, step=1)):
        model.add(Conv2D(filters=hp.Int(f"Conv{i}_filters", min_value=32, max_value=256, step=32),
                         kernel_size=(3, 3),
                         padding='same'))
        model.add(Activation('relu'))
        model.add(MaxPooling2D())

    model.add(Flatten())

    # Dense head; each layer has its own width.
    for j in range(hp.Int("n_Dense_Layers", min_value=1, max_value=4, step=1)):
        model.add(Dense(hp.Int(f"Dense_{j}_Neurons", min_value=32, max_value=256, step=32)))
        model.add(Activation('relu'))

        # A single "Dropout" switch is shared by all dense layers: when it is
        # on, every dense layer is followed by Dropout(0.5).
        if hp.Boolean("Dropout"):
            model.add(Dropout(0.5))

    # Output layer: one softmax unit per class.
    model.add(Dense(num_classes))
    model.add(Activation('softmax'))

    model.compile(loss='categorical_crossentropy',
                  optimizer=hp.Choice('optimizer', values=['adam', 'RMSProp']),
                  metrics=['accuracy'])
    return model

In [None]:
# Random-search hyperparameter tuning (disabled).
# The whole cell is commented out so that no indented argument lines are left
# behind as live code; uncomment every line below to re-run the search.
# tuner = RandomSearch(hypermodel = build_model,
#                      objective = 'val_accuracy',
#                      max_trials = 5,
#                      executions_per_trial = 5,
#                      directory = os.path.normpath('C:/'),
#                      project_name = 'fruit_CNN',
#                      overwrite = True)
#
# tuner.search(x = training_set,
#              validation_data = validation_set,
#              epochs = 100,
#              batch_size = 32)

In [None]:
### view architecture
## tuner.results_summary()

In [None]:
# best network
### tuner.get_best_hyperparameters()[0].values

In [None]:
## summary of best network
### tuner.get_best_models()[0].summary()

In [None]:
# Network architecture: four Conv2D(3x3, 'same') -> ReLU -> MaxPooling2D
# stages (96, 192, 160, 160 filters), flattened into Dense(192) -> ReLU ->
# Dropout(0.5), finishing with a softmax over num_classes outputs.
model = Sequential()

# First stage: the only layer that declares the input shape.
model.add(Conv2D(filters=96,
                 kernel_size=(3, 3),
                 padding='same',
                 input_shape=(image_width, image_height, num_channels)))
model.add(Activation('relu'))
model.add(MaxPooling2D())

# Remaining convolution stages differ only in their filter count.
for stage_filters in (192, 160, 160):
    model.add(Conv2D(filters=stage_filters, kernel_size=(3, 3), padding='same'))
    model.add(Activation('relu'))
    model.add(MaxPooling2D())

model.add(Flatten())

# Dense head with dropout.
model.add(Dense(192))
model.add(Activation('relu'))
model.add(Dropout(0.5))

# Output layer: one softmax unit per class.
model.add(Dense(num_classes))
model.add(Activation('softmax'))

model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [None]:
#########################################################################
# Train Our Network!
#########################################################################

In [None]:
# Training parameters: number of epochs to run and the file the checkpoint
# callback writes the model to.
num_epochs, model_filename = 100, 'models/fruits_CNN_V05.h5'

In [None]:
# Callbacks: checkpoint the model to model_filename, keeping only the epoch
# with the highest validation accuracy seen so far (verbose=1 reports saves).
save_best_model = ModelCheckpoint(
    model_filename,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)

In [None]:
# Train the network.
# batch_size is deliberately not passed to fit(): training_set is a generator
# that already yields batches of `batch_size` images, and Keras documents that
# batch_size must not be specified for generator / Sequence inputs.
history = model.fit(x=training_set,
                    validation_data=validation_set,
                    epochs=num_epochs,
                    callbacks=[save_best_model])

In [None]:
#########################################################################
# Visualise Training & Validation Performance
#########################################################################

In [None]:
import matplotlib.pyplot as plt

# Plot training vs. validation curves per epoch: loss on the top axes,
# accuracy on the bottom axes.
fig, ax = plt.subplots(2, 1, figsize=(15,15))
for axes, title, metric in zip(ax, ('Loss', 'Accuracy'), ('loss', 'accuracy')):
    axes.set_title(title)
    axes.plot(history.epoch, history.history[metric], label=f"Training {title}")
    axes.plot(history.epoch, history.history[f"val_{metric}"], label=f"Validation {title}")
    axes.legend()
plt.show()

In [None]:
# Highest validation accuracy reached in any epoch (shown as the cell output).
best_val_accuracy = max(history.history['val_accuracy'])
best_val_accuracy

In [None]:
#########################################################################
# Make Predictions On New Data (Test Set)
#########################################################################

In [None]:
# import required packages
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import numpy as np
import pandas as pd
from os import listdir



In [None]:
# Parameters for prediction: the saved model to load, the size images are
# resized to, and the label for each output index of the network.
model_filename = 'models/fruits_CNN_V05.h5'
image_width = image_height = 128
labels_list = [
    'apple',
    'avocado',
    'banana',
    'kiwi',
    'lemon',
    'orange',
]


In [None]:
# load model
# Rebinds `model` to the network stored at model_filename - the same path the
# ModelCheckpoint callback writes to with save_best_only=True.
model = load_model(model_filename)

In [None]:
# image pre-processing function

def preprocess_image(filepath):
    """Load an image file and turn it into a single-image batch.

    The image is resized to (image_width, image_height) on load, converted to
    an array, given a leading batch axis and scaled by 1/255 - the same
    rescale factor the training and validation generators use.

    Args:
        filepath: path of the image file to load.

    Returns:
        The scaled image array with one extra leading axis.
    """
    loaded = load_img(filepath, target_size=(image_width, image_height))
    pixels = img_to_array(loaded)
    single_batch = np.expand_dims(pixels, axis=0)
    return single_batch * (1./255)

# image prediction function

def make_prediction(image):
    """Predict the label of a preprocessed image with the loaded model.

    Args:
        image: array passed straight to ``model.predict``; expected to be a
            batch holding one image, as returned by ``preprocess_image``.

    Returns:
        A ``(predicted_label, predicted_prob)`` tuple: the entry of
        ``labels_list`` with the highest predicted probability, and that
        probability.
    """
    class_probs = model.predict(image)

    # Work on the first row only. argmax over the whole 2-D output flattens
    # it, so the index would only be a valid class index for a batch of one.
    image_probs = class_probs[0]
    predicted_class = np.argmax(image_probs)

    predicted_label = labels_list[predicted_class]
    predicted_prob = image_probs[predicted_class]

    return predicted_label, predicted_prob


In [None]:
# Loop through the test data: one sub-folder per class under source_dir; the
# folder name is the actual label of every image inside it.

source_dir = 'data/test/'
folder_names = ['apple', 'avocado', 'banana', 'kiwi', 'lemon', 'orange']
actual_labels = []
predicted_labels = []
predicted_probabilities = []
filenames = []

for folder in folder_names:

    # os.path.join avoids the doubled separator that plain concatenation
    # produced, because source_dir already ends with '/'.
    folder_path = os.path.join(source_dir, folder)

    # listdir returns entries in arbitrary order; sort them so the rows of
    # the results table come out in the same order on every run.
    images = sorted(listdir(folder_path))

    for image in images:

        processed_image = preprocess_image(os.path.join(folder_path, image))
        predicted_label, predicted_probability = make_prediction(processed_image)

        actual_labels.append(folder)
        predicted_labels.append(predicted_label)
        predicted_probabilities.append(predicted_probability)
        filenames.append(image)

In [None]:
# Collect the per-image results into one dataframe (one row per test image).
prediction_columns = {
    "actual_label": actual_labels,
    "predicted_label": predicted_labels,
    "predicted_probability": predicted_probabilities,
    "filename": filenames,
}
prediction_df = pd.DataFrame(prediction_columns)

In [None]:
# Flag each row: 1 when the predicted label equals the actual label, else 0.
labels_match = prediction_df['actual_label'] == prediction_df['predicted_label']
prediction_df['correct'] = np.where(labels_match, 1, 0)
prediction_df

In [None]:
# Overall test set accuracy: number of correct rows divided by all rows.

n_correct = prediction_df['correct'].sum()
n_images = len(prediction_df)
test_set_accuracy = n_correct / n_images
print(test_set_accuracy)
# Results noted for earlier versions of the network:
## 68 % basic
## 0.88 % dropout
## 93 % augmentation & dropout
## hp 100 % accuracy

In [None]:
# Confusion matrix of counts: rows are predicted labels, columns actual labels.
confusion_matrix = pd.crosstab(index=prediction_df['predicted_label'],
                               columns=prediction_df['actual_label'])
confusion_matrix

In [None]:
# Confusion matrix as proportions: same layout (rows predicted, columns
# actual), normalised so that each actual-label column sums to 1.
confusion_matrix = pd.crosstab(index=prediction_df['predicted_label'],
                               columns=prediction_df['actual_label'],
                               normalize='columns')
confusion_matrix