### To get hands-on, practical experience building CNNs, this project applies image augmentation and classification to fruit and vegetable images. The data was sourced from [Kaggle](https://www.kaggle.com/datasets/kritikseth/fruit-and-vegetable-image-recognition).

In [1]:
#Let's get/import the required libraries and modules

import numpy as np

from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import Dense, Activation, Dropout
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import MaxPool2D
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator

### Training image preprocessing and transformation

In [2]:
# Training pipeline: multiply pixel values by 1/255 and apply random zoom,
# shear and horizontal flips as on-the-fly augmentation.
training_datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)

# Stream batches of 32 images, resized to 224x224, from the training directory;
# class_mode "categorical" requests categorical (one-hot) labels.
training_images = training_datagen.flow_from_directory(
    "fruits data/fruits train",
    class_mode="categorical",
    batch_size=32,
    target_size=(224, 224),
)

Found 3115 images belonging to 36 classes.


### Validation images preprocessing

In [3]:
# Validation images are only rescaled (no augmentation), using the same
# 1/255 factor as the training pipeline.
validation_datagen = ImageDataGenerator(rescale=1.0 / 255)

# Same image size, batch size and label encoding as the training generator.
validation_images = validation_datagen.flow_from_directory(
    "fruits data/fruits validation",
    class_mode="categorical",
    batch_size=32,
    target_size=(224, 224),
)

Found 351 images belonging to 36 classes.


### Building the CNN

In [4]:
# Size the output layer from the training generator so the classifier head
# always matches the number of classes found on disk (36 for this dataset).
num_classes = len(training_images.class_indices)

# Build the model as an ordered stack of layers.
cnn = Sequential([
    # Explicit Input layer instead of passing `input_shape` to the first Conv2D,
    # which newer Keras versions warn about. 224x224 matches the generators'
    # target_size; the third dimension (3) means colour images.
    Input(shape = (224, 224, 3)),

    # First convolution block: convolution followed by 2x2 max pooling
    Conv2D(filters = 32, kernel_size = 3, activation = "relu"),
    MaxPool2D(pool_size = 2, strides = 2),

    # Second convolution block
    Conv2D(filters = 32, kernel_size = 3, activation = "relu"),
    MaxPool2D(pool_size = 2, strides = 2),

    # Flatten the feature maps into a 1D vector
    Flatten(),

    # Fully connected layer with 50% dropout
    Dense(512, activation = "relu"),
    Dropout(0.5),

    # Final layer: one softmax output per class
    Dense(num_classes, activation = "softmax"),
])

# Compile the CNN; categorical cross-entropy pairs with the generators'
# class_mode = "categorical" labels.
cnn.compile(optimizer = "adam", loss = "categorical_crossentropy", metrics = ["accuracy"])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


### Training the CNN

In [5]:
# Keep the History object returned by fit() instead of discarding it: it holds
# the per-epoch training/validation metrics, so learning curves can be
# inspected after training. Assigning it also keeps the bare object repr out
# of the cell output.
history = cnn.fit(x = training_images, validation_data = validation_images, epochs = 30)

Epoch 1/30


  self._warn_if_super_not_called()


[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m186s[0m 2s/step - accuracy: 0.0472 - loss: 4.7555 - val_accuracy: 0.1225 - val_loss: 3.2223
Epoch 2/30
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m153s[0m 1s/step - accuracy: 0.0885 - loss: 3.2748 - val_accuracy: 0.2137 - val_loss: 2.9872
Epoch 3/30
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m151s[0m 1s/step - accuracy: 0.1473 - loss: 3.1398 - val_accuracy: 0.2650 - val_loss: 2.6490
Epoch 4/30
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m162s[0m 2s/step - accuracy: 0.1850 - loss: 2.9145 - val_accuracy: 0.3675 - val_loss: 2.4221
Epoch 5/30
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m159s[0m 2s/step - accuracy: 0.2349 - loss: 2.7512 - val_accuracy: 0.4957 - val_loss: 2.0381
Epoch 6/30
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m173s[0m 2s/step - accuracy: 0.2809 - loss: 2.4947 - val_accuracy: 0.5157 - val_loss: 1.7648
Epoch 7/30
[1m98/98[0m [32m━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x229d9bcf9a0>

In [9]:
from tensorflow.keras.preprocessing import image
import os

In [14]:
# Test images are only rescaled, like the validation images.
test_datagen = ImageDataGenerator(rescale=1.0 / 255)

# shuffle=False keeps the images in a fixed order so that model outputs can be
# compared position-by-position with test_images.classes.
test_images = test_datagen.flow_from_directory(
    "fruits data/fruits test",
    shuffle=False,
    class_mode="categorical",
    batch_size=32,
    target_size=(224, 224),
)

Found 359 images belonging to 36 classes.


In [15]:
# Run the trained model over the test generator. The final layer is a softmax,
# so each row holds one score per class. The test generator was created with
# shuffle = False, so rows follow the same order as test_images.classes.
predictions = cnn.predict(test_images)

  self._warn_if_super_not_called()


[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 1s/step


In [16]:
# Collapse each row of class scores to the index of the highest-scoring class.
# The ndim guard makes this cell safe to re-run: once `predictions` already
# holds 1-D class indices, a second argmax over axis 1 would raise an AxisError.
predictions = np.asarray(predictions)
if predictions.ndim > 1:
    predictions = np.argmax(predictions, axis = 1)

In [17]:
# Score the model on the test set. evaluate() returns the loss followed by the
# metrics passed to compile() (here: accuracy), hence the two-value unpacking.
loss, accuracy = cnn.evaluate(test_images)
print(f"Test Accuracy: {accuracy*100:.2f}%")

[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 1s/step - accuracy: 0.9141 - loss: 0.4435
Test Accuracy: 93.87%


#### Achieving roughly 94% classification accuracy on the test set seems quite reasonable

In [18]:
# Integer class label of every test image, as recorded by the generator;
# used below as the ground truth for the classification report.
test_images.classes

array([ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  3,  3,  3,  3,  3,
        3,  3,  3,  3,  3,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  5,  5,
        5,  5,  5,  5,  5,  5,  5,  5,  6,  6,  6,  6,  6,  6,  6,  6,  6,
        6,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  8,  8,  8,  8,  8,  8,
        8,  8,  8,  8,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9, 10, 10, 10,
       10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
       12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13,
       13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15,
       15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 17,
       17, 17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18,
       18, 18, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 20, 20, 20, 20, 20,
       20, 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22,
       22, 22, 22, 22, 22

In [19]:
# Predicted class index per test image (after the argmax step), displayed for
# a side-by-side comparison with test_images.classes.
predictions

array([ 0,  0,  0,  0, 25,  5,  0,  0,  0, 28,  1,  1, 35,  1,  1,  1,  1,
        1, 27,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  3,  3,  3,  3,  3,
        3,  3,  5,  3,  3,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  5,  5,
        5,  5,  5,  3,  5,  5,  5,  5,  6,  6,  6,  6, 33,  6,  6,  6,  6,
        6,  7, 31,  7,  7,  7,  7,  7,  7,  7,  7,  8,  8, 26,  8,  8,  8,
        8,  8,  8,  8,  9,  9,  9, 31,  9, 31,  9,  9, 31,  9, 10, 10, 10,
       10, 10, 10, 10, 10, 10,  4, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
       12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13,
       29, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15,
       15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 17,
       17, 17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18,
       18, 18, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 20, 20, 20, 20, 20,
       20, 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22,
       22, 22, 22, 22, 22

In [20]:
from sklearn.metrics import classification_report

In [22]:
# Class names known to the test generator (the keys of its name -> label mapping).
test_images.class_indices.keys()

dict_keys(['apple', 'banana', 'beetroot', 'bell pepper', 'cabbage', 'capsicum', 'carrot', 'cauliflower', 'chilli pepper', 'corn', 'cucumber', 'eggplant', 'garlic', 'ginger', 'grapes', 'jalepeno', 'kiwi', 'lemon', 'lettuce', 'mango', 'onion', 'orange', 'paprika', 'pear', 'peas', 'pineapple', 'pomegranate', 'potato', 'raddish', 'soy beans', 'spinach', 'sweetcorn', 'sweetpotato', 'tomato', 'turnip', 'watermelon'])

In [23]:
# Generate a labelled classification report (per-class precision, recall and F1).

# class_indices maps class name -> integer label. Order the pairs by label and
# pass both labels and names explicitly, so each report row is guaranteed to
# carry the right name rather than relying on the dict's iteration order.
ordered_classes = sorted(test_images.class_indices.items(), key = lambda item: item[1])
class_names = [name for name, _ in ordered_classes]
class_labels = [label for _, label in ordered_classes]

report = classification_report(
    test_images.classes,
    predictions,
    labels = class_labels,
    target_names = class_names,
)

print(report)

               precision    recall  f1-score   support

        apple       1.00      0.70      0.82        10
       banana       1.00      0.78      0.88         9
     beetroot       1.00      1.00      1.00        10
  bell pepper       0.90      0.90      0.90        10
      cabbage       0.91      1.00      0.95        10
     capsicum       0.82      0.90      0.86        10
       carrot       1.00      0.90      0.95        10
  cauliflower       1.00      0.90      0.95        10
chilli pepper       1.00      0.90      0.95        10
         corn       0.88      0.70      0.78        10
     cucumber       1.00      0.90      0.95        10
     eggplant       1.00      1.00      1.00        10
       garlic       0.91      1.00      0.95        10
       ginger       0.90      0.90      0.90        10
       grapes       1.00      1.00      1.00        10
     jalepeno       1.00      1.00      1.00        10
         kiwi       1.00      1.00      1.00        10
        l