## Importing libraries

In [None]:
# Installing opendatasets library
!pip install opendatasets

In [None]:
# Importing libraries
import os
import itertools
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import opendatasets as od
from google.colab import drive
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import image_dataset_from_directory
from sklearn.metrics import classification_report, confusion_matrix

## Connecting to Google Drive

In [None]:
# Mount Google Drive under /content/gdrive
gdrive_mount_point = '/content/gdrive'
drive.mount(gdrive_mount_point)

## Dataset download and preprocessing

#### Downloading the fruit and vegetable image recognition dataset

In [None]:
# Download the dataset from Kaggle.
# Requires the Kaggle username and API key (found after login in the Kaggle profile under "API").
dataset_url = "https://www.kaggle.com/datasets/kritikseth/fruit-and-vegetable-image-recognition"
od.download(dataset_url)

In [None]:
# Directories of the train and validation splits inside the downloaded dataset folder
dataset_root = "/content/fruit-and-vegetable-image-recognition"
train_dir = f"{dataset_root}/train"
valid_dir = f"{dataset_root}/validation"

# Check
train_dir

#### Setup of training, validation, and test set

In [None]:
# Training set setup: labels are inferred from the sub-folder names (categorical label mode),
# images are read as RGB, resized to 224x224 and delivered in shuffled batches of 32
training_set_options = dict(
    labels='inferred',
    label_mode='categorical',
    class_names=None,
    color_mode='rgb',
    batch_size=32,
    image_size=(224, 224),
    shuffle=True,
    seed=None,
    validation_split=None,
    subset=None,
    interpolation="bilinear",
    follow_links=False,
    crop_to_aspect_ratio=False,
)
training_set = image_dataset_from_directory(train_dir, **training_set_options)

In [None]:
# Validation set setup: same loading options as the training set (inferred categorical labels,
# RGB, 224x224, shuffled batches of 32), read from the validation directory
validation_set_options = dict(
    labels='inferred',
    label_mode='categorical',
    class_names=None,
    color_mode='rgb',
    batch_size=32,
    image_size=(224, 224),
    shuffle=True,  # shuffling
    seed=None,
    validation_split=None,
    subset=None,
    interpolation="bilinear",
    follow_links=False,
    crop_to_aspect_ratio=False,
)
validation_set = image_dataset_from_directory(valid_dir, **validation_set_options)

In [None]:
# Test set setup: same loading options as the training set (inferred categorical labels,
# RGB, 224x224, shuffled batches of 32), read from the test directory
test_dir = r'/content/fruit-and-vegetable-image-recognition/test'
test_set_options = dict(
    labels='inferred',
    label_mode='categorical',
    class_names=None,
    color_mode='rgb',
    batch_size=32,
    image_size=(224, 224),
    shuffle=True,
    seed=None,
    validation_split=None,
    subset=None,
    interpolation="bilinear",
    follow_links=False,
    crop_to_aspect_ratio=False,
)
test_set = image_dataset_from_directory(test_dir, **test_set_options)

In [None]:
# Extracting class names before rescaling
training_set_class_names = training_set.class_names
validation_set_class_names = validation_set.class_names
test_set_class_names = test_set.class_names

# The evaluation cells decode the model's output indices with the validation/test lists,
# so all three splits must expose the same classes in the same order
assert training_set_class_names == validation_set_class_names == test_set_class_names, \
    "train/validation/test folders do not contain the same classes in the same order"

# Check
test_set_class_names

#### Rescaling

In [None]:
# Check before rescaling (pixel values are expected to still be in the raw 0-255 range).
# The loop variable is deliberately not named `image`: that would overwrite the `image`
# module imported from tensorflow.keras.preprocessing at the top of the notebook.
for image_batch, _ in training_set.take(1):
    print(image_batch)
    print("value range:", float(tf.reduce_min(image_batch)), "-", float(tf.reduce_max(image_batch)))

In [None]:
# Rescaling
def preprocess_rescale(image,label):
    """Divide the image values by 255 and return image and label cast to float32."""
    scaled_image = image / 255.
    return tf.cast(scaled_image, tf.float32), tf.cast(label, tf.float32)

# Apply the rescaling to all three datasets. The map runs with parallel calls and the
# resulting batches are prefetched, so preprocessing can overlap with model execution.
# NOTE: each name is rebound to its rescaled dataset, so re-running this cell divides by
# 255 a second time — re-create the datasets before re-running it.
training_set = training_set.map(preprocess_rescale, num_parallel_calls=tf.data.AUTOTUNE).prefetch(tf.data.AUTOTUNE)
validation_set = validation_set.map(preprocess_rescale, num_parallel_calls=tf.data.AUTOTUNE).prefetch(tf.data.AUTOTUNE)
test_set = test_set.map(preprocess_rescale, num_parallel_calls=tf.data.AUTOTUNE).prefetch(tf.data.AUTOTUNE)

In [None]:
# Check after rescaling (pixel values are expected to lie in the 0-1 range now).
# The loop variable is deliberately not named `image`: that would overwrite the `image`
# module imported from tensorflow.keras.preprocessing at the top of the notebook.
for image_batch, _ in training_set.take(1):
    print(image_batch)
    print("value range:", float(tf.reduce_min(image_batch)), "-", float(tf.reduce_max(image_batch)))

## Model setup

In [None]:
# Defining the model (CNN - convolutional neural network)

# One softmax output unit per class folder found in the training data (instead of a hard-coded 36)
num_classes = len(training_set_class_names)

cnn = tf.keras.models.Sequential([
    # Explicit input layer: 224x224 RGB images, matching image_size used when loading the datasets
    tf.keras.Input(shape=(224, 224, 3)),
    # Convolutional block 1
    tf.keras.layers.Conv2D(filters=32, kernel_size=3, activation='relu'),
    tf.keras.layers.Conv2D(filters=32, kernel_size=3, activation='relu'),
    tf.keras.layers.MaxPool2D(pool_size=2, strides=2),
    # Convolutional block 2
    tf.keras.layers.Conv2D(filters=64, kernel_size=3, activation='relu'),
    tf.keras.layers.Conv2D(filters=64, kernel_size=3, activation='relu'),
    tf.keras.layers.MaxPool2D(pool_size=2, strides=2),
    # Classifier head
    tf.keras.layers.Flatten(),  # Flattening
    tf.keras.layers.Dense(units=512, activation='relu'),
    tf.keras.layers.Dense(units=512, activation='relu'),
    tf.keras.layers.Dropout(0.4),  # Regularization
    tf.keras.layers.Dense(units=num_classes, activation='softmax'),
])

In [None]:
# Model summary (printed overview of the layers defined above)
cnn.summary()

In [None]:
# Compiling the model: categorical cross-entropy matches the categorical labels of the
# datasets. `metrics` is passed as a list; a bare string is rejected by newer Keras versions.
cnn.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

## Training and saving

In [None]:
# Setting up early stopping on the validation loss with a patience of 5 epochs
early_stopping_options = dict(
    monitor='val_loss',
    min_delta=0,
    patience=5,
    verbose=0,
    mode='auto',
)
es = tf.keras.callbacks.EarlyStopping(**early_stopping_options)

In [None]:
# Fitting the model (and creating history).
# No batch_size is passed: the datasets were created with batch_size=32 and already yield
# batches, and Keras expects batch_size to be left unset for dataset inputs.
# NOTE(review): with epochs=3 an early-stopping patience of 5 can never be reached —
# confirm the intended number of epochs.
history = cnn.fit(
    training_set,
    validation_data=validation_set,
    epochs=3,
    callbacks=[es],
)

In [None]:
# Setting Google Drive path (created if it does not exist yet, because DataFrame.to_csv
# fails when the target directory is missing)
drive_path = '/content/gdrive/MyDrive/Data Analytics/Ironhack/Mold detection/cnn_models/'
os.makedirs(drive_path, exist_ok=True)

In [None]:
# Show the per-epoch training history and store it as CSV on Google Drive
history_df = pd.DataFrame(data=history.history)
display(history_df)

history_csv_path = drive_path + 'history_fruit_veg_classifier.csv'
history_df.to_csv(history_csv_path)

In [None]:
# Saving (.keras: stores the model's architecture, weights, and training configuration
# in a single `model.keras` zip archive)
model_path = f"{drive_path}fruit_veg_classifier.keras"
cnn.save(model_path)

## Evaluation

In [None]:
# Choosing the model to evaluate: the in-memory `cnn` (default) or the copy saved on Drive.
# An explicit switch replaces the commented-out line that had to be edited by hand.
USE_SAVED_MODEL = False

if USE_SAVED_MODEL:
    loaded_model = tf.keras.models.load_model(drive_path + 'fruit_veg_classifier.keras')
else:
    loaded_model = cnn

#### Training and validation set

In [None]:
# Summarizing history for accuracy and loss: one figure per metric,
# each with the training curve and the validation curve
for metric in ('accuracy', 'loss'):
    fig, ax = plt.subplots()
    ax.plot(history.history[metric])
    ax.plot(history.history['val_' + metric])
    ax.set_title('model ' + metric)
    ax.set_ylabel(metric)
    ax.set_xlabel('epoch')
    ax.legend(['train', 'validation'], loc='upper left')
    plt.show()

In [None]:
# Classification report and confusion matrix (validation_set)

class_names = validation_set_class_names
# Explicit label indices keep the report and the matrix aligned with class_names even if
# a class happens to be absent from both the true and the predicted labels
class_indices = np.arange(len(class_names))

# Initializing variables to store true and predicted labels
true_labels = []
predicted_labels = []

# Iterate through the validation set and make predictions. True and predicted labels are
# taken from the same batch, so they stay aligned although the dataset is shuffled.
for images, labels in validation_set:
    true_labels.extend(np.argmax(labels, axis=1))  # Get true labels
    predictions = loaded_model.predict(images, verbose=0)  # verbose=0: no progress bar per batch
    predicted_labels.extend(np.argmax(predictions, axis=1))  # Get predicted labels

# Create a classification report
report = classification_report(true_labels, predicted_labels,
                               labels=class_indices, target_names=class_names)

# Print the classification report
print("Classification Report:\n", report)

# Create a confusion matrix
confusion = confusion_matrix(true_labels, predicted_labels, labels=class_indices)

# Plot the confusion matrix
def plot_confusion_matrix(cm, class_names):
    """Draw `cm` as an annotated heat map with `class_names` as tick labels on both axes.

    The y axis is labelled 'True label' and the x axis 'Predicted label'. The figure is
    created but not shown; the caller calls plt.show().
    """
    fig, ax = plt.subplots(figsize=(16, 12))
    heatmap = ax.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
    ax.set_title('Confusion Matrix')
    fig.colorbar(heatmap, ax=ax)

    tick_positions = np.arange(len(class_names))
    ax.set_xticks(tick_positions)
    ax.set_xticklabels(class_names, rotation=45)
    ax.set_yticks(tick_positions)
    ax.set_yticklabels(class_names)

    # Write each count into its cell; white text on cells darker than half the maximum
    half_max = cm.max() / 2.
    for (row, col), count in np.ndenumerate(cm):
        text_color = "white" if count > half_max else "black"
        ax.text(col, row, format(count, 'd'), horizontalalignment="center", color=text_color)

    ax.set_ylabel('True label')
    ax.set_xlabel('Predicted label')
    fig.tight_layout()

plot_confusion_matrix(confusion, class_names)
plt.show()

#### Test set - full

In [None]:
# Loss and accuracy on the full test set, using the model selected for evaluation
# (`loaded_model`) like the other evaluation cells
test_loss, test_accuracy = loaded_model.evaluate(test_set)

In [None]:
# Classification report and confusion matrix (test_set)

class_names = test_set_class_names
# Explicit label indices keep the report and the matrix aligned with class_names even if
# a class happens to be absent from both the true and the predicted labels
class_indices = np.arange(len(class_names))

# Initializing variables to store true and predicted labels
true_labels = []
predicted_labels = []

# Iterate through the test set and make predictions. True and predicted labels are
# taken from the same batch, so they stay aligned although the dataset is shuffled.
for images, labels in test_set:
    true_labels.extend(np.argmax(labels, axis=1))  # Get true labels
    predictions = loaded_model.predict(images, verbose=0)  # verbose=0: no progress bar per batch
    predicted_labels.extend(np.argmax(predictions, axis=1))  # Get predicted labels

# Create a classification report
report = classification_report(true_labels, predicted_labels,
                               labels=class_indices, target_names=class_names)

# Print the classification report
print("Classification Report:\n", report)

# Create a confusion matrix
confusion = confusion_matrix(true_labels, predicted_labels, labels=class_indices)

# Plot the confusion matrix with plot_confusion_matrix, which is defined in the
# validation-set cell above (the identical second definition was removed here)
plot_confusion_matrix(confusion, class_names)
plt.show()

#### Test set - subsample

In [None]:
# Evaluating predictions of a 3x3 subsample from the test dataset

# Retrieve one batch from the test set and keep its first few images and labels
# (a single batch of 32 is enough; there is no need to pull several batches)
num_samples_to_display = 9
sample_images, sample_labels = next(iter(test_set.take(1)))
sample_images = sample_images[:num_samples_to_display]
sample_labels = sample_labels[:num_samples_to_display]

# Get the corresponding class names from the test_set
class_names = test_set_class_names

# Make predictions on the sampled images
sample_predictions = loaded_model.predict(sample_images, verbose=0)
sample_predictions = np.argmax(sample_predictions, axis=1)

# Displaying the images with their true and predicted labels
fig, axes = plt.subplots(nrows=3, ncols=3, figsize=(10, 10),
                         subplot_kw={'xticks': [], 'yticks': []})

num_available = int(sample_images.shape[0])
for i, ax in enumerate(axes.flat):
    if i >= num_available:
        # Fewer images than grid cells: leave the remaining cells empty
        ax.axis('off')
        continue
    # The dataset was rescaled to the 0-1 range, so the float image is shown as is;
    # casting it to uint8 would truncate almost every pixel to 0 (black picture)
    ax.imshow(np.clip(sample_images[i].numpy(), 0.0, 1.0))
    true_label = class_names[np.argmax(sample_labels[i])]
    predicted_label = class_names[sample_predictions[i]]
    ax.set_title(f"True: {true_label}\nPredicted: {predicted_label}")

plt.tight_layout()
plt.show()

#### Single external pictures

In [None]:
# Importing a single image.
# tf.keras.utils is used rather than the bare name `image`, so this cell does not depend on
# `image` still referring to the imported module (a loop variable of the same name would
# silently replace it).
img_path = '/content/MANDARIN.jpg'
img = tf.keras.utils.load_img(img_path, target_size=(224, 224))  # Adjusting the target size based on model's input size
img_array = tf.keras.utils.img_to_array(img)
img_array = np.expand_dims(img_array, axis=0)  # Adding the batch dimension
img_array /= 255.0  # Normalizing pixel values (same rescaling as for the datasets)

# Class names in the order the model was trained on; taken from the training set instead of
# whatever `class_names` was last assigned by an earlier cell
label_names = training_set_class_names

# Predicting the class
predictions = loaded_model.predict(img_array)

# Getting and printing the predicted class index and its name
predicted_class = np.argmax(predictions)
predicted_label = label_names[predicted_class]
print()
print("Predicted class:", predicted_class)
print(predicted_label)
print()

# Displaying the predicted class probabilities, most likely class first
predictions_df = pd.DataFrame({
    'class': list(range(len(label_names))),
    'label': label_names,
    'prediction (prob)': list(predictions[0]),
})
predictions_df = predictions_df.sort_values(by='prediction (prob)', ascending=False)
display(predictions_df)

# Showing the prediction together with the picture itself
display(predicted_class, predicted_label)
plt.imshow(img)
plt.show()