## Importing libraries

In [None]:
# Importing libraries
import os
import itertools
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import tensorflow.keras
from google.colab import drive
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import classification_report, confusion_matrix

## Connecting to Google Drive

In [None]:
# Mount Google Drive at /content/gdrive; the dataset and output paths used
# further down in this notebook all live below this mount point
drive.mount('/content/gdrive', force_remount=True)

## Preparing the dataset

#### Setting up directories, basic data checks

In [None]:
# Root folder of the dataset (already split into train / validation / test)
general_dir = '/content/gdrive/MyDrive/split_upsampled_structure'

# Train folders (one sub-folder per class)
train_dir = f'{general_dir}/train'
train_mold_dir = f'{train_dir}/mold'
train_no_mold_dir = f'{train_dir}/no_mold'

# Validation folders (one sub-folder per class)
validation_dir = f'{general_dir}/validation'
validation_mold_dir = f'{validation_dir}/mold'
validation_no_mold_dir = f'{validation_dir}/no_mold'

# Test folders (one sub-folder per class)
test_dir = f'{general_dir}/test'
test_mold_dir = f'{test_dir}/mold'
test_no_mold_dir = f'{test_dir}/no_mold'

In [None]:
# Quick checks: peek at the first five entries of every class folder
for class_folder in (
    train_no_mold_dir,
    train_mold_dir,
    validation_no_mold_dir,
    validation_mold_dir,
    test_no_mold_dir,
    test_mold_dir,
):
    first_entries = os.listdir(class_folder)[:5]
    print(first_entries)

In [None]:
# Checking number of images in folders.
# Every folder is listed exactly once (directory listings on a mounted Drive
# are slow); the per-split and overall totals are derived from these counts.
# Each entry maps a split name to (mold count, no_mold count).
image_counts = {
    'train': (len(os.listdir(train_mold_dir)), len(os.listdir(train_no_mold_dir))),
    'validation': (len(os.listdir(validation_mold_dir)), len(os.listdir(validation_no_mold_dir))),
    'test': (len(os.listdir(test_mold_dir)), len(os.listdir(test_no_mold_dir))),
}

for split_name, (mold_count, no_mold_count) in image_counts.items():
    print(f'{split_name} mold images:', mold_count)
    print(f'{split_name} no_mold images:', no_mold_count)
    print(f'{split_name} total images:', mold_count + no_mold_count)
    print()

print('total images:', sum(mold_count + no_mold_count for mold_count, no_mold_count in image_counts.values()))

#### Setting up the training, validation, and test sets

In [None]:
# Training set: pixel rescaling plus random augmentation
augmentation_options = dict(
    rescale=1./255,          # map pixel values into [0, 1]
    rotation_range=20,       # random rotation between -20 and 20 degrees
    width_shift_range=0.1,   # random horizontal shift of up to 10% of the width
    height_shift_range=0.1,  # random vertical shift of up to 10% of the height
    shear_range=0.1,         # random shear (distorts along an axis, simulates a different viewing angle)
    zoom_range=0.1,          # random zoom
    horizontal_flip=True,    # random horizontal flip
    vertical_flip=True,      # random vertical flip
)
train_generator = ImageDataGenerator(**augmentation_options)

# Stream batches from the class sub-folders; listing 'no_mold' first gives it
# label 0 and 'mold' label 1
train_set = train_generator.flow_from_directory(
    directory=train_dir,
    target_size=(224, 224),
    color_mode='rgb',
    classes=['no_mold', 'mold'],
    class_mode='binary',
    batch_size=16,
    shuffle=True,
    seed=42,
    interpolation='bilinear',
)

In [None]:
# Validation set: no augmentation, pixel values are only rescaled to [0, 1]
validation_generator = ImageDataGenerator(rescale=1./255)

# 'no_mold' is listed first so it gets label 0 and 'mold' gets label 1
validation_set = validation_generator.flow_from_directory(
    directory=validation_dir,
    target_size=(224, 224),
    color_mode='rgb',
    classes=['no_mold', 'mold'],
    class_mode='binary',
    batch_size=16,
    shuffle=True,
    seed=42,
    interpolation='bilinear',
)

In [None]:
# Test set: no augmentation, pixel values are only rescaled to [0, 1]
test_generator = ImageDataGenerator(rescale=1./255)

# shuffle=False keeps the batch order aligned with test_set.classes and
# test_set.filenames, so predictions made on test_set can be compared against
# the true labels (e.g. for a confusion matrix or classification report).
# Aggregate metrics from evaluate() do not depend on the order.
test_set = test_generator.flow_from_directory(
    test_dir,
    classes = ['no_mold','mold'],
    target_size=(224, 224),
    batch_size=16,
    shuffle=False,
    color_mode='rgb',
    class_mode='binary',
    seed=42,
    interpolation = "bilinear"
)

#### Checking if setup is correct

In [None]:
# Class indexes the generator assigned to each class name
class_index_by_name = train_set.class_indices
no_mold_class_index = class_index_by_name['no_mold']
mold_class_index = class_index_by_name['mold']
print("Index of class 'no mold':", no_mold_class_index)
print("Index of class 'mold':", mold_class_index)
print()

# Number of training samples per class, counted from the label array
train_labels = train_set.classes
no_mold_images_count = np.sum(train_labels == no_mold_class_index)
mold_images_count = np.sum(train_labels == mold_class_index)
print("Number of training images belonging to the class 'no_mold':", no_mold_images_count)
print("Number of training images belonging to the class 'mold':", mold_images_count)
print()
print("Total number of images in training set:", no_mold_images_count + mold_images_count)

In [None]:
# Plotting some train images to see if augmentation is correctly applied
def plotImages(images_arr, n_cols=8):
    """Show the first ``n_cols`` images of ``images_arr`` side by side in one row.

    Parameters
    ----------
    images_arr : iterable of images
        Images accepted by ``Axes.imshow``. Only the first ``n_cols`` images
        are drawn.
    n_cols : int, default 8
        Number of panels in the row.
    """
    # squeeze=False always returns a 2-D array of axes, so n_cols=1 works too
    fig, axes = plt.subplots(1, n_cols, figsize=(20, 20), squeeze=False)
    axes = axes.flatten()

    shown = 0
    for img, ax in zip(images_arr, axes):
        ax.imshow(img)
        shown += 1

    # Hide panels that did not receive an image (fewer images than columns)
    for ax in axes[shown:]:
        ax.axis('off')

    plt.tight_layout()
    plt.show()

# First element of the first batch is the array of (augmented) images
plotImages(train_set[0][0])

## Model setup

In [None]:
# Defining the model (CNN - convolutional neural network):
# three convolution + max-pooling stages, then a dense classifier head
cnn = tf.keras.models.Sequential([
    # Convolutional feature extractor
    tf.keras.layers.Conv2D(128, kernel_size=(5, 5), padding='same', activation='relu', input_shape=(224, 224, 3)),
    tf.keras.layers.MaxPool2D(pool_size=(3, 3), strides=2, padding='same'),
    tf.keras.layers.Conv2D(64, kernel_size=(2, 2), padding='same', activation='relu'),
    tf.keras.layers.MaxPool2D(pool_size=(2, 2), strides=2, padding='same'),
    tf.keras.layers.Conv2D(32, kernel_size=(2, 2), padding='same', activation='relu'),
    tf.keras.layers.MaxPool2D(pool_size=(2, 2), strides=2, padding='same'),

    # Flatten the feature maps into one vector per image
    tf.keras.layers.Flatten(),

    # Classifier head; the single sigmoid unit outputs one value in [0, 1]
    tf.keras.layers.Dense(units=512, activation='relu'),
    tf.keras.layers.Dropout(rate=0.25),
    tf.keras.layers.Dense(units=1, activation='sigmoid'),
])

In [None]:
# Model summary: overview of the layers of `cnn`
cnn.summary()

In [None]:
# Compiling the model: binary cross-entropy loss, Adam optimizer, and
# accuracy / recall / precision tracked as metrics (in this order)
cnn.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=[
        'accuracy',
        tf.keras.metrics.Recall(),
        tf.keras.metrics.Precision(),
    ],
)

## Training and saving

In [None]:
# Early-stopping callback: monitors the validation loss with a patience of 4 epochs
es = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    mode='auto',
    min_delta=0,
    patience=4,
    verbose=0,
)

In [None]:
# Fitting the model and creating history - early stopping deactivated.
# No batch_size is passed here: train_set / validation_set are generators that
# already yield batches of 16, and Keras documents that batch_size must not be
# specified for generator inputs.
# To activate early stopping, add `callbacks=[es]` to the call.
history = cnn.fit(
    train_set,
    validation_data=validation_set,
    epochs=23,
)

In [None]:
# Setting Google Drive path (output folder for the training history and the saved model)
drive_path = '/content/gdrive/MyDrive/MOLD/'

# Make sure the output folder exists so that writing results into it does not
# fail on a missing directory; a no-op when the folder is already there
os.makedirs(drive_path, exist_ok=True)

In [None]:
# Training history as a table: one row per epoch, one column per tracked metric
history_df = pd.DataFrame(history.history)
display(history_df)

# Persist the history next to the model on Google Drive
history_csv_path = f'{drive_path}history_mold_detection_model.csv'
history_df.to_csv(history_csv_path)

In [None]:
# Saving the model in the .keras format (architecture, weights, and training
# configuration in a single `model.keras` zip archive)
model_file_path = f'{drive_path}mold_detection_model.keras'
cnn.save(model_file_path)

## Evaluation

In [None]:
# Model used for evaluation: either reload the saved file from Google Drive
# (uncomment the load_model line and remove the assignment below it) or reuse
# the in-memory `cnn` from the training section
#loaded_model = tf.keras.models.load_model(drive_path + 'mold_detection_model.keras')
loaded_model = cnn

#### Training and validation set

In [None]:
# Summarizing history for accuracy and loss: one figure per metric, each with
# the training curve and the matching validation curve ('val_' prefix)
for metric_name in ('accuracy', 'loss'):
    plt.plot(history.history[metric_name])
    plt.plot(history.history['val_' + metric_name])
    plt.title('model ' + metric_name)
    plt.ylabel(metric_name)
    plt.xlabel('epoch')
    plt.legend(['train', 'validation'], loc='upper left')
    plt.show()

#### Test set

In [None]:
# Evaluate on the test set with `loaded_model` (the model selected for
# evaluation in the cell at the top of this section), so this also works when
# the model was reloaded from disk instead of trained in this session.
# evaluate() returns the loss followed by the metrics in compile() order:
# accuracy, recall, precision.
test_loss, test_accuracy, test_recall, test_precision = loaded_model.evaluate(test_set)

print('test_loss:', round(test_loss,2))
print('test_accuracy:', round(test_accuracy,2))
print('test_recall:', round(test_recall,2))
print('test_precision:', round(test_precision,2))

#### Prediction on a single external picture

In [None]:
# Importing a single image
img_path = '/content/MANDARIN.jpg'
# Resize to the model's input size with bilinear interpolation, matching the
# interpolation used by the data generators (load_img defaults to 'nearest')
img = image.load_img(img_path, target_size=(224, 224), interpolation='bilinear')
img_array = image.img_to_array(img)
img_array = np.expand_dims(img_array, axis=0)  # Adding the batch dimension
img_array /= 255.0  # Normalizing pixel values

# Predicting the class with the model selected for evaluation
prediction = loaded_model.predict(img_array)[0][0] # predict returns a nested array with just 1 element, so taking this element right away

# Getting and printing the predicted class label
# (the sigmoid output is the score for class index 1, i.e. 'mold')
if prediction <0.5:
    predicted_class = 0
    predicted_label = 'no mold'
else:
    predicted_class = 1
    predicted_label = 'mold'
print()
print("Predicted class:", predicted_class)

# Displaying the name of the class
print(predicted_label)

# Displaying the predicted class probability
print(f"Probability for mold: {round(prediction*100,2)}%")
print()

# Showing the image itself (plt.show() avoids printing the AxesImage repr)
plt.imshow(img)
plt.show()