# MLZoomcamp 2022 Capstone Project

Author: José Victor

* Dataset: [Surface Crack Detection](https://www.kaggle.com/datasets/arunrk7/surface-crack-detection)

## Imports

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow.lite as tflite

from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras import layers

The path to the dataset

In [None]:
# Root directory of the image dataset, relative to the notebook's working
# directory. It is passed to `flow_from_directory` for both the training and
# the validation subsets.
filepath = "../data/dataset"

Defining the `ImageDataGenerator`

In [None]:
# One generator serves both subsets: pixel values are multiplied by 1/255 and
# 20% of the images are set aside for the "validation" subset.
datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    validation_split=0.2,
)

Setting a value for the `batch_size`

In [None]:
# Number of images per batch; used by both the training and validation iterators
# and embedded in the saved model's file name.
batch_size = 64

Train and validation dataset split

In [None]:
# Iterator over the "training" share of the split configured on `datagen`.
# Images are resized to 150x150 and labels are emitted as binary 0/1 values.
train_dataset = datagen.flow_from_directory(
    directory=filepath,
    subset="training",
    class_mode="binary",
    target_size=(150, 150),
    batch_size=batch_size,
    shuffle=True,
)

In [None]:
# Iterator over the "validation" share of the split configured on `datagen`.
# It uses the same image size, batch size and label mode as the training iterator.
validation_dataset = datagen.flow_from_directory(
    directory=filepath,
    subset="validation",
    class_mode="binary",
    target_size=(150, 150),
    batch_size=batch_size,
    shuffle=True,
)

Defining the loss that will be used in the model. This problem has only two labels, so we choose `BinaryCrossentropy`

In [None]:
# Binary cross-entropy with default settings; it is passed to `model.compile`
# and pairs with the single sigmoid output unit of the network.
loss = keras.losses.BinaryCrossentropy()

Defining a convolutional neural network that will classify the data using `keras.Sequential`

In [None]:
# Small CNN: one convolution + pooling stage, followed by a dense classifier.
model = keras.Sequential(
    [
        # RGB images at the size produced by the data iterators.
        layers.Input(shape=(150, 150, 3)),
        layers.Conv2D(filters=16, kernel_size=3, activation="relu"),
        layers.MaxPooling2D(pool_size=(2, 2)),
        # Flatten the feature maps so they can feed the dense layers.
        layers.Flatten(),
        layers.Dense(units=32, activation="relu"),
        # A single sigmoid unit yields one value in [0, 1] per image.
        layers.Dense(units=1, activation="sigmoid"),
    ],
    name="cnn",
)

Model summary

In [None]:
# Print the layer-by-layer overview of the network defined above.
model.summary()

Setting the parameters for training

In [None]:
# Training hyperparameters. `epochs` and `learning_rate` are also embedded in
# the file name used when the trained model is saved.
epochs = 5
learning_rate = 0.001
# Adam optimizer configured with the learning rate chosen above.
optimizer = keras.optimizers.Adam(learning_rate=learning_rate)

Compiling the model

In [None]:
# Attach the optimizer and loss defined above; accuracy is tracked as the
# only extra metric.
model.compile(optimizer=optimizer, loss=loss, metrics=["accuracy"])

Training the model

In [None]:
# Keep the History object returned by `fit`: the plotting cell reads the
# per-epoch loss and accuracy from `hist.history`, so discarding the return
# value would make that cell fail with a NameError.
hist = model.fit(
    train_dataset,
    epochs=epochs,
    validation_data=validation_dataset,
)

Plotting the loss and the accuracy of the model for the training and validation data

In [None]:
# Plot per-epoch loss (left panel) and accuracy (right panel) for the training
# and validation data. Expects `hist` to be the History object returned by
# `model.fit`.
history = hist.history

# Derive the x-axis from the number of recorded epochs instead of hard-coding
# five, so the plot stays valid when the `epochs` setting changes.
epoch_axis = np.arange(1, len(history["loss"]) + 1)

fig, ax = plt.subplots(ncols=2)
fig.set_size_inches((12, 5))

for loss_name in ["loss", "val_loss"]:
    ax[0].plot(epoch_axis, history[loss_name], label=f"{loss_name}")

for accuracy in ["accuracy", "val_accuracy"]:
    ax[1].plot(epoch_axis, history[accuracy], label=f"{accuracy}")

# Dashed reference line at accuracy = 1.
ax[1].plot(epoch_axis, np.ones(epoch_axis.size), linestyle="--", color="black")

# Both panels share the x-axis setup and differ only in the y-axis label.
for panel, ylabel in zip(ax, ["Loss", "Accuracy"]):
    panel.set_xlabel("Epochs")
    panel.set_ylabel(ylabel)
    panel.set_xticks(epoch_axis)
    panel.legend()

plt.show()

Saving the model as an h5 file

In [None]:
# The file name records the hyperparameters of this run. Dots in the learning
# rate are replaced by underscores so the only dot left is the extension's.
lr_str = str(learning_rate).replace(".", "_")
model_path = f"../model/cnn_surface_crack_detection_bs{batch_size}_epochs{epochs}_lr_{lr_str}.h5"
model.save(model_path)

Creating a converter in order to convert the trained Keras model (the same one saved as an h5 file above) to the tflite format

In [None]:
# Build the converter directly from the in-memory Keras model (not from the
# saved .h5 file) and run the conversion.
converter = tflite.TFLiteConverter.from_keras_model(model=model)
# Result of the conversion; it is written to disk in binary mode afterwards.
tflite_model = converter.convert()

Saving the model as a tflite file

In [None]:
# Build the file name from the same hyperparameters as the .h5 export instead
# of hard-coding "bs64_epochs5_lr_0_001", so both artifacts stay in sync when
# a setting changes. For the current values the resulting path is unchanged.
tflite_lr = str(learning_rate).replace(".", "_")
tflite_path = (
    "../model/cnn_surface_crack_detection"
    f"_bs{batch_size}_epochs{epochs}_lr_{tflite_lr}.tflite"
)

# The converted model is raw binary data, hence binary write mode.
with open(tflite_path, "wb") as file:
    file.write(tflite_model)