### Additional Data Preprocessing
Although the data has been preprocessed in DataEngineer.ipynb, to continue the steps, this data might need additional preprocessing

In [5]:
# import necessary things

import os
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping


### Load Data

In [7]:
# Define constants
train_dir = 'data/train'
val_dir = 'data/val'
test_dir = 'data/test'
input_shape = (128, 128)
batch_size = 32

# Data augmentation and normalization
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True
)

val_datagen = ImageDataGenerator(rescale=1./255)

# Flow images from directories and apply data augmentation
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=input_shape,
    batch_size=batch_size,
    class_mode='categorical'
)

val_generator = val_datagen.flow_from_directory(
    val_dir,
    target_size=input_shape,
    batch_size=batch_size,
    class_mode='categorical'
)


Found 75 images belonging to 3 classes.
Found 16 images belonging to 3 classes.


### Define model architecture

for ResNet50, DenseNet121, MobileNetV2

In [23]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.applications import ResNet50, DenseNet121, MobileNetV2
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import time

# Define number of classes
num_classes = 3

# Load pre-trained models without top layers
base_models = [
    ResNet50(weights='imagenet', include_top=False, input_shape=(128, 128, 3)),
    DenseNet121(weights='imagenet', include_top=False, input_shape=(128, 128, 3)),
    MobileNetV2(weights='imagenet', include_top=False, input_shape=(128, 128, 3))
]

# Model Building
# Build custom models on top of each base model
models = []
for base_model in base_models:
    model = Sequential()
    model.add(base_model)
    model.add(GlobalAveragePooling2D())
    model.add(Dense(num_classes, activation='softmax'))
    models.append(model)

# Model compilation
# Compile the models
for model in models:
    model.compile(optimizer=Adam(),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

# Print model summaries
for i, model in enumerate(models):
    print(f"Model {i+1} Summary:")
    model.summary()
    print()

# Setting up Data Generators
train_dir = 'data/train'
val_dir = 'data/val'
test_dir = 'data/test'

batch_size = 32
target_size = (128, 128)

train_datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True
)

val_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=target_size,
    batch_size=batch_size,
    class_mode='categorical'
)

val_generator = val_datagen.flow_from_directory(
    val_dir,
    target_size=target_size,
    batch_size=batch_size,
    class_mode='categorical'
)

test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=target_size,
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False
)

# Define callbacks to prevent overfitting etc
checkpoint_path = 'model_checkpoints/model_{epoch:02d}-{val_loss:.2f}.h5'
checkpoint = ModelCheckpoint(checkpoint_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')

early_stopping = EarlyStopping(monitor='val_loss', patience=5, verbose=1, mode='min', restore_best_weights=True)

# Training loop
for model in models:
    print(f"Training Model {models.index(model) + 1}...")
    start_time = time.time()

    # Train the model
    history = model.fit(
        train_generator,
        epochs=30,  # Adjusted to 30 epochs
        validation_data=val_generator,
        callbacks=[checkpoint, early_stopping]
    )

    end_time = time.time()
    training_duration = end_time - start_time
    print(f"Training duration for Model {models.index(model) + 1}: {training_duration:.2f} seconds")

    # Evaluate the model on the test set
    loss, accuracy = model.evaluate(test_generator)
    print(f"Test Loss: {loss:.4f}")
    print(f"Test Accuracy: {accuracy:.4f}")

  

    print()

# Print final conclusions or comparisons based on metrics and training times
print("Training and evaluation complete.")


Model 1 Summary:
Model: "sequential_27"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 resnet50 (Functional)       (None, 4, 4, 2048)        23587712  
                                                                 
 global_average_pooling2d_2  (None, 2048)              0         
 7 (GlobalAveragePooling2D)                                      
                                                                 
 dense_26 (Dense)            (None, 3)                 6147      
                                                                 
Total params: 23593859 (90.00 MB)
Trainable params: 23540739 (89.80 MB)
Non-trainable params: 53120 (207.50 KB)
_________________________________________________________________

Model 2 Summary:
Model: "sequential_28"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 densenet121 (F

### Hyperparameter Tuning

using grid search or randomized search

In [25]:
pip install --upgrade tensorflow



Collecting tensorflow
  Using cached tensorflow-2.13.1-cp38-cp38-win_amd64.whl.metadata (2.6 kB)
INFO: pip is looking at multiple versions of tensorflow to determine which version is compatible with other requirements. This could take a while.


In [34]:
pip install keras


Note: you may need to restart the kernel to use updated packages.


In [2]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier

from sklearn.model_selection import GridSearchCV

# Define number of classes and input shape
num_classes = 3
input_shape = (128, 128, 3)

# Function to build model with specific hyperparameters
def build_model(learning_rate=0.001, batch_size=32):
    base_model = ResNet50(weights='imagenet', include_top=False, input_shape=input_shape)
    model = Sequential()
    model.add(base_model)
    model.add(GlobalAveragePooling2D())
    model.add(Dense(num_classes, activation='softmax'))

    optimizer = Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

# Define hyperparameters to tune
param_grid = {
    'learning_rate': [0.001, 0.01, 0.1],
    'batch_size': [16, 32, 64]
}

# Perform grid search
grid_search = GridSearchCV(estimator=KerasClassifier(build_fn=build_model, epochs=30, verbose=0),
                           param_grid=param_grid,
                           scoring='accuracy',
                           cv=3)

# Fit the grid search
grid_search.fit(train_generator, validation_data=val_generator)

# Print best parameters and results
print("Best Parameters:", grid_search.best_params_)
print("Best Accuracy:", grid_search.best_score_)


ModuleNotFoundError: No module named 'tensorflow.keras.wrappers'

cannot proceed to perform hyperparameter tuning