In [None]:
# Mount Google Drive
# Colab-only: relies on the google.colab package. Later cells read the dataset
# archive from, and write MLflow data and checkpoints to, /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Unzip Dataset
# Delete any previous extraction first so the cell can be re-run from a clean
# state, then unpack the archive stored on Drive into /content/fruits.
!rm -rf "/content/fruits"
!unzip -o "/content/drive/MyDrive/fruit_dataset.zip" -d "/content/fruits"

In [None]:
# Upgrade pip
!pip install --upgrade pip --quiet

# Install MLflow + pyngrok
# mlflow and pyngrok are pinned to exact versions; every other entry only puts
# a version bound on a supporting package.
!pip install --quiet \
    mlflow==2.14.3 \
    pyngrok==7.4.0 \
    "docker<8" \
    "graphene<4" \
    "querystring-parser<2" \
    "gunicorn<23" \
    "importlib-metadata<8" \
    "packaging<25" \
    "pyarrow>=15.0.0,<19" \
    "pytz<2025" \
    "protobuf>=4.25.3,<6"

In [None]:
# Sanity check after installation: show which TensorFlow and MLflow versions
# the kernel actually imports.
import tensorflow as tf
import mlflow

print("TensorFlow:", tf.__version__)
print("MLflow:", mlflow.__version__)

In [None]:
# Imports
import os
import time
import shutil
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from skimage import exposure
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, Dropout
from tensorflow.keras import layers, models
from tensorflow.keras.applications import ResNet50V2
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, LearningRateScheduler, BackupAndRestore, TensorBoard
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.preprocessing import label_binarize
from mlflow.exceptions import MlflowException
from mlflow.models.signature import infer_signature

In [None]:
# mlflow setup
import mlflow
import mlflow.tensorflow
from pyngrok import ngrok
import subprocess

In [None]:
# Create a MLflow folder inside Google Drive
# This directory is used further down both for the SQLite tracking database
# and as the artifact root, so runs survive a Colab session reset.
mlflow_drive_dir = "/content/drive/MyDrive/mlflow_runs"
import os
os.makedirs(mlflow_drive_dir, exist_ok=True)

In [None]:
# Enable TensorFlow autologging
# Model logging is switched on and dataset logging is switched off.
# NOTE(review): the training cells also call mlflow.keras.log_model explicitly,
# so each run may end up storing the model twice - confirm this is intended.
mlflow.tensorflow.autolog(log_models=True, log_datasets=False)

In [None]:
# Start MLflow UI via ngrok

# Authenticate ngrok in Colab
import os
from getpass import getpass

from pyngrok import ngrok

# Set ngrok authtoken
# The token is a credential and must never be stored in the notebook: it is read
# from the NGROK_AUTHTOKEN environment variable, or typed interactively when the
# variable is not set. Any token that was ever committed with this notebook
# should be revoked in the ngrok dashboard.
NGROK_TOKEN = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: ")
ngrok.set_auth_token(NGROK_TOKEN)

In [None]:
# Kill old MLflow if already running
# Pipeline: list processes -> keep lines mentioning "mlflow ui" -> drop the grep
# process itself -> take the PID column -> kill -9 each PID (xargs -r does
# nothing when the list is empty).
!ps aux | grep "mlflow ui" | grep -v grep | awk '{print $2}' | xargs -r kill -9 || echo "No MLflow instance running"

In [None]:
# Kill old tunnels
# Stops the ngrok process managed by pyngrok so a re-run of the notebook does
# not accumulate tunnels from earlier executions.
ngrok.kill()

In [None]:
# Launch the MLflow UI as a background process, listening on all interfaces.
# Run metadata goes to a SQLite file inside the Drive folder; artifacts use the
# same Drive folder as their default root.
_mlflow_ui_cmd = ["mlflow", "ui"]
_mlflow_ui_cmd += ["--backend-store-uri", f"sqlite:///{mlflow_drive_dir}/mlflow.db"]
_mlflow_ui_cmd += ["--default-artifact-root", mlflow_drive_dir]
_mlflow_ui_cmd += ["--host", "0.0.0.0"]
_mlflow_ui_cmd += ["--port", "5000"]
subprocess.Popen(_mlflow_ui_cmd)

In [None]:
# Wait until the MLflow server actually accepts connections.
# A fixed sleep is a race: the first start (database creation on Drive) can take
# longer than a few seconds, and the cells below would then fail to connect.
import socket

_mlflow_deadline = time.time() + 60  # give up after one minute
while True:
    try:
        # A successful TCP connect means the UI process is listening on port 5000.
        with socket.create_connection(("127.0.0.1", 5000), timeout=1):
            break
    except OSError:
        if time.time() >= _mlflow_deadline:
            raise RuntimeError("MLflow UI did not start listening on port 5000 within 60 seconds")
        time.sleep(1)

In [None]:
# Point notebook to the same backend
# All mlflow.* calls in this kernel now go through the local server started
# above on port 5000 (and therefore into its SQLite store).
mlflow.set_tracking_uri(f"http://127.0.0.1:5000")

In [None]:
# Define experiment name
experiment_name = "Fruit_ResNet50V2"

# Create the experiment if it doesn't exist
# (created explicitly rather than implicitly by set_experiment so that the
# artifact location can be pointed at the Drive folder)
if mlflow.get_experiment_by_name(experiment_name) is None:
    mlflow.create_experiment(
        name=experiment_name,
        artifact_location=mlflow_drive_dir  # store artifacts (models, plots) here
    )

# Set the experiment for logging
mlflow.set_experiment(experiment_name)

print(f"MLflow experiment '{experiment_name}' is set!")

In [None]:
# Retrieve experiment details
# Read the experiment back from the tracking server to confirm its ID and
# where its artifacts will be written.
exp = mlflow.get_experiment_by_name(experiment_name)
print("Current Experiment:")
print(f"  ID: {exp.experiment_id}")
print(f"  Name: {exp.name}")
print(f"  Artifact Location: {exp.artifact_location}")

In [None]:
# Open ngrok tunnel to access MLflow UI
# The tunnel forwards to local port 5000, where the MLflow UI was started.
# NOTE(review): the printed URL is publicly reachable while the tunnel is open.
mlflow_url = ngrok.connect(5000)
print("MLflow Tracking UI:", mlflow_url)

In [None]:
import mlflow
from mlflow.tracking import MlflowClient

def start_mlflow_run_auto(run_prefix="Run", nested=False):
    """
    Open an MLflow run in the "Fruit_ResNet50V2" experiment with a numbered name.

    The name is "<run_prefix>_<k>", where k is one more than the number of runs
    the tracking client currently returns for the experiment (all prefixes are
    counted together).

    Args:
        run_prefix: text placed before the running number.
        nested: forwarded unchanged to mlflow.start_run.

    Returns:
        The object returned by mlflow.start_run (usable as a context manager).
    """
    target_experiment = "Fruit_ResNet50V2"
    mlflow.set_experiment(target_experiment)
    exp_id = mlflow.get_experiment_by_name(target_experiment).experiment_id

    # Number the new run after the runs already recorded in the experiment
    already_logged = MlflowClient().search_runs([exp_id])
    next_index = 1 + len(already_logged)

    return mlflow.start_run(
        run_name=f"{run_prefix}_{next_index}",
        experiment_id=exp_id,
        nested=nested,
    )

In [None]:
# paths for the dataset
# source_dir: class folders as extracted from the archive
# destination_dir: working copy of those class folders
source_dir = '/content/fruits/fruit_dataset'
destination_dir = '/content/fruit_data'

In [None]:
# to list all class folder names inside the dataset
# NOTE(review): os.listdir returns every entry, so any stray file in the
# dataset root would also show up here as a "class".
fruit_folders = sorted(os.listdir(source_dir))
print("Fruit Classes:", fruit_folders)

In [None]:
# Create directories and copy the relevant fruit folders
os.makedirs(destination_dir, exist_ok=True)

for folder in fruit_folders:
    src_folder = os.path.join(source_dir, folder)
    dst_folder = os.path.join(destination_dir, folder)

    # Only directories are class folders; a stray file in the dataset root
    # (e.g. archive metadata) would make copytree raise.
    if not os.path.isdir(src_folder):
        continue

    # to only copy if destination folder doesn't already exist
    if not os.path.exists(dst_folder):
        shutil.copytree(src_folder, dst_folder)

print("All fruit folders copied to:", destination_dir)

In [None]:
def split_dataset(source_dir, destination_dir, train_size=0.7, val_size=0.2, test_size=0.1):
    """Copy a class-per-folder image dataset into train/val/test sub-folders.

    Every sub-directory of `source_dir` is treated as one class. Its files are
    split per class with a fixed random seed and copied to
    `<destination_dir>/<split>/<class>/`.

    Directory listings are sorted before splitting: `os.listdir` gives no
    ordering guarantee, so without sorting the "seeded" split could still differ
    between machines or re-runs, letting the same image land in two splits when
    the destination is reused.

    Args:
        source_dir: directory containing one folder per class.
        destination_dir: directory in which 'train', 'val' and 'test' are created.
        train_size, val_size, test_size: fractions of each class; must sum to 1.

    Raises:
        AssertionError: if the three fractions do not sum to 1.
    """
    # Tolerant comparison because the fractions are floats
    assert abs(train_size + val_size + test_size - 1.0) < 1e-6, \
           "Train, val, and test sizes must sum to 1."

    splits = ['train', 'val', 'test']
    split_dirs = {split: os.path.join(destination_dir, split) for split in splits}

    # Create base split directories
    for dir_path in split_dirs.values():
        os.makedirs(dir_path, exist_ok=True)

    # Share of the held-out remainder (val + test) that goes to validation
    val_ratio = val_size / (val_size + test_size)

    # Iterate over classes in a stable order
    for class_name in sorted(os.listdir(source_dir)):
        class_path = os.path.join(source_dir, class_name)
        if not os.path.isdir(class_path):
            continue

        # Create class folders inside each split folder
        for dir_path in split_dirs.values():
            os.makedirs(os.path.join(dir_path, class_name), exist_ok=True)

        # Regular files only (shutil.copy cannot copy a directory), sorted so
        # the seeded shuffle always starts from the same sequence
        images = sorted(
            name for name in os.listdir(class_path)
            if os.path.isfile(os.path.join(class_path, name))
        )

        # Split into train and temp (val+test), then temp into val and test
        train_imgs, temp_imgs = train_test_split(images, train_size=train_size, random_state=42)
        val_imgs, test_imgs = train_test_split(temp_imgs, test_size=1 - val_ratio, random_state=42)

        # Copy images to their respective folders
        for img_list, split in zip([train_imgs, val_imgs, test_imgs], splits):
            for img in img_list:
                src = os.path.join(class_path, img)
                dst = os.path.join(split_dirs[split], class_name, img)
                shutil.copy(src, dst)

In [None]:
# Splitting Dataset
# Note: source_dir / destination_dir are re-bound here; from this cell on they
# refer to the copied dataset and the split output, not to the extracted archive.
source_dir = '/content/fruit_data'           # folder with copied data
destination_dir = '/content/fruit_split_data'  # folder where the train/val/test splits will be created

split_dataset(source_dir, destination_dir)

In [None]:
# Class Distribution in the train split
split_dataset_path = '/content/fruit_split_data/train'  # the train folder path

# Keep only sub-directories: each one is a class
class_names = []
for entry in os.listdir(split_dataset_path):
    if os.path.isdir(os.path.join(split_dataset_path, entry)):
        class_names.append(entry)

# Report how many files each class folder holds
for class_name in class_names:
    class_path = os.path.join(split_dataset_path, class_name)
    print(f"{class_name}: {len(os.listdir(class_path))} images")

In [None]:
# Data Generators Setup for Raw vs Ripe Fruit
target_size = (224, 224)
batch_size = 30
num_classes = 22   # 11 raw + 11 ripe = 22 classes
channels = 3
epochs = 100

# Data augmentation (training only): random geometric and colour perturbations
# enlarge the effective dataset and add variability.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=50,
    width_shift_range=0.3,
    height_shift_range=0.3,
    zoom_range=0.3,
    horizontal_flip=True,
    brightness_range=(0.4, 1.6),
    fill_mode='nearest',
    channel_shift_range=40.0,
    shear_range=25.0
)

# Validation and test data are only rescaled. Random brightness jitter here
# would make val_loss / val_accuracy (which drive EarlyStopping and
# ModelCheckpoint) and the reported test metrics change from one pass to the
# next, so model.evaluate() and model.predict() would see different pixels.
val_datagen = ImageDataGenerator(rescale=1./255)

test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    '/content/fruit_split_data/train',
    target_size=target_size,
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=True,
    seed=42
)

# shuffle=False keeps sample order aligned with generator.classes, which the
# confusion-matrix / classification-report code relies on.
val_generator = val_datagen.flow_from_directory(
    '/content/fruit_split_data/val',
    target_size=target_size,
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False
)

test_generator = test_datagen.flow_from_directory(
    '/content/fruit_split_data/test',
    target_size=target_size,
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False
)

In [None]:
import os

base_path = '/content/fruit_split_data'

# Walk the three split folders and report, per class, how many files each holds
for split in ('train', 'val', 'test'):
    split_path = os.path.join(base_path, split)
    print(f"\nContents of: {split_path}")

    if os.path.exists(split_path):
        # Sub-directories are the class folders
        class_folders = []
        for entry in os.listdir(split_path):
            if os.path.isdir(os.path.join(split_path, entry)):
                class_folders.append(entry)

        print(f"Found {len(class_folders)} class folders.")

        for class_name in sorted(class_folders):
            class_path = os.path.join(split_path, class_name)
            print(f"   - {class_name}: {len(os.listdir(class_path))} images")
    else:
        print(f" {split_path} does NOT exist!")

In [None]:
from sklearn.utils.class_weight import compute_class_weight
import numpy as np

# Compute class weights using train_generator
# 'balanced' gives rarer classes proportionally larger weights.
present_classes = np.unique(train_generator.classes)
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=present_classes,
    y=train_generator.classes
)

# Convert to dictionary format keyed by the real class index.
# Pairing with `present_classes` (instead of enumerate) keeps weights attached
# to the right index even if some class index has no training images.
class_weight_dict = {
    int(class_index): float(weight)
    for class_index, weight in zip(present_classes, class_weights)
}

# Reverse lookup built once: class index -> folder name
index_to_name = {index: name for name, index in train_generator.class_indices.items()}

# Print the class weights
print("Class Weights per Class Index:")
for class_index, weight in class_weight_dict.items():
    class_name = index_to_name[class_index]
    print(f"  {class_index} ({class_name}): {weight:.4f}")


In [None]:
class GradualUnfreezing(tf.keras.callbacks.Callback):
    """Mark the last layers of a frozen backbone as trainable, a few at a time.

    Starting at `start_epoch` and then every `interval` epochs, the next
    `layers_per_unfreeze` layers (counted from the end of the backbone) get
    `trainable = True`.

    Two safeguards make one instance safe to reuse across several `fit()` calls
    and models:

    * At the start of training the callback checks that `base_model` really is
      part of the model being trained. If it is not, it switches to the nested
      backbone found inside the trained model (same name first, otherwise the
      first nested model). Unfreezing a detached model would have no effect on
      training at all.
    * The unfreeze counter is reset at the start of every training run.

    NOTE(review): the Keras transfer-learning guide states that `trainable`
    values are captured when the model is compiled and stay in effect until
    `compile()` is called again. Layers unfrozen here during `fit()` may
    therefore only start updating after a recompile - confirm.

    Args:
        base_model: backbone whose layers are unfrozen. May be None, in which
            case the backbone is looked up inside the trained model.
        layers_per_unfreeze: number of layers released per step.
        start_epoch: first (0-based) epoch at whose end layers are released.
        interval: number of epochs between two release steps.
    """

    def __init__(self, base_model=None, layers_per_unfreeze=5, start_epoch=3, interval=3):
        super().__init__()
        self.base_model = base_model
        self.layers_per_unfreeze = layers_per_unfreeze
        self.start_epoch = start_epoch
        self.interval = interval
        self.unfrozen_layers = 0
        # Backbone actually operated on; re-resolved at the start of each fit()
        self._target = base_model

    def _resolve_target(self):
        """Return the backbone that is really part of the model being trained."""
        trained = getattr(self, "model", None)
        if trained is None or self.base_model is trained:
            return self.base_model

        sub_layers = list(getattr(trained, "layers", []))
        if any(layer is self.base_model for layer in sub_layers):
            return self.base_model

        # The supplied backbone is not inside the trained model: look for a
        # nested model, preferring one with the same name.
        nested_models = [layer for layer in sub_layers if isinstance(layer, tf.keras.Model)]
        wanted_name = getattr(self.base_model, "name", None)
        for candidate in nested_models:
            if candidate.name == wanted_name:
                return candidate
        return nested_models[0] if nested_models else self.base_model

    def on_train_begin(self, logs=None):
        self._target = self._resolve_target()
        # A fresh training run starts from a fully frozen backbone again
        self.unfrozen_layers = 0
        if self._target is not None and self._target is not self.base_model:
            print("\n[Gradual Unfreezing] Supplied base_model is not part of the trained model; "
                  f"using nested backbone '{self._target.name}' instead")

    def on_epoch_end(self, epoch, logs=None):
        target = self._target
        if target is None:
            return
        if epoch < self.start_epoch or (epoch - self.start_epoch) % self.interval != 0:
            return

        total_layers = len(target.layers)
        next_unfreeze = self.unfrozen_layers + self.layers_per_unfreeze
        if next_unfreeze > total_layers:
            return

        # Slice [-next : -already]; "or None" turns the initial -0 into an open
        # end so the very first step reaches the last layer.
        for layer in target.layers[-next_unfreeze: -self.unfrozen_layers or None]:
            layer.trainable = True
        self.unfrozen_layers = next_unfreeze
        print(f"\n[Gradual Unfreezing] Unfrozen layers: last {self.unfrozen_layers} of {total_layers}")

In [None]:
# model architecture
from tensorflow.keras.applications import ResNet50V2
from tensorflow.keras import layers, regularizers, Model, Input

def build_model(num_classes, input_shape=(224, 224, 3)):
    """Build a fresh ResNet50V2-based classifier with a residual dense head.

    Architecture: ImageNet-pretrained ResNet50V2 (no top, all layers frozen)
    -> global average pooling -> batch norm -> dropout -> three residual dense
    blocks (1024, 512, 256 units) -> softmax over `num_classes`.

    The backbone is called with `training=False` so its BatchNormalization
    layers stay in inference mode even if backbone layers are unfrozen later,
    as the Keras transfer-learning guide recommends for fine-tuning.

    NOTE(review): the notebook feeds images rescaled to [0, 1]; the ResNet50V2
    ImageNet weights were presumably trained with the application's own
    `preprocess_input` scaling - confirm the input range is intended.

    Args:
        num_classes: size of the softmax output layer.
        input_shape: (height, width, channels) of the input images.

    Returns:
        An uncompiled keras Model.
    """
    # Initialize ResNet50V2 base
    base_model = ResNet50V2(
        weights='imagenet',
        include_top=False,
        input_shape=input_shape
    )

    # Freeze all layers initially
    for layer in base_model.layers:
        layer.trainable = False

    def residual_dense_block(block_input, units, use_batchnorm=False):
        """Dense(relu) [+BN] + Dropout branch added to a linear projection, then ReLU."""
        branch = layers.Dense(units, activation='relu', kernel_regularizer=regularizers.l2(1e-5))(block_input)
        if use_batchnorm:
            branch = layers.BatchNormalization()(branch)
        branch = layers.Dropout(0.3)(branch)
        # Linear projection so the skip path matches the branch width
        shortcut = layers.Dense(units, activation='linear')(block_input)
        merged = layers.Add()([shortcut, branch])
        return layers.Activation('relu')(merged)

    # Input layer
    inputs = tf.keras.Input(shape=input_shape)
    x = base_model(inputs, training=False)

    # Global pooling and dropout
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.3)(x)

    # Residual blocks 1-3 (only the first one uses batch normalization)
    x = residual_dense_block(x, 1024, use_batchnorm=True)
    x = residual_dense_block(x, 512)
    x = residual_dense_block(x, 256)

    # Final output layer
    outputs = layers.Dense(num_classes, activation='softmax')(x)

    # Return fresh model
    return Model(inputs, outputs)

In [None]:
model = build_model(num_classes)

# `base_model` must be the backbone that lives inside `model`. build_model()
# creates its own ResNet50V2 internally, so constructing a second one here
# would give callbacks a model that is never trained (and load the ImageNet
# weights twice). Pick the nested backbone out of the built model instead.
base_model = next(
    layer for layer in model.layers if isinstance(layer, tf.keras.Model)
)

In [None]:
# Learning Rate Schedule

initial_learning_rate = 1e-4
warmup_epochs = 5
total_epochs = epochs

def warmup_cosine_decay_schedule(epoch):
    """Learning rate for a 0-based `epoch`: linear warmup, then cosine decay.

    During the first `warmup_epochs` epochs the rate rises linearly and reaches
    `initial_learning_rate` in the last warmup epoch. Afterwards it follows half
    a cosine wave down to 0 at `total_epochs`.

    Edge cases handled explicitly:
    * no decay phase (`total_epochs <= warmup_epochs`): the peak rate is kept
      instead of dividing by zero;
    * `epoch` beyond `total_epochs`: progress is clamped so the rate stays at
      the end value instead of climbing back up the cosine.
    """
    if epoch < warmup_epochs:
        return initial_learning_rate * ((epoch + 1) / warmup_epochs)

    decay_span = total_epochs - warmup_epochs
    if decay_span <= 0:
        return initial_learning_rate

    progress = min(1.0, (epoch - warmup_epochs) / decay_span)
    return initial_learning_rate * (0.5 * (1 + np.cos(np.pi * progress)))

In [None]:
# Drive locations for checkpoints, training backups and TensorBoard logs
checkpoint_dir = "/content/drive/MyDrive/fruit_checkpoints"
backup_dir = "/content/drive/MyDrive/fruit_backup"
tensorboard_log_dir = "/content/drive/MyDrive/fruit_tensorboard_logs"

# Make sure each of them exists before training starts
for _output_dir in (checkpoint_dir, backup_dir, tensorboard_log_dir):
    os.makedirs(_output_dir, exist_ok=True)

In [None]:
# Callbacks
# NOTE(review): this one list is passed to every model.fit() call further down
# (manual run, grid search, train_and_log_run), so any state a callback keeps
# between fits is shared across runs - confirm that is intended, in particular
# for ModelCheckpoint, which writes to a single file path.

callbacks = [
    # Stop training early if no improvement
    # (val_loss must improve by at least min_delta within `patience` epochs;
    # the best weights seen are restored when training stops)
    tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=3,
        restore_best_weights=True,
        min_delta=1e-4,
        verbose=1
    ),

    # Save the best model based on validation accuracy
    tf.keras.callbacks.ModelCheckpoint(
        filepath=os.path.join(checkpoint_dir, 'best_fruit_model_resnet50v2.keras'),
        monitor='val_accuracy',
        save_best_only=True,
        save_weights_only=False,
        mode='max',
        verbose=1
    ),

    # Apply custom warmup and cosine decay learning rate
    # (the scheduler sets the optimizer's learning rate at the start of each
    # epoch from warmup_cosine_decay_schedule, which is based on
    # initial_learning_rate)
    tf.keras.callbacks.LearningRateScheduler(warmup_cosine_decay_schedule),

    # Gradually unfreeze base model layers
    # NOTE(review): make sure `base_model` is the backbone instance that is
    # actually inside the model being trained; build_model() creates its own
    # ResNet50V2 internally.
    GradualUnfreezing(base_model),

    # Automatically back up training in case of interruption
    #tf.keras.callbacks.BackupAndRestore(backup_dir=backup_dir),

    # Log training for TensorBoard visualization
    #tf.keras.callbacks.TensorBoard(log_dir=tensorboard_log_dir, histogram_freq=1)
]

In [None]:
## 1) Manual Search - optimizer tuning
# One optimizer is active at a time; the alternatives that were tried are kept
# commented out below for reference.

# Compile Model

# 1. AdamW
#from tensorflow.keras.optimizers import AdamW
#from tensorflow.keras.metrics import AUC, Precision, Recall

# Optimizer with weight decay
#optimizer = AdamW(
#    learning_rate=initial_learning_rate,
#    weight_decay=1e-4
#)

# 2. SGD (Momentum-based)
#from tensorflow.keras.optimizers import SGD

#optimizer = SGD(
#    learning_rate=initial_learning_rate,
#    momentum=0.9,
#    nesterov=True
#)

# 3. RMSprop (Hybrid adaptive)
from tensorflow.keras.optimizers import RMSprop

optimizer = RMSprop(
   learning_rate=initial_learning_rate,
    rho=0.9,
    momentum=0.9
)

# According to the plots logged and visualized in the MLflow UI, RMSprop is the
# better choice: its training and validation curves stay closely aligned, which
# indicates stable learning and good generalization. It shows no prominent
# overfitting or underfitting, unlike the runs trained with the other optimizers.

# 4. Nadam (Hybrid Adaptive)
#from tensorflow.keras.optimizers import Nadam
#from tensorflow.keras.metrics import AUC, Precision, Recall

#optimizer = Nadam(
#    learning_rate=initial_learning_rate,
#   beta_1=0.9,
#    beta_2=0.999,
#    epsilon=1e-7
#)

# Compile the model
# Label smoothing of 0.1 softens the one-hot targets. The metric order here
# (accuracy, auc, precision, recall) is the order model.evaluate() returns
# after the loss, which later cells rely on when unpacking.
model.compile(
    optimizer=optimizer,
    loss=tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.1),
    metrics=[
        'accuracy',
        tf.keras.metrics.AUC(name='auc'),
        tf.keras.metrics.Precision(name='precision'),
        tf.keras.metrics.Recall(name='recall')
    ]
)

In [None]:
# Print the layer-by-layer summary of the compiled model
model.summary()

In [None]:
import math

# Class names ordered by their class index
class_names = [
    name
    for name, _ in sorted(train_generator.class_indices.items(), key=lambda pair: pair[1])
]

# Grid with 3 rows and as many columns as needed
n_rows = 3
n_cols = math.ceil(len(class_names) / n_rows)

fig, axes = plt.subplots(n_rows, n_cols, figsize=(5 * n_cols, 5 * n_rows))
axes = axes.flatten()  # 1D so a class index addresses its own subplot

# Classes that already have an image on the grid
displayed_classes = set()

# Keep drawing (augmented) training batches until every class is shown once
while len(displayed_classes) < len(class_names):
    images, labels = next(train_generator)
    for image, one_hot in zip(images, labels):
        label_idx = np.argmax(one_hot)
        if label_idx in displayed_classes:
            continue
        class_ax = axes[label_idx]
        class_ax.imshow(image)
        class_ax.set_title(f"{class_names[label_idx]}")
        class_ax.axis('off')
        displayed_classes.add(label_idx)
        if len(displayed_classes) == len(class_names):
            break

# Hide any extra subplots (if total grid > number of classes)
for spare_ax in axes[len(class_names):]:
    spare_ax.axis('off')

plt.tight_layout()
plt.show()

In [None]:
# Display up to 12 augmented images from the training set in a 3x4 grid with their class labels.
# `images` and `labels` are the last batch drawn in the previous cell.
plt.figure(figsize=(20, 15))

for i in range(12):
    if i >= len(images):
        break
    ax = plt.subplot(3, 4, i + 1)  # 3 rows, 4 columns = 12 images
    # Generator output was multiplied by 1/255; scale back to 0-255 for display
    img = images[i] * 255.0
    plt.imshow(img.astype("uint8"))
    plt.title(class_names[np.argmax(labels[i])])
    plt.axis('off')

plt.show()

In [None]:
def log_custom_metrics(history, model, generator):
    """Log training curves, a confusion matrix and a classification report to MLflow.

    Files are written to the current working directory and uploaded to the
    active MLflow run: the two figures under "plots", the report under "reports".

    Args:
        history: object returned by model.fit; its `history` dict must contain
            'accuracy', 'val_accuracy', 'loss' and 'val_loss'.
        model: trained model used for prediction.
        generator: directory iterator providing `classes` and `class_indices`.
            Predictions are compared position by position with
            `generator.classes`, so it must not shuffle.
    """
    # Accuracy/Loss plots
    fig, ax = plt.subplots(1, 2, figsize=(12, 4))
    ax[0].plot(history.history['accuracy'], label='train_acc')
    ax[0].plot(history.history['val_accuracy'], label='val_acc')
    ax[0].legend()
    ax[0].set_title("Accuracy")

    ax[1].plot(history.history['loss'], label='train_loss')
    ax[1].plot(history.history['val_loss'], label='val_loss')
    ax[1].legend()
    ax[1].set_title("Loss")

    fig.savefig("training_curves.png")
    mlflow.log_artifact("training_curves.png", artifact_path="plots")
    plt.close(fig)

    # Class names ordered by class index, plus the full list of indices.
    # Passing explicit labels keeps the matrix/report aligned with the names
    # even when a class never occurs in y_true or y_pred; otherwise the number
    # of detected classes would not match the number of names.
    class_labels = [
        name for name, _ in sorted(generator.class_indices.items(), key=lambda pair: pair[1])
    ]
    label_ids = list(range(len(class_labels)))

    # Predictions for Confusion Matrix
    y_true = generator.classes
    y_pred = np.argmax(model.predict(generator), axis=1)

    cm = confusion_matrix(y_true, y_pred, labels=label_ids)
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_labels)
    disp.plot(cmap="Blues", xticks_rotation=45)
    disp.ax_.set_title("Confusion Matrix")
    disp.figure_.savefig("confusion_matrix.png")
    mlflow.log_artifact("confusion_matrix.png", artifact_path="plots")
    plt.close(disp.figure_)

    # Classification Report (classes without predictions score 0 without warnings)
    report = classification_report(
        y_true,
        y_pred,
        labels=label_ids,
        target_names=class_labels,
        zero_division=0,
    )
    with open("classification_report.txt", "w") as f:
        f.write(report)
    mlflow.log_artifact("classification_report.txt", artifact_path="reports")

In [None]:
with start_mlflow_run_auto() as run:
    # Train model
    history = model.fit(
        train_generator,
        steps_per_epoch=len(train_generator),
        validation_data=val_generator,
        validation_steps=len(val_generator),
        epochs=epochs,
        verbose=1,
        callbacks=callbacks,
        class_weight=class_weight_dict
    )

    # Descriptive parameters, logged in one batch.
    # Values come from the configuration variables rather than repeated
    # literals: MLflow params are immutable, so logging "num_classes" once as a
    # literal and once from the variable fails as soon as the two disagree.
    mlflow.log_params({
        # preprocessing and augmentation
        "normalization": "rescale=1./255",
        "train_augmentation": "rotation_range=50, width_shift_range=0.3, height_shift_range=0.3, zoom_range=0.3, horizontal_flip=True, brightness_range=(0.4,1.6), fill_mode='nearest', channel_shift_range=40.0, shear_range=25.0",
        "target_size": str(target_size),
        # model architecture
        "model_architecture": "ResNet50V2 base + 3 residual dense blocks + dropout + batchnorm",
        "base_model": "ResNet50V2 (imagenet, include_top=False)",
        "frozen_layers": "All layers frozen initially",
        "activation_functions": "ReLU for hidden layers, Softmax for output",
        "regularization": "L2(1e-5)",
        # hyperparameters
        "num_classes": num_classes,
        "batch_size": batch_size,
        "epochs": epochs,
    })

    # Log optimizer details from model
    opt_config = model.optimizer.get_config()
    mlflow.log_param("optimizer", model.optimizer.__class__.__name__)
    for k, v in opt_config.items():
        # Nested config values are stored as their string form
        if isinstance(v, (dict, list)):
            v = str(v)
        mlflow.log_param(f"optimizer_{k}", v)

    # Log learning rate schedule parameters
    mlflow.log_params({
        "initial_learning_rate": initial_learning_rate,
        "warmup_epochs": warmup_epochs,
        "total_epochs": total_epochs,
        "lr_schedule": "warmup_cosine_decay_schedule",
    })

    # Evaluate test set (order: loss, then the metrics in compile() order)
    test_loss, test_accuracy, test_auc, test_precision, test_recall = model.evaluate(test_generator, verbose=1)
    mlflow.log_metric("test_loss", test_loss)
    mlflow.log_metric("test_accuracy", test_accuracy)
    mlflow.log_metric("test_auc", test_auc)
    mlflow.log_metric("test_precision", test_precision)
    mlflow.log_metric("test_recall", test_recall)

    # Log artifacts (plots + metrics)
    log_custom_metrics(history, model, test_generator)

    # Log model & register in MLflow Registry
    # Registration failure is reported but does not abort the run.
    try:
        mlflow.keras.log_model(
            model,
            artifact_path="fruit_classifier_model",
            registered_model_name="FruitClassifier"
        )
        print("Model registered successfully in MLflow Model Registry!")
    except MlflowException as e:
        print("Model registration failed:", e)

    print("Run completed with ID:", run.info.run_id)

In [None]:
## 2.1) Grid Search - learning rate and batch size tuning

# hyperparameter grid
# RMSprop is the chosen optimizer
from tensorflow.keras.optimizers import RMSprop

# 3 learning rates x 3 batch sizes = 9 runs
learning_rates = [1e-5, 1e-4, 1e-3]
batch_sizes = [30, 60, 90]

# Keep track of best metrics
# (updated by the grid-search loop with the highest val_accuracy seen so far
# and the ID of the run that produced it)
best_val_accuracy = 0
best_run_id = None

In [None]:
# Loop through hyperparameters and log each run

def _scaled_lr_schedule(peak_lr):
    """Return the warmup + cosine schedule rescaled so that it peaks at `peak_lr`."""
    scale = peak_lr / initial_learning_rate

    def schedule(epoch):
        return float(warmup_cosine_decay_schedule(epoch) * scale)

    return schedule


for lr in learning_rates:
    for batch in batch_sizes:

        # Update train generator batch size
        train_generator = train_datagen.flow_from_directory(
            '/content/fruit_split_data/train',
            target_size=target_size,
            batch_size=batch,
            class_mode='categorical',
            shuffle=True,
            seed=42
        )

        val_generator = val_datagen.flow_from_directory(
            '/content/fruit_split_data/val',
            target_size=target_size,
            batch_size=batch,
            class_mode='categorical',
            shuffle=False
        )

        # Define optimizer with current learning rate
        optimizer = RMSprop(
            learning_rate=lr,
            rho=0.9,
            momentum=0.9
        )

        model = build_model(num_classes)

        # Recompile the model
        model.compile(
            optimizer=optimizer,
            loss=tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.1),
            metrics=[
                'accuracy',
                tf.keras.metrics.AUC(name='auc'),
                tf.keras.metrics.Precision(name='precision'),
                tf.keras.metrics.Recall(name='recall')
            ]
        )

        # The shared LearningRateScheduler resets the optimizer's rate every
        # epoch from initial_learning_rate, which would make every grid cell
        # train with the same learning rate regardless of `lr`. Swap it for a
        # scheduler with the same shape that peaks at this run's `lr`.
        run_callbacks = [
            tf.keras.callbacks.LearningRateScheduler(_scaled_lr_schedule(lr))
            if isinstance(cb, tf.keras.callbacks.LearningRateScheduler) else cb
            for cb in callbacks
        ]

        # Start a new MLflow run
        with start_mlflow_run_auto(run_prefix="RMSprop_Tuning") as run:

            history = model.fit(
                train_generator,
                steps_per_epoch=len(train_generator),
                validation_data=val_generator,
                validation_steps=len(val_generator),
                epochs=epochs,
                verbose=1,
                callbacks=run_callbacks,
                class_weight=class_weight_dict
            )

            # Log preprocessing / augmentation info
            mlflow.log_param("normalization", "rescale=1./255")
            mlflow.log_param(
                "train_augmentation",
                "rotation_range=50, width_shift_range=0.3, height_shift_range=0.3, zoom_range=0.3, horizontal_flip=True, brightness_range=(0.4,1.6), fill_mode='nearest', channel_shift_range=40.0, shear_range=25.0"
            )
            mlflow.log_param("target_size", str(target_size))
            mlflow.log_param("num_classes", num_classes)

            # Log model architecture info
            mlflow.log_param(
                "model_architecture",
                "ResNet50V2 base + 3 residual dense blocks + dropout + batchnorm"
            )
            mlflow.log_param("base_model", "ResNet50V2 (imagenet, include_top=False)")
            mlflow.log_param("frozen_layers", "All layers frozen initially")
            mlflow.log_param("activation_functions", "ReLU for hidden layers, Softmax for output")
            mlflow.log_param("regularization", "L2(1e-5)")

            # Log hyperparameters
            mlflow.log_param("optimizer", "RMSprop")
            mlflow.log_param("learning_rate", lr)
            mlflow.log_param("batch_size", batch)

            # Evaluate on test set (loss first, then metrics in compile() order)
            test_metrics = model.evaluate(test_generator, verbose=0)
            metric_names = ["loss", "accuracy", "auc", "precision", "recall"]
            for name, value in zip(metric_names, test_metrics):
                mlflow.log_metric(f"test_{name}", value)

            # Log model and artifacts
            log_custom_metrics(history, model, test_generator)
            mlflow.keras.log_model(model, artifact_path="fruit_classifier_model")

            # Track the best validation accuracy across the grid. This uses the
            # per-epoch val_accuracy reported by Keras during training.
            val_acc = max(history.history['val_accuracy'])
            if val_acc > best_val_accuracy:
                best_val_accuracy = val_acc
                best_run_id = run.info.run_id

In [None]:
import mlflow
from mlflow.tracking import MlflowClient
import matplotlib.pyplot as plt
import numpy as np
import random

In [None]:
# function to train and log runs for random search and Bayesian optimization
def train_and_log_run(lr, batch_size, run_prefix="Run"):
    """Train one fresh model with the given learning rate / batch size and log it.

    Builds new train/val generators with `batch_size`, a new RMSprop-compiled
    model, trains it inside its own MLflow run, evaluates on the test generator
    and logs parameters, metrics, plots and the model.

    The shared LearningRateScheduler in `callbacks` always peaks at
    `initial_learning_rate`; for this run it is replaced by a scheduler of the
    same shape that peaks at `lr`, so the requested learning rate is the one
    actually used.

    Args:
        lr: peak learning rate for this run.
        batch_size: batch size for the train and validation generators.
        run_prefix: prefix of the auto-numbered MLflow run name.

    Returns:
        dict with 'run_id', 'run_name', 'learning_rate', 'batch_size',
        'best_val_accuracy' and 'test_metrics' (metric name -> value).
    """
    # End any leftover active run
    if mlflow.active_run() is not None:
        mlflow.end_run()

    # Create data generators
    train_generator = train_datagen.flow_from_directory(
        '/content/fruit_split_data/train',
        target_size=target_size,
        batch_size=batch_size,
        class_mode='categorical',
        shuffle=True,
        seed=42
    )
    val_generator = val_datagen.flow_from_directory(
        '/content/fruit_split_data/val',
        target_size=target_size,
        batch_size=batch_size,
        class_mode='categorical',
        shuffle=False
    )

    # Build and compile model
    optimizer = RMSprop(learning_rate=lr, rho=0.9, momentum=0.9)
    model = build_model(num_classes)
    model.compile(
        optimizer=optimizer,
        loss=tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.1),
        metrics=[
            'accuracy',
            tf.keras.metrics.AUC(name='auc'),
            tf.keras.metrics.Precision(name='precision'),
            tf.keras.metrics.Recall(name='recall')
        ]
    )

    # Same warmup + cosine shape as the shared schedule, rescaled to peak at `lr`
    lr_scale = lr / initial_learning_rate

    def scaled_schedule(epoch):
        return float(warmup_cosine_decay_schedule(epoch) * lr_scale)

    run_callbacks = [
        tf.keras.callbacks.LearningRateScheduler(scaled_schedule)
        if isinstance(cb, tf.keras.callbacks.LearningRateScheduler) else cb
        for cb in callbacks
    ]

    # Start MLflow run
    with start_mlflow_run_auto(run_prefix=run_prefix, nested=True) as run:
        history = model.fit(
            train_generator,
            steps_per_epoch=len(train_generator),
            validation_data=val_generator,
            validation_steps=len(val_generator),
            epochs=epochs,
            verbose=1,
            callbacks=run_callbacks,
            class_weight=class_weight_dict
        )

        # Log hyperparameters
        mlflow.log_param("optimizer", "RMSprop")
        mlflow.log_param("learning_rate", lr)
        mlflow.log_param("batch_size", batch_size)

        # Log preprocessing and augmentation info
        mlflow.log_param("normalization", "rescale=1./255")
        mlflow.log_param(
            "train_augmentation",
            "rotation_range=50, width_shift_range=0.3, height_shift_range=0.3, zoom_range=0.3, horizontal_flip=True, brightness_range=(0.4,1.6), fill_mode='nearest', channel_shift_range=40.0, shear_range=25.0"
        )
        mlflow.log_param("target_size", str(target_size))
        mlflow.log_param("num_classes", num_classes)

        # Log model architecture info
        mlflow.log_param(
            "model_architecture",
            "ResNet50V2 base + 3 residual dense blocks + dropout + batchnorm"
        )
        mlflow.log_param("base_model", "ResNet50V2 (imagenet, include_top=False)")
        mlflow.log_param("frozen_layers", "All layers frozen initially")
        mlflow.log_param("activation_functions", "ReLU for hidden layers, Softmax for output")
        mlflow.log_param("regularization", "L2(1e-5)")

        # Evaluate test set (loss first, then metrics in compile() order)
        test_values = model.evaluate(test_generator, verbose=0)
        metric_names = ["loss", "accuracy", "auc", "precision", "recall"]
        test_metrics = dict(zip(metric_names, test_values))
        for name, value in test_metrics.items():
            mlflow.log_metric(f"test_{name}", value)

        # Log artifacts and history plots
        log_custom_metrics(history, model, test_generator)

        # Best validation accuracy reached in this run
        best_val_accuracy = max(history.history['val_accuracy'])
        mlflow.log_metric("best_val_accuracy", best_val_accuracy)

        run_id = run.info.run_id
        mlflow.keras.log_model(model, artifact_path=f"fruit_classifier_model_{run_id}")

    # Hand the outcome back so callers can collect and compare runs
    return {
        "run_id": run_id,
        "run_name": run.info.run_name,
        "learning_rate": lr,
        "batch_size": batch_size,
        "best_val_accuracy": best_val_accuracy,
        "test_metrics": test_metrics,
    }

In [None]:
## 2.2) Random Search - learning rate and batch size tuning

# Dedicated, seeded generator: the sampled configurations are the same on every
# re-run of the notebook, so the search can be reproduced and resumed.
search_rng = random.Random(42)

random_runs = []
for _ in range(9):  # 9 random trials
    lr = 10 ** search_rng.uniform(-5, -2)  # log-uniform between 1e-5 and 1e-2
    bs = search_rng.choice([30, 60, 90, 120])
    run_info = train_and_log_run(lr, bs, run_prefix="Random")
    random_runs.append(run_info)

In [None]:
## 2.3) Bayesian Optimization - learning rate and batch size tuning
# NOTE(review): as written this cell evaluates a fixed 3 x 3 grid of candidates;
# nothing here chooses the next candidate from earlier results. The candidates
# were presumably picked by hand from the previous searches - confirm, or
# replace with an actual Bayesian optimizer.
bayesian_runs = []
lr_candidates = [1e-4, 3e-4, 5e-4]  # 3 learning rates x 3 batch sizes = 9 runs
batch_candidates = [30, 60, 90]

for lr in lr_candidates:
    for bs in batch_candidates:
        run_info = train_and_log_run(lr, bs, run_prefix="Bayesian")
        bayesian_runs.append(run_info)

In [None]:
# function to compute overfit and and score
from mlflow.tracking import MlflowClient

def compute_overfit_and_score(runs, name_prefix=None):
    """
    Compute overfit gap and composite score for existing MLflow runs.
    Optionally filter by run_name prefix (e.g., 'RMSprop_Tuning', 'Random', 'Bayesian').

    Args:
        runs: iterable of MLflow run objects (``info.run_id``, ``info.run_name``
            and ``data.metrics`` are read)
        name_prefix: string, if provided, only include runs whose run_name starts
            with this prefix; runs without a name never match a prefix

    Returns:
        list of dicts with 'run_id', 'run_name', 'train_acc', 'val_acc', 'val_auc',
        'overfit_gap' (train_acc - val_acc) and 'score' (val_acc + val_auc), in
        input order. Runs that did not log both "accuracy" and "val_accuracy" are
        left out: defaulting both to 0 would give them an overfit gap of exactly 0,
        which is indistinguishable from a perfectly generalizing run.
    """
    processed_runs = []

    for run in runs:
        run_name = run.info.run_name
        # run_name may be None; treat it as an empty name instead of crashing.
        if name_prefix and not (run_name or "").startswith(name_prefix):
            continue  # skip runs not matching the prefix

        metrics = run.data.metrics
        if "accuracy" not in metrics or "val_accuracy" not in metrics:
            continue  # no meaningful gap can be computed for this run

        train_acc = metrics["accuracy"]
        val_acc = metrics["val_accuracy"]
        # val_auc stays optional: a missing value contributes 0 to the score.
        val_auc = metrics.get("val_auc", 0)

        processed_runs.append({
            "run_id": run.info.run_id,
            "run_name": run_name,
            "train_acc": train_acc,
            "val_acc": val_acc,
            "val_auc": val_auc,
            "overfit_gap": train_acc - val_acc,
            "score": val_acc + val_auc
        })
    return processed_runs

# Fetch all runs from the experiment
client = MlflowClient()
experiment_name = "Fruit_ResNet50V2"
experiment = mlflow.get_experiment_by_name(experiment_name)
if experiment is None:
    # An unknown experiment name yields None; fail with a readable message
    # instead of an AttributeError on `experiment.experiment_id`.
    raise ValueError(
        f"MLflow experiment '{experiment_name}' was not found; "
        "check the tracking URI and the experiment name."
    )
# NOTE(review): search_runs returns a single page of results; confirm the
# experiment holds fewer runs than the default page size.
all_runs = client.search_runs([experiment.experiment_id])

# Filter & compute overfit/score per method.
# `random_runs` and `bayesian_runs` are rebound here: from this point on they
# hold the score dicts built from MLflow, not the values collected while training.
grid_runs = compute_overfit_and_score(all_runs, name_prefix="RMSprop_Tuning")
random_runs = compute_overfit_and_score(all_runs, name_prefix="Random")
bayesian_runs = compute_overfit_and_score(all_runs, name_prefix="Bayesian")

In [None]:
def get_top_runs(runs, top_n=5):
    """
    Pick the runs that generalize best while still validating well.

    Args:
        runs (list): Dicts carrying at least the keys 'overfit_gap' and 'score'
        top_n (int): How many runs to keep

    Returns:
        list: The first `top_n` runs after ordering by |overfit_gap| (ascending)
        and, for equal gaps, by score (descending). An empty/falsy `runs`
        yields an empty list. The input is not modified.
    """
    def ranking_key(entry):
        # A gap close to zero in either direction ranks first; the negated
        # score makes higher scores win ties.
        return (abs(entry["overfit_gap"]), -entry["score"])

    if runs:
        ranked = list(runs)
        ranked.sort(key=ranking_key)
        return ranked[:top_n]
    return []

In [None]:
# Top 5 selection per method

# get_top_runs is applied with its default top_n to each method's scored runs.
top_grid, top_random, top_bayesian = (
    get_top_runs(method_runs)
    for method_runs in (grid_runs, random_runs, bayesian_runs)
)

In [None]:
import matplotlib.pyplot as plt
from mlflow.tracking import MlflowClient

def plot_runs(top_runs, method_name="Method"):
    """
    Plot accuracy and loss curves for each of the given runs.

    For every run, the logged histories of "accuracy"/"val_accuracy" and
    "loss"/"val_loss" are read from MLflow. A figure is drawn only when both the
    training and the validation series of a pair are non-empty.

    Args:
        top_runs: list of dicts with a 'run_id' key and, optionally, 'run_name'
            (the run id is used in titles when the name is absent)
        method_name: label prepended to every figure title
    """
    client = MlflowClient()

    def fetch_metric_series(run_id, metric_key):
        """Return the logged values of `metric_key` for `run_id`, or [] on failure."""
        try:
            history = client.get_metric_history(run_id, metric_key)
            return [m.value for m in history]
        except Exception as exc:
            # Best-effort: a failed lookup only skips the plot, but report it
            # so a missing figure is not mistaken for a missing metric.
            print(f"[plot_runs] Could not fetch '{metric_key}' for run {run_id}: {exc}")
            return []

    def plot_train_vs_val(train_values, val_values, quantity, title):
        """Draw one train-vs-validation figure; do nothing if either series is empty."""
        if not (train_values and val_values):
            return
        fig, ax = plt.subplots(figsize=(6, 4))
        ax.plot(train_values, label=f"Train {quantity}")
        ax.plot(val_values, label=f"Validation {quantity}")
        ax.set_title(title)
        ax.set_xlabel("Epoch")
        ax.set_ylabel(quantity)
        ax.legend()
        plt.show()
        # Release the figure so plotting many runs does not accumulate memory.
        plt.close(fig)

    for i, run_info in enumerate(top_runs):
        run_id = run_info["run_id"]
        run_name = run_info.get("run_name", run_id)
        title_prefix = f"{method_name} Top Run {i+1}: {run_name}"

        # Retrieve the metric histories
        train_acc = fetch_metric_series(run_id, "accuracy")
        val_acc = fetch_metric_series(run_id, "val_accuracy")
        train_loss = fetch_metric_series(run_id, "loss")
        val_loss = fetch_metric_series(run_id, "val_loss")

        plot_train_vs_val(train_acc, val_acc, "Accuracy", f"{title_prefix} — Accuracy")
        plot_train_vs_val(train_loss, val_loss, "Loss", f"{title_prefix} — Loss")


In [None]:
# Plot top runs
# One plot_runs call per tuning method, in the order grid -> random -> Bayesian.
for method_label, method_top_runs in (
    ("GridSearch", top_grid),
    ("RandomSearch", top_random),
    ("BayesianSearch", top_bayesian),
):
    plot_runs(method_top_runs, method_label)

In [None]:
# Compare overall best
import mlflow

all_top = top_grid + top_random + top_bayesian
if not all_top:
    raise ValueError(
        "No candidate runs to compare: top_grid, top_random and top_bayesian are all empty."
    )

# Rank with the same criterion as get_top_runs: smallest |overfit_gap| first,
# then highest score. Sorting on the signed gap would instead favour the run
# with the most negative gap (validation far above training).
overall_best = min(all_top, key=lambda x: (abs(x["overfit_gap"]), -x["score"]))
best_run_id = overall_best["run_id"]

# Identify which method's shortlist the winner came from.
if overall_best in top_grid:
    best_method = "GridSearch"
elif overall_best in top_random:
    best_method = "RandomSearch"
else:
    best_method = "BayesianSearch"

# Fetch run metadata
best_run = mlflow.get_run(best_run_id)
best_run_name = best_run.info.run_name

print(f"Overall best run: {best_run_id} ({best_run_name}) from {best_method}")


In [None]:
# Register & promote

from mlflow.tracking import MlflowClient
from mlflow.exceptions import MlflowException
import mlflow

client = MlflowClient()

# Best run info
# NOTE: the run id is pinned by hand here and overwrites any `best_run_id`
# computed earlier in the notebook.
best_run_id = "0f2f64255e4946808e7f2fe71575bdb1"
best_run_name = mlflow.get_run(best_run_id).info.run_name  # "Bayesian_26"

# Generate model registry name
model_name = f"Fruit_Classifier_{best_run_name}"             # "Fruit_Classifier_Bayesian_26"

# Artifact path.
# The training code logs the model under f"fruit_classifier_model_{best_run_id}"
# using the best-so-far run id at logging time, which is not necessarily the id
# of the run being logged. Look the directory up instead of assuming the suffix.
expected_model_path = f"fruit_classifier_model_{best_run_id}"
logged_model_dirs = [
    artifact.path
    for artifact in client.list_artifacts(best_run_id)
    if artifact.is_dir and artifact.path.startswith("fruit_classifier_model")
]
if expected_model_path in logged_model_dirs:
    model_path_in_run = expected_model_path
elif len(logged_model_dirs) == 1:
    model_path_in_run = logged_model_dirs[0]
else:
    raise ValueError(
        f"Could not determine the model artifact directory for run {best_run_id}; "
        f"found: {logged_model_dirs}"
    )

# Ensure registered model exists
try:
    client.get_registered_model(model_name)
except MlflowException:
    client.create_registered_model(model_name)

# Register the version
model_version = client.create_model_version(
    name=model_name,
    source=f"runs:/{best_run_id}/{model_path_in_run}",
    run_id=best_run_id
)

# Promote to Staging
client.transition_model_version_stage(
    name=model_name,
    version=model_version.version,
    stage="Staging"
)

print(f"Model '{model_name}' version {model_version.version} promoted to Staging.")

In [None]:
# Optional manual override
# The snippet below is wrapped in a string literal, so none of it is executed
# when this cell runs. To use it, remove the surrounding triple quotes and set
# manual_best_run_id. It archives the current model version, registers a new
# version from the chosen run, and moves that new version to "Staging".
# NOTE(review): the `source` below points at "fruit_classifier_model" without
# the run-id suffix that the training code appends when logging the model;
# confirm the artifact path before enabling this.
"""
manual_best_run_id = "REPLACE_WITH_RUN_ID"
client.transition_model_version_stage(name=model_name, version=model_version.version, stage="Archived")
manual_model_version = client.create_model_version(
    name=model_name,
    source=f"runs:/{manual_best_run_id}/fruit_classifier_model",
    run_id=manual_best_run_id
)
client.transition_model_version_stage(name=model_name, version=manual_model_version.version, stage="Staging")
"""

In [None]:
# Promote to Production (after validating performance, stability, and deployment readiness in Staging.)

# Move the model version held in `model_version` to the "Production" stage.
production_transition = dict(
    name=model_name,
    version=model_version.version,
    stage="Production",
)
client.transition_model_version_stage(**production_transition)

print(f"Best model (Run ID: {best_run_id}) is now in Production.")

In [None]:
import os

# Path to save model
save_path = "/content/best_fruit_model_resnet50v2.h5"

# Load the registered model version from MLflow instead of saving whatever
# `model` object happens to be left in the kernel, which need not be the run
# that was selected and registered as best.
best_model = mlflow.keras.load_model(f"models:/{model_name}/{model_version.version}")

# Save entire model
best_model.save(save_path)
print(f"Model saved at {save_path}")

In [None]:
# Colab-specific helper module (only importable inside Google Colab)
from google.colab import files

# Hand the file written at save_path to the browser as a download
files.download(save_path)