In [None]:
!pip install mlflow -q

In [None]:
from enum import Enum

from tensorflow.keras.applications import MobileNetV2, ResNet50
from tensorflow.keras.applications.mobilenet_v2 import (
    preprocess_input as preprocess_input_mobilenet,
)
from tensorflow.keras.applications.resnet import (
    preprocess_input as preprocess_input_resnet,
)
from tensorflow.keras.optimizers import SGD, Adam, RMSprop


class OptimizerType(Enum):
    """Selectable optimizers.

    Each member wraps one of the optimizer callables imported from
    ``tensorflow.keras.optimizers``; ``compile_model`` reads ``.value`` and
    calls it with ``learning_rate=...``.
    """

    ADAM = Adam
    SGD = SGD  # right-hand side resolves to the imported module-level SGD
    RMSPROP = RMSprop


class CNNType(str, Enum):
    """Selectable CNN backbones (Keras application builders).

    Unlike ``OptimizerType``, ``build_model`` calls the looked-up attribute
    directly (``CNN_MODEL(...)``) instead of going through ``.value``.
    """

    # NOTE(review): presumably ResNet50 / MobileNetV2 are plain functions. Enum
    # leaves descriptors (functions included) as ordinary class attributes
    # instead of turning them into members, so these two names would simply be
    # the builder functions and the ``str`` mixin would have no effect.
    # TODO confirm; if they were ever classes, the ``str`` mixin would store
    # ``str(value)`` and the direct call in build_model would fail.
    RESNET = ResNet50
    MOBILENETV2 = MobileNetV2


# Lookup tables translating the string hyperparameters into concrete objects.
optimizer_map = dict(
    adam=OptimizerType.ADAM,
    sgd=OptimizerType.SGD,
    rmsprop=OptimizerType.RMSPROP,
)
cnn_map = dict(resnet50=CNNType.RESNET, mobilenetv2=CNNType.MOBILENETV2)
# Keyed by the same backbone names as cnn_map.
preprocess_img_map = dict(
    resnet50=preprocess_input_resnet,
    mobilenetv2=preprocess_input_mobilenet,
)

# ------ HYPERPARAMETERS (Default Values) ------
# NOTE(review): presumably these defaults are overridden by an external runner
# (see PREFECT_RUN_ID below) — confirm before changing the form of these lines.
BATCH_SIZE = 12  # batch size used by both the training and the testing pipeline
EPOCHS = 10  # epochs passed to fit() in each of the two training phases
LEARNING_RATE = 0.001  # head-only phase; fine-tuning uses LEARNING_RATE * 0.01
OPTIMIZER_str = "adam"  # key into optimizer_map
CNN_str = "resnet50"  # key into cnn_map / preprocess_img_map; also the backbone layer name
FINETUNE_DEPTH = 100  # number of trailing backbone layers considered for unfreezing

# ------ RUN_ID ------
PREFECT_RUN_ID = "Default"  # stored on each MLflow run as the "prefect_run_id" tag
EXPERIMENT_NAME = "Default Experiment"  # MLflow experiment the runs are logged under
RUN_NAME = "Avocado_Ripening_Model"  # prefix; "_Head_Only" / "_Fine_Tuned" are appended
MLFLOW_URI = "https://mlflow.lepcodes.com"  # tracking server, health-checked before use
MODEL_PATH = "/kaggle/working/feature_extraction_model.keras"  # best head-only checkpoint
MODEL_PATH_FT = "/kaggle/working/fine_tuned_model.keras"  # best fine-tuned checkpoint

In [None]:
# Resolve the string hyperparameters into the objects used by later cells.
# An unknown OPTIMIZER_str / CNN_str surfaces here as a KeyError.
OPTIMIZER = optimizer_map[OPTIMIZER_str]
CNN_MODEL = cnn_map[CNN_str]
PREPROCESS_IMG = preprocess_img_map[CNN_str]

# Echo the effective configuration, one setting per line.
print(
    f"OPTIMIZER: {OPTIMIZER}",
    f"CNN: {CNN_MODEL}",
    f"BATCH_SIZE: {BATCH_SIZE}",
    f"EPOCHS: {EPOCHS}",
    f"LEARNING_RATE: {LEARNING_RATE}",
    f"Image Preprocessor: {PREPROCESS_IMG}",
    sep="\n",
)

## Import Dependencies


In [None]:
import os

import pandas as pd
import requests
import tensorflow as tf
from tensorflow.keras.models import load_model

import mlflow

# Import Preprocessed Dataset


In [None]:
# Load the dataset table from the Kaggle input mount.
data = pd.read_csv("/kaggle/input/avocado-ripening-dataset/data.csv")

In [None]:
# Preview the first rows of the freshly loaded table.
data.head()

### Define Paths of Images


In [None]:
image_folder_path = "/kaggle/input/avocado-ripening-dataset/images"


def _to_image_path(file_stem):
    """Return the ``.jpg`` path inside ``image_folder_path`` for one file stem."""
    return os.path.join(image_folder_path, file_stem + ".jpg")


# Overwrites the column in place: re-running this cell without reloading
# ``data`` would apply the folder prefix and extension a second time.
data["File Name"] = data["File Name"].apply(_to_image_path)

In [None]:
# Preview again to check the rewritten "File Name" paths.
data.head()

# Create Data Pipelines


### Extract Shelf-life days expectancy


In [None]:
# Regression target: the "Shelf-life Days" column.
shelf_life_expectancy = data["Shelf-life Days"]
shelf_life_expectancy.shape

### Extract Storage Condition Temperature


In [None]:
# The three storage-condition columns, converted to a float array that feeds
# the model's "condition_input".
condition_columns = ["T10", "T20", "Tam"]
storage_condition = data[condition_columns].to_numpy().astype("float")
print(storage_condition.shape)

### Extract Image Paths


In [None]:
# Image file paths (the "File Name" column rewritten in an earlier cell).
image_paths = data["File Name"]
image_paths.shape

### Create Dataset


In [None]:
DATASET_SIZE = len(data)
SHUFFLE_BUFFER_SIZE = 1000
SHUFFLE_SEED = 42  # fixed seed so the train/test split is reproducible across runs

# Start with the dataset of file paths and labels
full_dataset = tf.data.Dataset.from_tensor_slices(
    (
        {"image_input": image_paths, "condition_input": storage_condition},
        shelf_life_expectancy,
    )
)

# Shuffle the file paths ONCE.
# The dataset is later split with take()/skip(). By default shuffle() draws a
# new order on every iteration (every epoch), which would make the two splits
# overlap and leak validation samples into training.
# reshuffle_each_iteration=False keeps the order, and therefore the split, fixed.
# The buffer covers the whole dataset so the single shuffle is uniform; the
# elements are only paths and small feature rows at this stage.
# If per-epoch reshuffling of the training data is wanted, add a second
# shuffle() on the training split after take().
full_dataset = full_dataset.shuffle(
    max(SHUFFLE_BUFFER_SIZE, DATASET_SIZE),
    seed=SHUFFLE_SEED,
    reshuffle_each_iteration=False,
)

### Function to load each image from path


In [None]:
def load_and_preprocess_multi_input(inputs, output):
    """
    Turn one (features, label) element of file paths into model-ready tensors.

    Reads the JPEG referenced by ``inputs["image_input"]``, decodes it with
    3 channels and resizes it to 224x224. Pixel values are not rescaled here.
    ``inputs["condition_input"]`` and ``output`` are passed through untouched.

    Returns:
        A ``(features, output)`` pair where ``features`` has the same two keys
        as ``inputs``.
    """
    raw_bytes = tf.io.read_file(inputs["image_input"])
    decoded = tf.io.decode_jpeg(raw_bytes, channels=3)
    resized = tf.image.resize(decoded, [224, 224])

    # Keep the dictionary layout so keys keep matching the model's input names.
    features = {
        "image_input": resized,
        "condition_input": inputs["condition_input"],
    }
    return features, output

## Create Training Data Pipeline


In [None]:
# Training split: the first 80% of the elements, decoded in parallel, batched
# and prefetched.
# NOTE(review): take() here and skip() in the testing pipeline are disjoint
# only if full_dataset yields the same order on every iteration — confirm the
# upstream shuffle does not reshuffle per iteration.
train_dataset = (
    full_dataset.take(int(0.8 * DATASET_SIZE))
    .map(load_and_preprocess_multi_input, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

## Create Testing Data Pipeline


In [None]:
# Testing split: everything after the first 80% of the elements, run through
# the same decode / batch / prefetch steps as the training split.
# NOTE(review): skip() here and take() in the training pipeline are disjoint
# only if full_dataset yields the same order on every iteration — confirm the
# upstream shuffle does not reshuffle per iteration.
test_dataset = (
    full_dataset.skip(int(0.8 * DATASET_SIZE))
    .map(load_and_preprocess_multi_input, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

In [None]:
# Report the cardinality of each split (the pipelines are already batched).
for split in (train_dataset, test_dataset):
    print(split.cardinality())

# Create Model (Feature Extraction / Only Dense Layer)


## Build Model Function


In [None]:
import tensorflow.keras as keras


def build_model() -> "keras.Model":
    """
    Build the two-input shelf-life regression model (uncompiled).

    Inputs are a 224x224x3 image ("image_input") and a 3-value condition
    vector ("condition_input"). The image goes through augmentation, the
    backbone-specific preprocessing and a frozen ImageNet backbone selected by
    the module-level ``CNN_MODEL`` / ``PREPROCESS_IMG`` / ``CNN_str``. The
    pooled features are then concatenated with the condition vector and fed to
    a small dense regressor with a single linear output.

    Returns:
        The model named "avocado_shelf_life_model".
    """
    # Defining Input Layer
    image_input = keras.layers.Input(shape=(224, 224, 3), name="image_input")
    condition_input = keras.layers.Input(shape=(3,), name="condition_input")

    # Data Augmentation Layer
    data_augmentation = keras.Sequential(
        [
            # Geometric Augmentations
            keras.layers.RandomFlip("horizontal_and_vertical"),
            keras.layers.RandomRotation(0.2),
            keras.layers.RandomZoom(0.2),
            # Lighting and Color Augmentations
            keras.layers.RandomBrightness(factor=0.1),
            keras.layers.RandomContrast(factor=0.1),
        ],
        name="data_augmentation",
    )

    # CNN Base Model
    # Named after CNN_str so later cells can retrieve it with get_layer(CNN_str).
    base_model = CNN_MODEL(
        input_shape=(224, 224, 3), include_top=False, weights="imagenet", name=CNN_str
    )
    # Frozen here: only the layers added below are trained in the first phase.
    base_model.trainable = False

    # Build Image Preprocessing Layer
    x = data_augmentation(image_input)
    x = PREPROCESS_IMG(x)
    # The backbone is always called with training=False, independent of fit().
    x = base_model(x, training=False)
    x = keras.layers.GlobalAveragePooling2D()(x)
    x = keras.layers.Dropout(0.2)(x)
    image_features = keras.layers.Dense(512, activation="relu", name="image_features")(
        x
    )

    # Combine Image Features and Avocado Condition
    combined_features = keras.layers.Concatenate(name="feature_concatenate")(
        [image_features, condition_input]
    )

    # Final Regressor
    x = keras.layers.Dense(128, activation="relu")(combined_features)
    x = keras.layers.Dense(32, activation="relu")(x)
    output = keras.layers.Dense(1, activation="linear", name="shelf_life_output")(x)

    model = keras.Model(
        inputs=[image_input, condition_input],
        outputs=output,
        name="avocado_shelf_life_model",
    )
    return model


## Compile Model Function


In [None]:
# Sanity check: show which optimizer object the selected enum member wraps.
optimizer = OPTIMIZER.value
print(optimizer)

In [None]:
def compile_model(model, lr):
    """
    Compile ``model`` for regression and return it.

    The optimizer is taken from the module-level ``OPTIMIZER`` selection and
    instantiated with ``learning_rate=lr``. The loss is MSE; MAE and R2 are
    tracked as metrics.

    Args:
        model: Model to compile; it is compiled in place.
        lr: Learning rate handed to the optimizer.

    Returns:
        The same ``model`` object, now compiled.
    """
    optimizer_factory = OPTIMIZER.value
    print(optimizer_factory)

    optimizer_instance = optimizer_factory(learning_rate=lr)
    tracked_metrics = [
        tf.keras.metrics.MeanAbsoluteError(name="mae"),
        tf.keras.metrics.R2Score(name="r2_score"),
    ]
    model.compile(optimizer=optimizer_instance, loss="mse", metrics=tracked_metrics)
    return model

# Train Model


## Callback Function


In [None]:
def create_callbacks(path):
    """
    Create the training callbacks, both driven by the validation loss.

    Args:
        path: File path the best model is saved to.

    Returns:
        ``[early_stopping, model_checkpoint]`` in that order.
    """
    monitored_metric = "val_loss"

    # Stop after 10 epochs without improvement and roll back to the best weights.
    stopper = keras.callbacks.EarlyStopping(
        monitor=monitored_metric,
        patience=10,
        restore_best_weights=True,
    )
    # Write the model to ``path`` only when the monitored metric improves.
    checkpointer = keras.callbacks.ModelCheckpoint(
        filepath=path,
        monitor=monitored_metric,
        save_best_only=True,
        verbose=1,
    )
    return [stopper, checkpointer]

## Start Experiment Run in MLFlow


In [None]:
# Fail fast if the MLflow tracking server cannot be reached, then point MLflow
# at it. Without a timeout, requests.get() can block indefinitely on an
# unresponsive host and stall the whole notebook.
MLFLOW_HEALTH_TIMEOUT_S = 10  # seconds

try:
    response = requests.get(f"{MLFLOW_URI}/health", timeout=MLFLOW_HEALTH_TIMEOUT_S)
    # 200 and 404 are both accepted as "reachable"; any other status is only
    # reported, not treated as fatal.
    if response.status_code not in (200, 404):
        print(
            f"Server responded with code {response.status_code}, but seems to be alive."
        )
    else:
        print("Connection stablished with MLflow server.")

    mlflow.set_tracking_uri(MLFLOW_URI)
    print(f"Tracking URI set: {mlflow.get_tracking_uri()}")

except Exception as e:
    # Chain the original error so its traceback is kept as the explicit cause.
    raise Exception(f"CRITICAL ERROR: Cannot connect to MLFlow. Cause: {e}") from e

In [None]:
# Select the MLflow experiment; if that fails, try to create it explicitly.
try:
    mlflow.set_experiment(EXPERIMENT_NAME)
    print(f'Experiment "{EXPERIMENT_NAME}" set!')
except Exception:
    # `except Exception` rather than a bare `except:` so that
    # KeyboardInterrupt / SystemExit are not swallowed by the fallback.
    try:
        # create_experiment takes `artifact_location`. The previous
        # `artifact_store` keyword is not a parameter, so this call raised
        # TypeError and the fallback could never succeed.
        mlflow.create_experiment(
            name=EXPERIMENT_NAME, artifact_location="mlflow-artifacts:/"
        )
        mlflow.set_experiment(EXPERIMENT_NAME)
        print(f"{EXPERIMENT_NAME} don't existed. Created!")
    except Exception as create_error:
        # Keep the real cause attached instead of discarding it.
        raise Exception("Error configuring experiment") from create_error

In [None]:
# Phase 1: train only the layers on top of the frozen backbone.
model = build_model()
model = compile_model(model, LEARNING_RATE)
callbacks = create_callbacks(MODEL_PATH)  # best checkpoint is written to MODEL_PATH

with mlflow.start_run(run_name=RUN_NAME + "_Head_Only") as run:
    print(f"Run {RUN_NAME} starting with ID: {run.info.run_id}")
    print(f"Prefect Run ID: {PREFECT_RUN_ID}")
    mlflow.set_tag("prefect_run_id", PREFECT_RUN_ID)

    # Autologging is enabled with model and dataset logging switched off; the
    # model is uploaded explicitly at the end of this run instead.
    mlflow.tensorflow.autolog(log_models=False, log_datasets=False)

    # The testing split doubles as validation data for the callbacks.
    model.fit(
        train_dataset, epochs=EPOCHS, validation_data=test_dataset, callbacks=callbacks
    )

    # Reload the checkpoint saved at MODEL_PATH by ModelCheckpoint (read from
    # disk, despite the wording of the message below).
    print("Loading best model from memory!")
    best_model = load_model(MODEL_PATH)

    print("Uploading best model to MLflow...")
    mlflow.tensorflow.log_model(best_model, "avocado-model")

# Fine-Tune Model


### Reuse the Best Head-Only Model


In [None]:
# Fine-tuning starts from the best head-only checkpoint loaded above.
# This is an alias, not a copy: model_ft and best_model are the same object.
model_ft = best_model

## Unfreeze Base Model Weights


In [None]:
# The backbone was created with name=CNN_str in build_model, so it can be
# looked up by that name.
base_model = model_ft.get_layer(CNN_str)
base_model.trainable = True

## Unfreeze Last Layers of CNN


In [None]:
def unfreeze_last_n_layers(base_model, n_layers: int):
    """
    Unfreezes the last N layers of a model, while keeping
    BatchNormalization layers frozen to prevent statistical shift.

    Args:
        base_model: Model whose ``layers`` are (un)frozen in place.
        n_layers: How many trailing layers to consider for unfreezing. Values
            outside ``[0, len(base_model.layers)]`` are clamped to that range,
            so an oversized depth means "all layers" and a negative one means
            "none".

    Returns:
        The same ``base_model`` object.
    """
    # 1. Start by allowing training globally
    base_model.trainable = True

    total_layers = len(base_model.layers)
    # Clamp so the reported depth and cutoff index are always meaningful; the
    # set of frozen layers for out-of-range inputs is the same as before.
    n_layers = max(0, min(n_layers, total_layers))
    cutoff_index = total_layers - n_layers

    unfrozen_count = 0

    print(
        f"\n🔓 Attempting to unfreeze last {n_layers} layers (starting from index {cutoff_index})..."
    )

    for i, layer in enumerate(base_model.layers):
        # RULE 1: Everything before the cutoff is frozen
        if i < cutoff_index:
            layer.trainable = False
            continue

        # RULE 2: ALWAYS freeze BatchNormalization
        if isinstance(layer, tf.keras.layers.BatchNormalization):
            layer.trainable = False
            print(f"   🚫 Layer {i} ({layer.name}) is BatchNorm -> FROZEN")
        else:
            layer.trainable = True
            unfrozen_count += 1

    print(f"✅ Actually unfrozen: {unfrozen_count} layers (BatchNums skipped).")
    return base_model

In [None]:
# Unfreeze the last FINETUNE_DEPTH backbone layers; the function keeps
# BatchNormalization layers frozen.
base_model = unfreeze_last_n_layers(base_model, FINETUNE_DEPTH)

## Train and Start Experiment in MLFlow


In [None]:
# Phase 2: fine-tune with a learning rate 100x smaller than the head-only phase.
LEARNING_RATE_FT = LEARNING_RATE * 0.01
# Compile model_ft — the reloaded best model whose backbone layers were just
# unfrozen. Compiling `model` here (as before) trained the original network
# with its backbone still frozen and silently discarded the unfreezing.
model_ft = compile_model(model_ft, LEARNING_RATE_FT)
callbacks = create_callbacks(MODEL_PATH_FT)  # best checkpoint goes to MODEL_PATH_FT

with mlflow.start_run(run_name=RUN_NAME + "_Fine_Tuned") as run:
    print(f"Run {RUN_NAME} starting with ID: {run.info.run_id}")
    print(f"Prefect Run ID: {PREFECT_RUN_ID}")
    mlflow.set_tag("prefect_run_id", PREFECT_RUN_ID)

    # Autologging is enabled with model and dataset logging switched off; the
    # model is uploaded explicitly at the end of this run instead.
    mlflow.tensorflow.autolog(log_models=False, log_datasets=False)

    model_ft.fit(
        train_dataset, epochs=EPOCHS, validation_data=test_dataset, callbacks=callbacks
    )

    # Reload the checkpoint saved at MODEL_PATH_FT by ModelCheckpoint.
    print("Loading best Fine Tuned model from memory!")
    best_model = load_model(MODEL_PATH_FT)

    print("Uploading best Fine Tuned model to MLflow...")
    mlflow.tensorflow.log_model(best_model, "avocado-model")