In [4]:
!pip install mlflow -q

In [5]:
from enum import Enum

from tensorflow.keras.applications.mobilenet_v2 import (
    preprocess_input as preprocess_input_mobilenet,
)
from tensorflow.keras.applications import ResNet50, MobileNetV2
from tensorflow.keras.applications.resnet import (
    preprocess_input as preprocess_input_resnet,
)
from tensorflow.keras.optimizers import SGD, Adam, RMSprop


class OptimizerType(Enum):
    ADAM = Adam
    SGD = SGD
    RMSPROP = RMSprop


class CNNType(str, Enum):
    RESNET = ResNet50
    MOBILENET = MobileNetV2


optimizer_map = {
    "adam": OptimizerType.ADAM,
    "sgd": OptimizerType.SGD,
    "rmsprop": OptimizerType.RMSPROP,
}
cnn_map = {"resnet50": CNNType.RESNET, "mobilenetv2": CNNType.MOBILENET}
preprocess_img_map = {
    "resnet50": preprocess_input_resnet,
    "mobilenetv2": preprocess_input_mobilenet,
}

# ------ HYPERPARAMETERS (Default Values) ------
BATCH_SIZE = 12
EPOCHS = 10
LEARNING_RATE = 0.001
OPTIMIZER_str = "adam"
CNN_str = "resnet50"

# ------ RUN_ID ------
PREFECT_RUN_ID = "Default"
EXPERIMENT_NAME = "Default Experiment"
RUN_NAME = "Avocado_Ripening_Model"
MLFLOW_URI = "https://mlflow.lepcodes.com"
MODEL_PATH = "/kaggle/working/feature_extraction_model.keras"

In [6]:
OPTIMIZER = optimizer_map[OPTIMIZER_str]
CNN_MODEL = cnn_map[CNN_str]
PREPROCESS_IMG = preprocess_img_map[CNN_str]
print(f"OPTIMIZER: {OPTIMIZER}")
print(f"CNN: {CNN_MODEL}")
print(f"BATCH_SIZE: {BATCH_SIZE}")
print(f"EPOCHS: {EPOCHS}")
print(f"LEARNING_RATE: {LEARNING_RATE}")
print(f"Image Preprocessor: {PREPROCESS_IMG}")

OPTIMIZER: OptimizerType.ADAM
CNN: <function ResNet50 at 0x7953e8c91a80>
BATCH_SIZE: 12
EPOCHS: 10
LEARNING_RATE: 0.001
Image Preprocessor: <function preprocess_input at 0x7953e8c91c60>


## Import Dependencies

In [38]:
import pandas as pd
import numpy as np
from PIL import Image
from tensorflow.keras.models import load_model
import tensorflow as tf
import os
import mlflow
import requests

# Import Preprocessed Dataset

In [8]:
data = pd.read_csv("/kaggle/input/avocado-ripening-dataset/data.csv")

In [9]:
data.head()

Unnamed: 0,File Name,T10,T20,Tam,Shelf-life Days
0,T20_d01_001_a_1,0,1,0,7.824977
1,T20_d01_001_b_1,0,1,0,7.824884
2,T20_d02_001_a_1,0,1,0,7.019815
3,T20_d02_001_b_1,0,1,0,7.019525
4,T20_d03_001_a_2,0,1,0,5.983368


### Define Paths of Images

In [10]:
image_folder_path = "/kaggle/input/avocado-ripening-dataset/images"
data['File Name'] = data["File Name"].apply(lambda x: os.path.join(image_folder_path, x + '.jpg'))

In [11]:
data.head()

Unnamed: 0,File Name,T10,T20,Tam,Shelf-life Days
0,/kaggle/input/avocado-ripening-dataset/images/...,0,1,0,7.824977
1,/kaggle/input/avocado-ripening-dataset/images/...,0,1,0,7.824884
2,/kaggle/input/avocado-ripening-dataset/images/...,0,1,0,7.019815
3,/kaggle/input/avocado-ripening-dataset/images/...,0,1,0,7.019525
4,/kaggle/input/avocado-ripening-dataset/images/...,0,1,0,5.983368


# Create Data Pipelines

### Extract Shelf-life days expectancy

In [12]:
shelf_life_expectancy = data['Shelf-life Days']
shelf_life_expectancy.shape

(6830,)

### Extract Storage Condition Temperature

In [13]:
storage_condition = data[['T10', 'T20', 'Tam']].values.astype('float')
print(storage_condition.shape)

(6830, 3)


### Extract Image Paths

In [14]:
image_paths = data['File Name']
image_paths.shape

(6830,)

### Create Dataset

In [15]:
DATASET_SIZE = len(data)
SHUFFLE_BUFFER_SIZE = 1000

# Start with the dataset of file paths and labels
full_dataset = tf.data.Dataset.from_tensor_slices((
    {
        'image_input': image_paths,
        'condition_input': storage_condition
    },   
    shelf_life_expectancy
))

# Shuffle the file paths.
full_dataset = full_dataset.shuffle(SHUFFLE_BUFFER_SIZE)

I0000 00:00:1765082990.261134      47 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13942 MB memory:  -> device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5
I0000 00:00:1765082990.261737      47 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 13942 MB memory:  -> device: 1, name: Tesla T4, pci bus id: 0000:00:05.0, compute capability: 7.5


### Function to load each image from path

In [16]:
def load_and_preprocess_multi_input(inputs, output):
    """
    Loads the image and passes through the condition features.
    """
    image_path = inputs['image_input']
    condition = inputs['condition_input']
    
    # Load and process the image (without the final /255.0 scaling)
    image = tf.io.read_file(image_path)
    image = tf.io.decode_jpeg(image, channels=3)
    image = tf.image.resize(image, [224, 224]) # Note: 800x800 is large and slow
    
    # Return the data in the same dictionary structure
    return {'image_input': image, 'condition_input': condition}, output

## Create Training Data Pipeline

In [17]:
train_dataset = full_dataset.take(int(0.8 * DATASET_SIZE))
train_dataset = train_dataset.map(load_and_preprocess_multi_input, num_parallel_calls=tf.data.AUTOTUNE)
train_dataset = train_dataset.batch(BATCH_SIZE)
train_dataset = train_dataset.prefetch(tf.data.AUTOTUNE)

## Create Testing Data Pippeline

In [18]:
test_dataset = full_dataset.skip(int(0.8 * DATASET_SIZE))
test_dataset = test_dataset.map(load_and_preprocess_multi_input, num_parallel_calls=tf.data.AUTOTUNE)
test_dataset = test_dataset.batch(BATCH_SIZE)
test_dataset = test_dataset.prefetch(tf.data.AUTOTUNE)

In [19]:
print(train_dataset.cardinality())
print(test_dataset.cardinality())

tf.Tensor(456, shape=(), dtype=int64)
tf.Tensor(114, shape=(), dtype=int64)


# Create Model (Feature Extraction / Only Dense Layer)

## Build Model Function

In [20]:
import tensorflow.keras as keras
def build_model():
    # Defining Input Layer
    image_input = keras.layers.Input(shape=(224,224,3), name='image_input')
    condition_input = keras.layers.Input(shape=(3,), name='condition_input')

    # Data Augmentation Layer
    data_augmentation = keras.Sequential([
        # Geometric Augmentations
        keras.layers.RandomFlip('horizontal_and_vertical'),
        keras.layers.RandomRotation(0.2),
        keras.layers.RandomZoom(0.2),
        # Lighting and Color Augmentations
        keras.layers.RandomBrightness(factor=0.1),
        keras.layers.RandomContrast(factor=0.1)
    ], name='data_augmentation')
    
    # CNN Base Model
    base_model = CNN_MODEL(
        input_shape=(224, 224, 3),
        include_top=False,
        weights='imagenet'
    )
    base_model.trainable = False

    # Build Image Preprocessing Layer
    x = data_augmentation(image_input)
    x = PREPROCESS_IMG(x)
    x = base_model(x, training=False)
    x = keras.layers.GlobalAveragePooling2D()(x)
    x = keras.layers.Dropout(0.2)(x)
    image_features = keras.layers.Dense(512, activation='relu', name='image_features')(x)

    # Combine Image Features and Avocado Condition 
    combined_features = keras.layers.Concatenate(name='feature_concatenate')([image_features, condition_input])

    # Final Regressor
    x = keras.layers.Dense(128, activation='relu')(combined_features)
    x = keras.layers.Dense(32, activation='relu')(x)
    output = keras.layers.Dense(1, activation='linear', name='shelf_life_output')(x)

    model = keras.Model(
        inputs=[image_input, condition_input], 
        outputs=output,
        name='avocado_shelf_life_model'
    )
    return model


## Compile Model Function

In [21]:
optimizer = OPTIMIZER.value
print(optimizer)

<class 'keras.src.optimizers.adam.Adam'>


In [22]:
def compile_model(model, lr):
    optimizer_class = OPTIMIZER.value
    print(optimizer_class)
    model.compile(
        optimizer=optimizer_class(learning_rate=lr),
        loss='mse',
        metrics=[
            tf.keras.metrics.MeanAbsoluteError(name='mae'),
            tf.keras.metrics.R2Score(name='r2_score')
        ]
    )
    return model

# Train Model

## Callback Function

In [23]:
def create_callbacks(path):
    early_stopping = keras.callbacks.EarlyStopping(
        monitor='val_loss',       # Monitor the validation loss
        patience=10,              # Stop if it doesn't improve for 10 epochs
        restore_best_weights=True # Automatically restore the model weights from the best epoch
    )
    model_checkpoint = keras.callbacks.ModelCheckpoint(
        filepath=path,                    # The path to save the model
        monitor='val_loss',               # Monitor the validation loss
        save_best_only=True,              # Only save if the model is the best so far
        verbose=1
    )
    return [early_stopping, model_checkpoint]

## Start Experiment Run in MLFlow

In [30]:
try:
    response = requests.get(f"{MLFLOW_URI}/health")
    if response.status_code != 200 and response.status_code != 404:
        print(
            f"Server responded with code {response.status_code}, but seems to be alive."
        )
    else:
        print("Connection stablished with MLflow server.")

    mlflow.set_tracking_uri(MLFLOW_URI)
    print(f"Tracking URI set: {mlflow.get_tracking_uri()}")

except Exception as e:
    raise Exception(f"CRITICAL ERROR: Cannot connect to MLFlow. Cause: {e}")

Connection stablished with MLflow server.
Tracking URI set: https://mlflow.lepcodes.com


In [31]:
try:
    mlflow.set_experiment(EXPERIMENT_NAME)
    print(f"Experiment \"{EXPERIMENT_NAME}\" set!")
except:
    try:
        mlflow.create_experiment(
            name=EXPERIMENT_NAME,
            artifact_store="mlflow-artifacts:/"
        )
        mlflow.set_experiment(EXPERIMENT_NAME)
        print(f"{EXPERIMENT_NAME} don't existed. Created!")        
    except Exception as e:
        raise Exception(f"Error configuring experiment")

Experiment "Default Experiment" set!


In [39]:
model = build_model()
model = compile_model(model, LEARNING_RATE)
callbacks = create_callbacks(MODEL_PATH)

with mlflow.start_run(run_name=RUN_NAME) as run:
    print(f"Run {RUN_NAME} starting with ID: {run.info.run_id}")
    print(f"Prefect Run ID: {PREFECT_RUN_ID}")
    mlflow.set_tag("prefect_run_id", PREFECT_RUN_ID)

    mlflow.tensorflow.autolog(log_models=False, log_datasets=False)
    
    history = model.fit(
        train_dataset, 
        epochs=1, 
        validation_data=test_dataset,
        callbacks=callbacks
    )

    print("Loading best model from memory!")
    best_model = load_model(MODEL_PATH)

    print("Uploading best model to MLflow...")
    mlflow.tensorflow.log_model(best_model, "avocado-model")

<class 'keras.src.optimizers.adam.Adam'>
Run Avocado_Ripening_Model starting with ID: eff6576775db4564ab8b105915e0cfe6
Prefect Run ID: Default


[1m455/456[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m‚îÅ[0m [1m0s[0m 61ms/step - loss: 20.8750 - mae: 3.4221 - r2_score: 0.4868
Epoch 1: val_loss improved from inf to 12.72693, saving model to /kaggle/working/feature_extraction_model.keras




[1m456/456[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m45s[0m 83ms/step - loss: 20.8497 - mae: 3.4201 - r2_score: 0.4873 - val_loss: 12.7269 - val_mae: 2.9780 - val_r2_score: 0.6388
Loading best model from memory!




Uploading best model to MLflow...




üèÉ View run Avocado_Ripening_Model at: https://mlflow.lepcodes.com/#/experiments/7/runs/eff6576775db4564ab8b105915e0cfe6
üß™ View experiment at: https://mlflow.lepcodes.com/#/experiments/7


# Fine-Tune Model

### Build Another Model

In [None]:
# model_ft = model
# model_ft = build_model()

## Unfreeze Base Model Weights

In [None]:
# base_model = model_ft.get_layer('resnet50')
# base_model.trainable = True
# # base_model.summary()

## Freeze First Layers of CNN

In [None]:
# for layer in base_model.layers:
#     name = layer.name.lower()  # case-insensitive check
#     if ('conv4' in name) or ('conv5' in name):
#         # keep BatchNorm layers frozen (common-sense safety)
#         if isinstance(layer, tf.keras.layers.BatchNormalization):
#             layer.trainable = False
#         else:
#             layer.trainable = True
#     else:
#         layer.trainable = False
# print("\nTrainable layers:")
# for i, layer in enumerate(base_model.layers):
#     if layer.trainable:
#         print(i, layer.name, layer.__class__.__name__)

## Train and Start Experiment in MLFlow

In [None]:
# model_file_path = 

# ETA = 1e-5
# EPOCHS = 30
# OPTIMIZER = "adam"
# model = build_model()
# model = compile_model(model, ETA)
# callbacks = create_callbacks(model_file_path)

# history = model_ft.fit(
#     train_dataset, 
#     epochs=EPOCHS, 
#     validation_data=test_dataset,
#     callbacks=callbacks
# )