In [0]:
# Install dependencies
%pip install tensorflow opencv-python

In [0]:
# Import necessary modules

# Storage
from azure.storage.filedatalake import DataLakeServiceClient
import databricks.sdk

# ML
import tensorflow as tf
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.layers import Input, Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, Callback
from tensorflow.keras.models import Model
import mlflow
import mlflow.tensorflow

# General
import random
import os

# from tensorflow.keras.preprocessing.image import ImageDataGenerator
# from tensorflow.keras.applications import ResNet50

In [0]:
# Notebook configuration

# Training and validation image folders, both under
# /Volumes/<catalog>/<schema>/<volume_name>/
catalog = "main"
schema = "ml_data"
volume_name = "processkaggledata"
train_dir, val_dir = (
    f"/Volumes/{catalog}/{schema}/{volume_name}/{split}_cropped"
    for split in ("train", "validation")
)

# Square image side length, batch size and number of training epochs
s = 160
IMG_HEIGHT = IMG_WIDTH = s
BATCH_SIZE, EPOCHS = 4, 10

In [0]:
# Set up MLflow experiment

# Path of the MLflow experiment made active for the runs started below
experiment_path = "/Users/naveenkg676@outlook.com/rouge_exp"
mlflow.set_experiment(experiment_path)

In [0]:
# Preprocess the images

# Training images: class labels are inferred from the sub-directory names
# and one-hot encoded (label_mode='categorical').
train_ds = tf.keras.utils.image_dataset_from_directory(
    train_dir,
    image_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    label_mode='categorical'
)

# Capture the class names and class count now: the tf.data.Dataset objects
# returned by the transformations further down (shuffle / prefetch) no longer
# expose the `class_names` attribute.
class_names = train_ds.class_names
num_classes = len(class_names)

print(class_names)
print(num_classes)

# Validation images reuse the training class list so both splits share the
# same label-index mapping; a validation folder whose class sub-directories do
# not match then fails here instead of silently producing different labels.
# Shuffling is disabled because evaluation does not benefit from it.
val_ds = tf.keras.utils.image_dataset_from_directory(
    val_dir,
    class_names=class_names,
    image_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    label_mode='categorical',
    shuffle=False
)

# Shuffle the (already batched) training data with a 100-element buffer and
# prefetch both splits so input loading overlaps with training.
AUTOTUNE = tf.data.AUTOTUNE
train_ds = train_ds.shuffle(100).prefetch(AUTOTUNE)
val_ds = val_ds.prefetch(AUTOTUNE)

In [0]:
# === Custom MLflow callback ===
class MLflowMetricsLogger(Callback):
    """Keras callback that forwards each epoch's metrics to the active MLflow run."""

    def on_epoch_end(self, epoch, logs=None):
        """Log every entry of ``logs`` to MLflow with ``epoch`` as the step.

        Args:
            epoch: Epoch index passed in by Keras; recorded as the MLflow step.
            logs: Mapping of metric name to value for the finished epoch. When
                it is None or empty nothing is logged.
        """
        if not logs:
            return
        # Send all metrics of the epoch in one batched call rather than one
        # tracking request per metric.
        mlflow.log_metrics(
            {name: float(value) for name, value in logs.items()},
            step=epoch,
        )

# === Model Setup ===
# EfficientNetB0 backbone with ImageNet weights, kept frozen, followed by a
# small classification head.
input_tensor = Input(shape=(IMG_HEIGHT, IMG_WIDTH, 3))

base_model = EfficientNetB0(include_top=False, weights='imagenet')
base_model.trainable = False  # backbone weights stay fixed (transfer learning)

# The backbone is called with training=False.
backbone_features = base_model(input_tensor, training=False)
pooled = GlobalAveragePooling2D()(backbone_features)
hidden = Dense(units=128, activation='relu')(pooled)
regularized = Dropout(rate=0.3)(hidden)
# The output width must be an int, hence num_classes rather than class_names.
output = Dense(units=num_classes, activation='softmax')(regularized)

model = Model(inputs=input_tensor, outputs=output)
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)


In [0]:
# === Training with MLflow ===
# Trains the model inside a single MLflow run, recording the configuration,
# per-epoch metrics (via MLflowMetricsLogger), summary metrics and the model.
with mlflow.start_run(run_name="EffNetB0_TransferLearning_Advanced"):
    mlflow.log_params({
        "image_height": IMG_HEIGHT,
        "image_width": IMG_WIDTH,
        "batch_size": BATCH_SIZE,
        "epochs": EPOCHS,
        "model": "EfficientNetB0",
        "num_classes": num_classes,
        "class_names": class_names
    })

    callbacks = [
        # Stop once val_loss has not improved for 3 epochs and roll the model
        # back to the best weights seen.
        EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True),
        # Halve the learning rate after 2 epochs without val_loss improvement.
        ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2),
        MLflowMetricsLogger()
    ]

    history = model.fit(
        train_ds,
        validation_data=val_ds,
        epochs=EPOCHS,
        verbose=1,
        callbacks=callbacks
    )

    # Log final metrics (values of the last epoch that was run)
    mlflow.log_metric("final_train_accuracy", history.history['accuracy'][-1])
    mlflow.log_metric("final_val_accuracy", history.history['val_accuracy'][-1])

    # Because EarlyStopping may restore the best weights, the last-epoch
    # numbers above do not necessarily describe the model that gets saved.
    # Also record the epoch with the lowest val_loss (zero-based, matching the
    # step used for the per-epoch metrics) and its validation metrics.
    val_losses = history.history['val_loss']
    best_epoch = min(range(len(val_losses)), key=val_losses.__getitem__)
    mlflow.log_metric("best_epoch", best_epoch)
    mlflow.log_metric("best_val_loss", val_losses[best_epoch])
    mlflow.log_metric("best_val_accuracy", history.history['val_accuracy'][best_epoch])

    mlflow.tensorflow.log_model(model, artifact_path="efficientnetB0_rice_model_3")