In [17]:
# Import necessary libraries
import mlflow
import mlflow.keras
import tensorflow as tf
from tensorflow import keras
import os
from os import path, listdir

# Define the function to load the dataset
def load_image_dataset(file_path):
    all_image_dirs = [os.path.join(file_path, f) for f in os.listdir(file_path) if not os.path.isdir(os.path.join(file_path, f))]
    all_image_labels = []
    for f in all_image_dirs:
        if "cat" in f:
            all_image_labels.append(0)
        else:
            all_image_labels.append(1)
    return all_image_dirs, all_image_labels

# Load dataset
train_path = r"C:/Users/AnanyaSarkar/Documents/dogscats/data/train"
all_image_dirs, all_image_labels = load_image_dataset(train_path)

# Set up MLflow experiment
mlflow.set_experiment("dog-cat-classification-optimizer-comparison")

# Split the data into training and testing sets
num_train_image = int(len(all_image_labels) * 0.8)
train_image_dirs, train_label = all_image_dirs[:num_train_image], all_image_labels[:num_train_image]
test_image_dirs, test_label = all_image_dirs[num_train_image:], all_image_labels[num_train_image:]

# Create TensorFlow datasets
train_path_label = tf.data.Dataset.from_tensor_slices((train_image_dirs, train_label))
test_path_label = tf.data.Dataset.from_tensor_slices((test_image_dirs, test_label))

# Define the image preprocessing function
def load_and_preprocess_image(path):
    image = tf.io.read_file(path)
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.image.resize(image, [192, 192])
    image /= 255.0  # Normalize to [0,1]
    image = 2 * image - 1  # Normalize to [-1,1]
    return image

# Prepare the training and testing datasets
train_image_label_ds = train_path_label.map(lambda x, y: (load_and_preprocess_image(x), y))
test_image_label_ds = test_path_label.map(lambda x, y: (load_and_preprocess_image(x), y)).batch(1)

# Define the CNN model
def create_model(optimizer):
    mobile_net = tf.keras.applications.MobileNetV2(input_shape=(192, 192, 3), include_top=False)
    mobile_net.trainable = False  # Freeze the base model
    cnn_model = keras.models.Sequential([
        mobile_net,
        keras.layers.GlobalAveragePooling2D(),
        keras.layers.Flatten(),
        keras.layers.Dense(64, activation="relu"),
        keras.layers.Dense(2, activation="softmax")
    ])
    cnn_model.compile(optimizer=optimizer,
                      loss='sparse_categorical_crossentropy',
                      metrics=["accuracy"])
    return cnn_model

# Define optimizer variations
optimizers = [
    tf.keras.optimizers.Adam(),
    tf.keras.optimizers.SGD(),
    tf.keras.optimizers.RMSprop(),
    tf.keras.optimizers.Adam(learning_rate=0.0001)  # Adding a fourth optimizer variation with a different learning rate
]

EPOCHS = 2
BATCH_SIZE = 32
AUTOTUNE = tf.data.experimental.AUTOTUNE
steps_per_epoch = len(train_image_dirs) // BATCH_SIZE

# Train models with different optimizers and log results
for optimizer in optimizers:
    optimizer_name = optimizer.__class__.__name__
    # Create a descriptive run name
    run_name = f"{optimizer_name}_{EPOCHS}epochs_{BATCH_SIZE}batch"
    
    with mlflow.start_run(run_name=run_name):
        # Log parameters for the current run
        mlflow.log_param("optimizer", optimizer_name)
        mlflow.log_param("batch_size", BATCH_SIZE)
        mlflow.log_param("epochs", EPOCHS)
        
        # Create and fit the model
        cnn_model = create_model(optimizer)
        train_ds = train_image_label_ds.shuffle(buffer_size=len(train_image_dirs)).repeat().batch(BATCH_SIZE).prefetch(buffer_size=AUTOTUNE)
        history = cnn_model.fit(train_ds, epochs=EPOCHS, steps_per_epoch=steps_per_epoch)

        # Log metrics
        mlflow.log_metric("accuracy", history.history['accuracy'][-1])
        mlflow.log_metric("loss", history.history['loss'][-1])

        # Log the model in MLflow
        mlflow.keras.log_model(cnn_model, "model")


2024/10/29 10:00:22 INFO mlflow.tracking.fluent: Experiment with name 'dog-cat-classification-optimizer-comparison' does not exist. Creating a new experiment.


Epoch 1/2
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1130s[0m 1s/step - accuracy: 0.9986 - loss: 0.0050
Epoch 2/2
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m932s[0m 1s/step - accuracy: 1.0000 - loss: 4.1889e-08


2024/10/29 10:36:08 INFO mlflow.tracking._tracking_service.client: 🏃 View run Adam_2epochs_32batch at: http://localhost:5000/#/experiments/847731672344576682/runs/53f2042484724f20bd9fc32a1c40eaf4.
2024/10/29 10:36:08 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5000/#/experiments/847731672344576682.


Epoch 1/2
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1659s[0m 2s/step - accuracy: 0.9878 - loss: 0.0365
Epoch 2/2
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1261s[0m 2s/step - accuracy: 1.0000 - loss: 5.8203e-04


2024/10/29 11:26:24 INFO mlflow.tracking._tracking_service.client: 🏃 View run SGD_2epochs_32batch at: http://localhost:5000/#/experiments/847731672344576682/runs/1505b19127ed41068eaef491edfe795e.
2024/10/29 11:26:24 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5000/#/experiments/847731672344576682.


Epoch 1/2
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2386s[0m 2s/step - accuracy: 0.9933 - loss: 0.0115
Epoch 2/2
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m973s[0m 2s/step - accuracy: 1.0000 - loss: 1.5788e-06


2024/10/29 12:24:28 INFO mlflow.tracking._tracking_service.client: 🏃 View run RMSprop_2epochs_32batch at: http://localhost:5000/#/experiments/847731672344576682/runs/78d1fa17ec6449eda341423ab8e4f864.
2024/10/29 12:24:28 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5000/#/experiments/847731672344576682.


Epoch 1/2
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1848s[0m 2s/step - accuracy: 0.9725 - loss: 0.0620
Epoch 2/2
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1328s[0m 2s/step - accuracy: 1.0000 - loss: 1.9257e-04


2024/10/29 13:20:18 INFO mlflow.tracking._tracking_service.client: 🏃 View run Adam_2epochs_32batch at: http://localhost:5000/#/experiments/847731672344576682/runs/32a8b8456b314993b5cd1133d6f4dc29.
2024/10/29 13:20:18 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5000/#/experiments/847731672344576682.
