# **Mount Drive**

Link dataset: https://drive.google.com/drive/folders/1I3KS5oG1LOAEUdfnjY21Y4KWI2tXAnLU?usp=drive_link

In [None]:
from google.colab import drive

# Mount Google Drive into the Colab filesystem; the dataset path used later
# in this notebook lives under this mount point.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

# **Impor Pustaka**

In [None]:
# Install MLflow (quiet mode); it is imported in the next cell for experiment tracking.
!pip install mlflow -q

In [None]:
import mlflow
import mlflow.sklearn

import os
import shutil
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import math
import random
import time
from IPython.display import display, HTML, FileLink

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import backend as K
from tensorflow.keras.layers import (
    Dense, Activation, Dropout, Conv2D, MaxPooling2D,
    BatchNormalization, Flatten, Input, GlobalAveragePooling2D
)
from tensorflow.keras.optimizers import Adam, Adamax
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras import regularizers
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model, load_model, Sequential
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.applications import MobileNetV2, Xception
from tensorflow.keras.applications.efficientnet import EfficientNetB0, preprocess_input

# **Eksplorasi Data**

In [None]:
# Dataset root on the mounted shared drive
dataset_dir = '/content/drive/Shareddrives/MLOps-klp11/archive'

# Fixed parameters shared by every generator
IMAGE_SIZE = (224, 224)
BATCH_SIZE = 32

# One sub-directory per data split
train_dir, val_dir, test_dir = (
    os.path.join(dataset_dir, split_name) for split_name in ('train', 'val', 'test')
)

# Data generator: only multiplies pixel values by 1/255 (no augmentation)
datagen = ImageDataGenerator(rescale=1./255)


def _build_generator(directory, shuffle):
    """Create a categorical directory iterator over `directory` with the shared settings."""
    return datagen.flow_from_directory(
        directory,
        target_size=IMAGE_SIZE,
        batch_size=BATCH_SIZE,
        class_mode='categorical',
        shuffle=shuffle
    )


# Image generators: only the training split is shuffled, so that the order of
# validation/test batches stays aligned with `generator.classes`.
train_generator = _build_generator(train_dir, shuffle=True)
val_generator = _build_generator(val_dir, shuffle=False)
test_generator = _build_generator(test_dir, shuffle=False)

In [None]:
# Class-distribution counter
def count_class_distribution(generator, name=""):
    """Print and return the number of samples per class held by a generator.

    Args:
        generator: Object exposing `classes` (one class index per sample) and
            `class_indices` (mapping of class name to class index).
        name: Label of the split; printed upper-cased in the header line.

    Returns:
        pandas Series of sample counts indexed by class name, sorted by name.
    """
    # Invert the name -> index mapping so indices can be turned back into names.
    name_by_index = {idx: cls_name for cls_name, idx in generator.class_indices.items()}

    frame = pd.DataFrame(generator.classes, columns=["class_index"])
    frame["class"] = frame["class_index"].map(name_by_index)
    per_class = frame["class"].value_counts().sort_index()

    print(f"\nJumlah data per kelas di {name.upper()}:")
    for cls_name, n_samples in zip(per_class.index, per_class.tolist()):
        print(f"{cls_name}: {n_samples} data")

    return per_class

In [None]:
# Count the class distribution of each split (train, then val, then test)
train_counts, val_counts, test_counts = [
    count_class_distribution(split_generator, split_name)
    for split_generator, split_name in (
        (train_generator, "train"),
        (val_generator, "val"),
        (test_generator, "test"),
    )
]

In [None]:
# Per-class sample image display
def show_one_sample_per_class(generator, title='Contoh Gambar per Kelas', max_per_row=4):
    """Plot the first image encountered for each class of a generator.

    The subplot grid is sized from the number of classes, so any class count
    works. At most `len(generator)` batches are read, so the search stops even
    if some class never appears (the generator may otherwise cycle forever).

    Args:
        generator: Iterable yielding `(images, labels)` batches with one-hot
            labels; must expose `class_indices` (class name -> index) and
            support `len()` (number of batches).
        title: Figure super-title.
        max_per_row: Number of subplot columns.
    """
    class_indices = generator.class_indices
    index_to_class = {v: k for k, v in class_indices.items()}
    total_classes = len(class_indices)
    shown_classes = set()

    # Grid grows with the class count; 3.75 inches per cell keeps the original
    # 15x15 figure for a 4x4 grid.
    rows = max(1, math.ceil(total_classes / max_per_row))
    plt.figure(figsize=(3.75 * max_per_row, 3.75 * rows))

    # zip with range() bounds the loop to the number of batches the generator reports.
    for _, (images, labels) in zip(range(len(generator)), generator):
        for img, one_hot in zip(images, labels):
            class_name = index_to_class[np.argmax(one_hot)]
            if class_name in shown_classes:
                continue

            shown_classes.add(class_name)
            plt.subplot(rows, max_per_row, len(shown_classes))
            plt.imshow(img)
            plt.title(class_name, fontsize=14, color='blue')
            plt.axis('off')

            if len(shown_classes) == total_classes:
                break
        if len(shown_classes) == total_classes:
            break

    # Report classes for which no image was found instead of searching forever.
    missing = sorted(set(class_indices) - shown_classes)
    if missing:
        print(f"No sample image found for: {', '.join(missing)}")

    plt.suptitle(title, fontsize=18)
    plt.tight_layout()
    plt.show()

# Display one sample image per class, taken from the validation split
show_one_sample_per_class(val_generator)

# **Training Model**

In [None]:
# Select the MLflow experiment that the run below is recorded under
mlflow.set_experiment("klasifikasi_batik")

# Base model: MobileNetV2 with ImageNet weights and no classification head,
# frozen so that only the new head is trained.
base_model = MobileNetV2(
    input_shape=IMAGE_SIZE + (3,),
    include_top=False,
    weights='imagenet'
)
base_model.trainable = False

# Final model: frozen backbone followed by a small classification head
model_MobileNetV2 = Sequential([
    base_model,
    GlobalAveragePooling2D(),
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.3),
    Dense(train_generator.num_classes, activation='softmax')
])

# Hyperparameters
learning_rate = 0.01
EPOCHS = 40

# Compile the model
model_MobileNetV2.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Callbacks: early stopping, learning-rate reduction on plateau, best-model checkpoint
callbacks = [
    EarlyStopping(patience=5, restore_best_weights=True),
    ReduceLROnPlateau(patience=3, factor=0.2),
    ModelCheckpoint('best_model_MobileNetV2.h5', save_best_only=True)
]

# MLflow metric name -> key in the Keras History dict
HISTORY_METRICS = {
    "train_accuracy": "accuracy",
    "val_accuracy": "val_accuracy",
    "train_loss": "loss",
    "val_loss": "val_loss",
}

# Record parameters, per-epoch metrics, the model and the final evaluation with MLflow
with mlflow.start_run():
    num_classes = train_generator.num_classes

    # Make sure the run parameters are recorded in MLflow
    mlflow.log_param("learning_rate", learning_rate)
    mlflow.log_param("epochs", EPOCHS)
    mlflow.log_param("num_classes", num_classes)
    mlflow.log_param("batch_size", BATCH_SIZE)

    # Train the model
    history_MobileNetV2 = model_MobileNetV2.fit(
        train_generator,
        epochs=EPOCHS,
        validation_data=val_generator,
        callbacks=callbacks
    )

    # Log per-epoch metrics. Early stopping may end training before EPOCHS,
    # so iterate over the epochs that actually ran.
    epochs_run = len(history_MobileNetV2.history["accuracy"])
    for step in range(epochs_run):
        for metric_name, history_key in HISTORY_METRICS.items():
            mlflow.log_metric(
                metric_name,
                history_MobileNetV2.history[history_key][step],
                step=step
            )

    # Log the trained model to MLflow
    mlflow.tensorflow.log_model(
        model=model_MobileNetV2,
        artifact_path="cnn_model"
    )

    # Final evaluation on the validation split; also stored in the MLflow run
    # so the reported number is traceable to this run.
    val_loss, val_acc = model_MobileNetV2.evaluate(val_generator)
    mlflow.log_metric("final_val_loss", val_loss)
    mlflow.log_metric("final_val_accuracy", val_acc)
    print(f"Akurasi validasi: {val_acc:.2%}")

In [None]:
# Display the logged runs, restricted to the parameter and metric columns of interest
runs = mlflow.search_runs()
LOG_COLUMNS = [
    "run_id",
    "params.learning_rate",
    "params.epochs",
    "metrics.train_accuracy",
    "metrics.train_loss",
    "metrics.val_accuracy",
    "metrics.val_loss",
]
runs[LOG_COLUMNS]

# **Plot Akurasi dan Loss**

In [None]:
# Training-curve plotting function
def plot_training_history(history, model_name):
    """Draw accuracy and loss curves (training vs. validation) as two figures.

    Args:
        history: Object whose `history` dict holds the 'accuracy',
            'val_accuracy', 'loss' and 'val_loss' series.
        model_name: Name appended to each figure title.
    """
    # (y-axis label and title prefix, ((history key, legend label), ...))
    panels = (
        ('Akurasi', (('accuracy', 'Akurasi Training'),
                     ('val_accuracy', 'Akurasi Validasi'))),
        ('Loss', (('loss', 'Train Loss'),
                  ('val_loss', 'Validation Loss'))),
    )

    for quantity, curves in panels:
        plt.figure(figsize=(4, 3))
        for history_key, legend_label in curves:
            plt.plot(history.history[history_key], label=legend_label)
        plt.legend()
        plt.title(f'{quantity} per Epoch - {model_name}')
        plt.xlabel('Epoch')
        plt.ylabel(quantity)
        plt.grid(True)
        plt.show()

# Plot the accuracy and loss curves of the MobileNetV2 training run
plot_training_history(history_MobileNetV2, model_name="MobileNetV2")

# **Evaluasi Model**

In [None]:
def evaluate_model_performance(model, test_generator, model_name):
    """Evaluate a classifier on a generator: confusion matrix, report and timing.

    The full set of class indices is passed explicitly to the scikit-learn
    metrics, so the confusion matrix always has one row/column per class name
    and the report does not fail when a class is absent from both the ground
    truth and the predictions.

    Args:
        model: Object with `predict(generator, verbose=...)` returning a 2-D
            array of per-class scores (one row per sample).
        test_generator: Generator exposing `reset()`, `samples`, `classes`
            and `class_indices`. It should be unshuffled so that the
            prediction order matches `classes`.
        model_name: Name used in the plot title and printed headings.
    """
    # Restart the generator so predictions start from the first sample
    test_generator.reset()

    # Time the prediction over the whole generator (includes batch loading)
    start_time = time.perf_counter()
    preds = model.predict(test_generator, verbose=1)
    duration = time.perf_counter() - start_time

    total_samples = test_generator.samples
    # Guard against an empty generator to avoid a division by zero
    avg_inference_time = duration / total_samples if total_samples else 0.0

    y_pred = np.argmax(preds, axis=1)
    y_true = test_generator.classes

    # Order class names by their index so names and label ids line up
    class_indices = test_generator.class_indices
    class_names = sorted(class_indices, key=class_indices.get)
    label_ids = [class_indices[cls_name] for cls_name in class_names]

    # Confusion matrix over every known class, even those without samples
    cm = confusion_matrix(y_true, y_pred, labels=label_ids)

    # Plot the confusion matrix
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names,
                yticklabels=class_names)
    plt.xlabel("Predicted Label")
    plt.ylabel("True Label")
    plt.title(f"Confusion Matrix - {model_name}")
    plt.tight_layout()
    plt.show()

    # Print the classification report
    print(f"\nClassification Report: {model_name}")
    print(classification_report(
        y_true, y_pred,
        labels=label_ids,
        target_names=class_names,
        zero_division=0
    ))

    # Print the inference time
    print(f"\nTotal Inference Time: {duration:.2f} seconds")
    print(f"Average Inference Time per Sample: {avg_inference_time:.4f} seconds")

# Evaluate the trained MobileNetV2 model on the test split
evaluate_model_performance(model_MobileNetV2, test_generator, model_name="MobileNetV2")

# **Test Model**

In [None]:
# Model test function
def show_test_model_random(model, test_generator, batch_size=4):
    """Predict randomly chosen test images and plot them with true/predicted labels.

    The number of images is capped at the number of available files, so a
    `batch_size` larger than the test set no longer raises. When nothing can
    be sampled, a message is printed and the function returns.

    Args:
        model: Object with `predict(images)` returning per-class scores, one
            row per image.
        test_generator: Generator exposing `filepaths`, `labels`,
            `class_indices` and `target_size`.
        batch_size: Desired number of images to show.
    """
    # File paths and class names come from the generator
    filepaths = test_generator.filepaths
    class_names = list(test_generator.class_indices.keys())

    # random.sample raises if asked for more items than exist, so clamp first
    sample_count = min(batch_size, len(filepaths))
    if sample_count <= 0:
        print("No test images available to display.")
        return

    # Draw a random set of distinct files
    selected_indices = random.sample(range(len(filepaths)), sample_count)
    selected_images = [filepaths[i] for i in selected_indices]
    selected_labels = [test_generator.labels[i] for i in selected_indices]

    # Load each image at the generator's target size and scale by 1/255,
    # matching the rescaling applied by the data generator
    images = np.array([
        image.img_to_array(
            image.load_img(img_path, target_size=test_generator.target_size)
        ) / 255.0
        for img_path in selected_images
    ])

    # Predict
    preds = model.predict(images)
    pred_classes = np.argmax(preds, axis=1)

    max_per_row = 4
    rows = math.ceil(sample_count / max_per_row)

    plt.figure(figsize=(max_per_row * 4, rows * 4))
    for i in range(sample_count):
        plt.subplot(rows, max_per_row, i + 1)
        plt.imshow(images[i])
        confidence = preds[i][pred_classes[i]] * 100
        true_label = class_names[selected_labels[i]]
        pred_label = class_names[pred_classes[i]]
        plt.title(f"True: {true_label}\nPred: {pred_label}\nConf: {confidence:.2f}%", fontsize=10)
        plt.axis('off')
    plt.tight_layout()
    plt.show()

# Try the model on 8 randomly chosen test images
show_test_model_random(model_MobileNetV2, test_generator, batch_size=8)

# **Prediksi Gambar**

In [None]:
# Single-image prediction function
def predict_single_image(model, test_generator, image_size=(224, 224)):
    """Predict one randomly chosen image from a generator's files and show the result.

    Args:
        model: Object with `predict(batch)` returning per-class scores, one
            row per image.
        test_generator: Generator exposing `class_indices`, `filenames`
            (paths relative to `directory`) and `directory`.
        image_size: Size the image is loaded at before prediction.
    """
    # Class names, in the order given by the generator's mapping
    labels_by_position = list(test_generator.class_indices.keys())
    candidate_files = test_generator.filenames

    # Pick one file at random
    chosen = random.randint(0, len(candidate_files) - 1)
    chosen_file = candidate_files[chosen]
    full_path = os.path.join(test_generator.directory, chosen_file)

    # Load the image and scale by 1/255, as the data generator does
    picture = tf.keras.utils.load_img(full_path, target_size=image_size)
    scaled = tf.keras.utils.img_to_array(picture) / 255.0

    # Predict on a batch holding just this image
    probabilities = model.predict(scaled[np.newaxis, ...])[0]
    best = np.argmax(probabilities)
    best_label = labels_by_position[best]
    best_percent = probabilities[best] * 100

    # Show the image with its predicted class
    plt.imshow(picture)
    plt.title(f"Prediksi: {best_label} ({best_percent:.2f}%)")
    plt.axis('off')
    plt.show()

    # Print the confidence for every class
    print(f"\nHasil prediksi untuk gambar: {chosen_file}")
    for position, probability in enumerate(probabilities):
        percent = probability * 100
        print(f"{labels_by_position[position]:<20}: {percent:.2f}%")

# Predict one random test image with the trained MobileNetV2 model
predict_single_image(model_MobileNetV2, test_generator, image_size=(224, 224))

# **Save Model**

In [None]:
# Output file names: the full model (.keras) and the weights only (.weights.h5)
KERAS_MODEL_PATH = "model_batik_mobilenetv2.keras"
FILENAME = "model_batik_mobilenetv2.weights.h5"

# Save the full model first, then the weights
model_MobileNetV2.save(KERAS_MODEL_PATH)
model_MobileNetV2.save_weights(FILENAME)