In [1]:
import os
import tensorflow as tf
from tensorflow.keras.regularizers import l2
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img
from tensorflow.keras.applications import MobileNetV3Large
from tensorflow.keras import Sequential
from tensorflow.keras.optimizers import Adam, SGD, RMSprop
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, EarlyStopping
from tensorflow.keras.metrics import Precision, Recall, BinaryAccuracy, F1Score
from tensorflow.keras.losses import KLDivergence, BinaryCrossentropy
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Input, Dense, Dropout, GlobalAveragePooling2D, Flatten, MaxPooling2D, Conv2D, BatchNormalization, Activation
from sklearn.metrics import confusion_matrix, classification_report, ConfusionMatrixDisplay
from sklearn.model_selection import GridSearchCV
import seaborn as sns
import numpy as np
import scipy
from PIL import Image
import warnings
warnings.catch_warnings
import cv2 as cv
from scipy import stats
from tqdm import tqdm
import shutil
import time

2024-12-24 18:41:48.471024: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-12-24 18:41:48.493233: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-12-24 18:41:48.493250: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-12-24 18:41:48.493273: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-12-24 18:41:48.498220: I tensorflow/core/platform/cpu_feature_g

# Load Dataset (Melanoma)

In [2]:
# # Importing required libraries
# import kagglehub

# # Downloading the dataset
# path = kagglehub.dataset_download("drscarlat/melanoma")
# print("Path to dataset files:", path)

# # Moving dataset to the current directory
# destination_dir = '.'  # Current directory

# for item in os.listdir(path):
#     source_path = os.path.join(path, item)
#     destination_path = os.path.join(destination_dir, item)

#     if os.path.isfile(source_path):
#         shutil.move(source_path, destination_path)
#     elif os.path.isdir(source_path):
#         shutil.move(source_path, destination_path)

# print("Dataset files moved to the current directory.")

# Dataset

In [3]:
# Basic config
BATCH_SIZE = 32
IMG_SIZE = 224
EPOCHS = 10

In [4]:
train_path = './dermmel/DermMel/train_sep'
val_path = './dermmel/DermMel/valid'
test_path = './dermmel/DermMel/test'

In [5]:
classes = [class_name for class_name in os.listdir(train_path)]
classes

['NotMelanoma', 'Melanoma']

# Base

In [6]:
# ImageDataGenerators
# Augmentation
train_datagen_aug = ImageDataGenerator(
    rotation_range = 35,
    horizontal_flip = True,
    vertical_flip = True,
    width_shift_range = 0.3,
    fill_mode = 'nearest',
    )

# No augmentation
train_datagen = ImageDataGenerator()
val_datagen = ImageDataGenerator()
test_datagen = ImageDataGenerator()

# Generators
train_generator_aug = train_datagen_aug.flow_from_directory(
    train_path,
    target_size = (IMG_SIZE, IMG_SIZE),
    batch_size = BATCH_SIZE,
    class_mode = 'binary',
    color_mode = 'rgb'
)

train_generator = train_datagen.flow_from_directory(
    train_path,
    target_size = (IMG_SIZE, IMG_SIZE),
    batch_size = BATCH_SIZE,
    class_mode = 'binary',
    color_mode = 'rgb'
)

validation_generator = val_datagen.flow_from_directory(
    val_path,
    target_size = (IMG_SIZE, IMG_SIZE),
    batch_size = BATCH_SIZE,
    class_mode = 'binary',
    color_mode = 'rgb',
)

test_generator = test_datagen.flow_from_directory(
    test_path,
    target_size = (IMG_SIZE, IMG_SIZE),
    batch_size = BATCH_SIZE,
    class_mode = 'binary',
    color_mode = 'rgb',
    shuffle = False
)

Found 10682 images belonging to 2 classes.
Found 10682 images belonging to 2 classes.
Found 3562 images belonging to 2 classes.
Found 3561 images belonging to 2 classes.


In [7]:
def evaluate_model(model, test_generator, classes, threshold=0.5):
    # Predict labels for the test data
    y_pred = model.predict(test_generator)
    # Binarize predictions based on threshold
    y_pred = (y_pred > threshold).astype(int)
    # Get true labels from the test generator
    y_true = test_generator.classes

    # Generate confusion matrix
    cm = confusion_matrix(y_true, y_pred)
    print("Confusion Matrix:")
    print(cm)

    # Display the confusion matrix
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=classes)
    disp.plot(cmap=plt.cm.Blues)
    plt.show()

    # Display classification report
    print("Classification Report:")
    print(classification_report(y_true, y_pred, target_names=classes, digits=4))

In [8]:
def plot_training_history(history):
    """
    Plots the training and validation accuracy and loss over epochs.

    Parameters:
    - history: The history object returned by the model's fit method.
    """
    plt.figure(figsize=(12, 4))

    # Plot training and validation accuracy
    plt.subplot(1, 2, 1)
    plt.plot(history.history['accuracy'], label='Training Accuracy')
    plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
    plt.title('Training and Validation Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()

    # Plot training and validation loss
    plt.subplot(1, 2, 2)
    plt.plot(history.history['loss'], label='Training Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title('Training and Validation Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()

    plt.tight_layout()
    plt.show()

In [9]:
class CustomF1Score(F1Score):
    def __init__(self, name="f1_score", dtype=None):
        super(F1Score, self).__init__(name=name, dtype=dtype)
        self.true_positives = self.add_weight(name="true_positives", initializer="zeros")
        self.false_positives = self.add_weight(name="false_positives", initializer="zeros")
        self.false_negatives = self.add_weight(name="false_negatives", initializer="zeros")

    def update_state(self, y_true, y_pred, sample_weight=None):
        # Convert predictions to binary based on a 0.5 threshold
        y_pred = tf.cast(tf.greater(y_pred, 0.5), tf.float32)
        y_true = tf.cast(y_true, tf.float32)

        # Calculate true positives, false positives, and false negatives
        true_positives = tf.reduce_sum(y_true * y_pred)
        false_positives = tf.reduce_sum(y_pred * (1 - y_true))
        false_negatives = tf.reduce_sum(y_true * (1 - y_pred))

        # Update the state variables
        self.true_positives.assign_add(true_positives)
        self.false_positives.assign_add(false_positives)
        self.false_negatives.assign_add(false_negatives)

    def result(self):
        precision = self.true_positives / (self.true_positives + self.false_positives + tf.keras.backend.epsilon())
        recall = self.true_positives / (self.true_positives + self.false_negatives + tf.keras.backend.epsilon())
        f1_score = 2 * (precision * recall) / (precision + recall + tf.keras.backend.epsilon())
        return f1_score

    def reset_state(self):
        self.true_positives.assign(0)
        self.false_positives.assign(0)
        self.false_negatives.assign(0)

In [10]:
import pandas as pd

def save_training_history(history, filename):
    """Saves training history to a CSV file with the model name.

    Args:
        history: The training history object returned by model.fit().
        filename: The name of the CSV file to save the history to.
        model_name: The name of the model (string) to include in the CSV file.
    """
    results_df = pd.DataFrame(history.history)
    results_df.insert(0, 'epoch', range(1, len(results_df) + 1))  # Menambahkan kolom 'epoch'
    results_df.to_csv(filename, index=False)

# Tuning

In [11]:
def build_model(IMG_SIZE):
    base_model = MobileNetV3Large(
        input_shape=(IMG_SIZE, IMG_SIZE, 3),
        include_top=False,
        weights='imagenet',
        include_preprocessing=True
    )

    base_model.trainable = True

    model = Sequential([
        Input(shape=(IMG_SIZE, IMG_SIZE, 3)),
        base_model,
        GlobalAveragePooling2D(),
        Dense(1024, activation='relu', kernel_regularizer=l2(0.001)),
        Dropout(0.5),
        Dense(512, activation='relu', kernel_regularizer=l2(0.001)),
        Dropout(0.3),
        Dense(1, activation='sigmoid')
    ])
    return model

In [12]:
# Metrics
metrics = [
    BinaryAccuracy(name = 'accuracy'),
    Precision(name = 'precision'),
    Recall(name = 'recall'),
    CustomF1Score(name = 'f1_score')
]

2024-12-24 18:41:50.704194: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1886] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 22282 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4090, pci bus id: 0000:02:00.0, compute capability: 8.9


In [13]:
learning_rates = [1e-3, 1e-4, 1e-5]
optimizers = ['adam', 'sgd', 'rmsprop']

In [14]:
def get_callbacks():
    lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-7, verbose=1)
    early_stopping = EarlyStopping(monitor='val_loss', patience=5, verbose=1)
    return [lr_scheduler, early_stopping]

In [15]:
# Inisialisasi hasil
results = []

# Loop melalui setiap kombinasi learning rate dan optimizer
for lr in learning_rates:
    for opt in optimizers:
        print(f"\nTraining dengan Optimizer: {opt.upper()}, Learning Rate: {lr}")

        # Bangun model
        model = build_model(IMG_SIZE)

        # Pilih optimizer
        if opt == 'adam':
            optimizer = Adam(
                learning_rate=lr, 
                beta_1=0.9,    
                beta_2=0.999,   
                epsilon=1e-7
            )
        elif opt == 'sgd':
            optimizer = SGD(
                learning_rate=lr, 
                momentum=0.9, 
                nesterov=True
            )
        elif opt == 'rmsprop':
            optimizer = RMSprop(
                learning_rate=lr, 
                rho=0.9,        
                momentum=0.9,   
                centered=True,
                epsilon=1e-7
            )
        else:
            raise ValueError("Optimizer tidak dikenali.")

        # Kompilasi model
        model.compile(optimizer=optimizer,
                      loss='binary_crossentropy',
                      metrics=metrics)

        # Siapkan callbacks
        callbacks = get_callbacks()

        # Training Time
        start_time = time.time()

        # Latih model
        history = model.fit(
            train_generator,
            epochs=EPOCHS,
            validation_data = validation_generator,
            steps_per_epoch = train_generator.samples // BATCH_SIZE,
            validation_steps = validation_generator.samples // BATCH_SIZE,
            callbacks=callbacks,
            verbose=1
        )

        end_time = time.time()
        total_training_time = end_time - start_time
        average_time_per_epoch = total_training_time / EPOCHS

        # Evaluasi model pada data validation
        val_result = model.evaluate(validation_generator, verbose=0, return_dict = True)

        # Simpan hasil
        results.append({
            'optimizer': opt,
            'learning_rate': lr,
            'val_loss': val_result['loss'],
            'val_accuracy': val_result['accuracy'],
            'val_f1_score': val_result['f1_score'],
            'total_training_time': total_training_time,
            'average_time_per_epoch': average_time_per_epoch
        })

        # Hapus model untuk menghemat memori
        tf.keras.backend.clear_session()


Training dengan Optimizer: ADAM, Learning Rate: 0.001
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v3/weights_mobilenet_v3_large_224_1.0_float_no_top_v2.h5
Epoch 1/10


2024-12-24 18:41:59.017067: I tensorflow/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2024-12-24 18:41:59.051720: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:442] Loaded cuDNN version 8905
2024-12-24 18:41:59.104380: I tensorflow/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2024-12-24 18:42:00.122656: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x9662110 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2024-12-24 18:42:00.122674: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA GeForce RTX 4090, Compute Capability 8.9
2024-12-24 18:42:00.125103: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-12-24 18:42:00.172688: I ./tensorflow/compiler/jit/device_compiler.

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

Training dengan Optimizer: SGD, Learning Rate: 0.001
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

Training dengan Optimizer: RMSPROP, Learning Rate: 0.001
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

Training dengan Optimizer: ADAM, Learning Rate: 0.0001
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

Training dengan Optimizer: SGD, Learning Rate: 0.0001
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

Training dengan Optimizer: RMSPROP, Learning Rate: 0.0001
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 10: ReduceLROnPlateau reducing learning rate to 4.9999998

In [16]:
# Konversi hasil ke DataFrame untuk analisis
results_df = pd.DataFrame(results)

# Tampilkan hasil
print("\nHasil Hyperparameter Tuning:")
print(results_df)

# Temukan kombinasi terbaik
best_result = results_df.sort_values(
    by=['val_accuracy', 'val_f1_score', 'total_training_time'],
    ascending=[False, False, True]
).iloc[0]
print("\nKombinasi terbaik:")
print(best_result)


Hasil Hyperparameter Tuning:
  optimizer  learning_rate  val_loss  val_accuracy  val_f1_score  \
0      adam        0.00100  2.623983      0.831836      0.803671   
1       sgd        0.00100  1.622772      0.952274      0.953450   
2   rmsprop        0.00100  0.381882      0.833520      0.807904   
3      adam        0.00010  0.457177      0.957047      0.958435   
4       sgd        0.00010  1.808155      0.943290      0.946105   
5   rmsprop        0.00010  0.711860      0.947782      0.948647   
6      adam        0.00001  1.535156      0.947501      0.949500   
7       sgd        0.00001  1.993059      0.932622      0.935588   
8   rmsprop        0.00001  0.687580      0.948624      0.951135   

   total_training_time  average_time_per_epoch  
0           599.044413               59.904441  
1           537.001595               53.700159  
2           541.719952               54.171995  
3           549.321375               54.932137  
4           545.700786               54.5700