# Hyperparameter Tuning dengan KerasTuner

Notebook ini digunakan untuk mencari hyperparameter optimal menggunakan KerasTuner (RandomSearch).


In [None]:
import sys
sys.path.append('../src')

import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from keras_tuner import HyperModel, RandomSearch
import matplotlib.pyplot as plt
import data_processing as dp
import model
import config

# Pin the NumPy and TensorFlow global RNG seeds to one shared value so
# repeated runs of the notebook start from the same random state.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Report which TensorFlow build this run is using.
print(f"TensorFlow version: {tf.__version__}")


## 1. Memuat dan Memproses Data


In [None]:
# Load the raw harvest and weather tables through the project's
# data_processing module.
df_harvest, df_weather = dp.load_data_from_csv()

# Turn the raw tables into the dataset consumed below, plus the scaler and
# labels returned alongside it.
# NOTE(review): scaler=None with is_training=True presumably makes the helper
# fit a fresh scaler -- confirm against data_processing.preprocess_features.
dataset, scaler, labels = dp.preprocess_features(
    df_harvest,
    df_weather,
    scaler=None,
    is_training=True,
)

# Read the model input shape from the first dataset element: axes 1 and 2 of
# its first component (axis 0 is left out).
sample_batch = next(iter(dataset))
sample_features = sample_batch[0]
input_shape = (sample_features.shape[1], sample_features.shape[2])

print(f"Input shape: {input_shape}")

# Count the dataset elements by streaming over them, so they are not all
# held in memory at once just to be counted.
dataset_size = sum(1 for _ in dataset)
val_size = int(dataset_size * config.VALIDATION_SPLIT)
train_size = dataset_size - val_size

# The first train_size elements are used for training and everything after
# them for validation. The validation part must skip train_size elements:
# skipping val_size instead would make it overlap the training part and give
# it the wrong number of elements.
# NOTE(review): this split is only disjoint if the dataset yields elements in
# the same order on every iteration (no per-epoch reshuffle) -- confirm.
train_dataset = dataset.take(train_size)
val_dataset = dataset.skip(train_size)

# These counts are dataset elements; if the dataset is batched they are
# batches rather than individual samples.
print(f"Train samples: {train_size}")
print(f"Validation samples: {val_size}")


## 2. Setup Hyperparameter Tuning


In [None]:
class GRUHyperModel(HyperModel):
    """Tunable wrapper around the project's GRU model builder.

    Args:
        input_shape: Shape handed unchanged to ``model.build_model`` each
            time a candidate model is built.
    """

    def __init__(self, input_shape):
        # Run the base initialiser first so HyperModel's own attributes are
        # set up before the tuner uses this instance.
        super().__init__()
        self.input_shape = input_shape

    def build(self, hp):
        """Build one candidate model for the hyperparameter set ``hp``.

        Args:
            hp: Hyperparameter container supplied by the tuner.

        Returns:
            Whatever ``model.build_model(self.input_shape, hp)`` returns.
        """
        return model.build_model(self.input_shape, hp)

# Create the hypermodel and the random-search tuner that will explore it.
hypermodel = GRUHyperModel(input_shape)

tuner = RandomSearch(
    hypermodel,
    objective=config.TUNER_OBJECTIVE,
    max_trials=20,  # number of hyperparameter combinations to try
    executions_per_trial=1,
    # Fix the search seed (same value as the NumPy/TensorFlow seeds above);
    # without it the sampled trials differ from run to run.
    seed=42,
    directory='tuner_results',
    project_name='gru_harvest_failure',
    overwrite=True,  # discard any earlier results stored under this project
)

print("Tuner setup selesai!")
# The trial budget and the objective are kept on the tuner's oracle, so they
# are read from there rather than from the tuner object itself.
print(f"Max trials: {tuner.oracle.max_trials}")
print(f"Objective: {tuner.oracle.objective}")


## 3. Menjalankan Hyperparameter Tuning


In [None]:
# Tell the user the search is starting and that it can take a while.
print("Memulai hyperparameter tuning...")
print("Ini mungkin memakan waktu lama tergantung jumlah trials...")

# Arguments passed to the search alongside the training data. Fewer epochs
# are used here because this run is only for tuning.
search_kwargs = {
    'validation_data': val_dataset,
    'epochs': 30,
    'verbose': 1,
}
tuner.search(train_dataset, **search_kwargs)

print("\nTuning selesai!")


## 4. Menampilkan Hasil Tuning


In [None]:
# Fetch the single best hyperparameter set found by the search.
best_hp = tuner.get_best_hyperparameters(num_trials=1)[0]

banner = "=" * 60

print(banner)
print("HYPERPARAMETER TERBAIK")
print(banner)

# (printed label, hyperparameter name) pairs, in display order.
reported_hps = (
    ('units_1 (GRU Layer 1)', 'units_1'),
    ('dropout_1', 'dropout_1'),
    ('units_2 (GRU Layer 2)', 'units_2'),
    ('dropout_2', 'dropout_2'),
    ('learning_rate', 'learning_rate'),
)
for hp_label, hp_name in reported_hps:
    print(f"{hp_label}: {best_hp.get(hp_name)}")

# Fetch the best model from the tuner and print its layer summary.
best_model = tuner.get_best_models(num_models=1)[0]

print("\n" + banner)
print("RINGKASAN MODEL TERBAIK")
print(banner)
best_model.summary()


## 5. Evaluasi Model Terbaik


In [None]:
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, roc_curve
import seaborn as sns

# Score the validation set with the best model and gather the matching labels.
# NOTE(review): predictions and labels come from two separate passes over
# val_dataset; they only line up if the dataset yields its elements in the
# same order each time -- confirm there is no per-iteration shuffling.
val_predictions = best_model.predict(val_dataset, verbose=0)
val_labels = np.concatenate([batch_y for _, batch_y in val_dataset], axis=0)

# Convert scores to 0/1 predictions using the default 0.5 cut-off.
threshold = 0.5
pred_binary = np.where(val_predictions >= threshold, 1, 0)

class_names = ['Normal', 'Gagal Panen']

# Per-class precision / recall / F1.
print("Classification Report:")
print(classification_report(val_labels, pred_binary, target_names=class_names))

# Confusion matrix drawn as an annotated heatmap.
cm = confusion_matrix(val_labels, pred_binary)
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set_title('Confusion Matrix - Model Terbaik')
ax.set_ylabel('True Label')
ax.set_xlabel('Predicted Label')
plt.show()

# ROC curve and AUC computed from the raw (unthresholded) scores.
fpr, tpr, thresholds = roc_curve(val_labels, val_predictions)
auc_score = roc_auc_score(val_labels, val_predictions)

fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(fpr, tpr, label=f'ROC Curve (AUC = {auc_score:.3f})')
# Diagonal reference line, labelled 'Random' in the legend.
ax.plot([0, 1], [0, 1], 'k--', label='Random')
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('ROC Curve - Model Terbaik')
ax.legend()
ax.grid(True, alpha=0.3)
plt.show()

print(f"\nAUC Score: {auc_score:.4f}")


## 6. Menyimpan Model Terbaik


In [None]:
import os
import joblib
import json

# Make sure the output folder exists before anything is written to it.
os.makedirs('../models', exist_ok=True)

# Save the best model.
best_model.save('../models/gru_model_tuned.keras')
print("Model terbaik disimpan di: ../models/gru_model_tuned.keras")

# Save the scaler returned by preprocessing.
joblib.dump(scaler, '../models/feature_scaler.joblib')
print("Scaler disimpan di: ../models/feature_scaler.joblib")

# Gather the best hyperparameters as plain Python numbers, then add the
# input shape and the validation AUC computed earlier in the notebook.
hp_casts = (
    ('units_1', int),
    ('dropout_1', float),
    ('units_2', int),
    ('dropout_2', float),
    ('learning_rate', float),
)
best_config = {hp_name: cast(best_hp.get(hp_name)) for hp_name, cast in hp_casts}
best_config['input_shape'] = input_shape
best_config['auc_score'] = float(auc_score)

# Write the configuration as indented JSON.
with open('../models/best_hyperparameters.json', 'w') as out_file:
    out_file.write(json.dumps(best_config, indent=2))

print("Hyperparameter terbaik disimpan di: ../models/best_hyperparameters.json")
closing_banner = "=" * 60
print("\n" + closing_banner)
print("HYPERPARAMETER TUNING SELESAI!")
print(closing_banner)
