# Model Prototyping - GRU untuk Prediksi Gagal Panen

Notebook ini digunakan untuk prototyping dan testing arsitektur model GRU sebelum melakukan hyperparameter tuning.


In [None]:
import sys
sys.path.append('../src')

import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, roc_curve
import data_processing as dp
import model
import config

# Seed TensorFlow and NumPy with one shared value for reproducibility.
SEED = 42
for seed_fn in (tf.random.set_seed, np.random.seed):
    seed_fn(SEED)

# Report the runtime: TensorFlow version and the GPUs TensorFlow can see.
print(f"TensorFlow version: {tf.__version__}")
gpu_devices = tf.config.list_physical_devices('GPU')
print(f"GPU Available: {gpu_devices}")


## 1. Memuat dan Memproses Data


In [None]:
# Load the raw harvest and weather tables through the project's data helper.
df_harvest, df_weather = dp.load_data_from_csv()

# Preprocess in training mode. No scaler is supplied; the helper returns one
# together with the dataset and the labels.
dataset, scaler, labels = dp.preprocess_features(
    df_harvest,
    df_weather,
    scaler=None,
    is_training=True,
)

# Derive the model input shape from one dataset element.
# Assumes each element is a (features, labels) pair whose features are laid
# out as (batch, sequence, features) -- TODO confirm in dp.preprocess_features.
sample_batch = next(iter(dataset))
input_shape = (sample_batch[0].shape[1], sample_batch[0].shape[2])

print(f"Input shape: {input_shape}")
print(f"Sequence length: {config.SEQUENCE_LENGTH}")
print(f"Number of features: {input_shape[1]}")

# Count dataset elements without holding all of them in memory. Prefer the
# statically known cardinality and only iterate when TensorFlow cannot tell.
cardinality = int(dataset.cardinality().numpy())
if cardinality == tf.data.INFINITE_CARDINALITY:
    raise ValueError("Cannot split an infinite dataset into train/validation parts.")
if cardinality == tf.data.UNKNOWN_CARDINALITY:
    dataset_size = sum(1 for _ in dataset)
else:
    dataset_size = cardinality

# Split sizes are measured in dataset elements (batches), not single samples.
val_size = int(dataset_size * config.VALIDATION_SPLIT)
train_size = dataset_size - val_size

# The callbacks and evaluation cells need validation data; fail here with a
# clear message instead of an obscure error further down the notebook.
if val_size == 0 or train_size == 0:
    raise ValueError(
        f"Dataset with {dataset_size} batches cannot be split using "
        f"VALIDATION_SPLIT={config.VALIDATION_SPLIT}: got {train_size} train "
        f"and {val_size} validation batches."
    )

# NOTE(review): take()/skip() only give disjoint train/validation sets when the
# dataset yields elements in the same order on every iteration. If
# dp.preprocess_features shuffles with per-iteration reshuffling, validation
# batches leak into training -- confirm the pipeline order is fixed.
train_dataset = dataset.take(train_size)
val_dataset = dataset.skip(train_size)

print(f"\nTrain batches: {train_size}")
print(f"Validation batches: {val_size}")


## 2. Membangun Model GRU


In [None]:
# Build the prototype; hp=None is passed so no tuner hyperparameters are supplied.
prototype_model = model.build_model(input_shape, hp=None)

# Print the layer-by-layer summary.
prototype_model.summary()

# Render the architecture diagram with tensor shapes and layer names shown.
plot_options = {'show_shapes': True, 'show_layer_names': True}
keras.utils.plot_model(prototype_model, **plot_options)


## 3. Training Model Prototype


In [None]:
# Stop once validation loss has gone 10 epochs without improving and roll the
# model back to the best weights seen.
stop_early = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
    verbose=1,
)

# Halve the learning rate after 5 stagnant epochs, never going below 1e-6.
lr_decay = keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=5,
    min_lr=1e-6,
    verbose=1,
)

callbacks = [stop_early, lr_decay]

# Fit on the training split and validate on the held-out split every epoch.
print("Memulai training...")
fit_options = {
    'validation_data': val_dataset,
    'epochs': config.EPOCHS,
    'callbacks': callbacks,
    'verbose': 1,
}
history = prototype_model.fit(train_dataset, **fit_options)


## 4. Visualisasi Training History


In [None]:
# Draw a 2x2 grid of training curves: one panel per tracked metric, each
# showing the training series next to its validation counterpart.
fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# (history key, display name) pairs, listed in row-major panel order.
panels = (
    ('loss', 'Loss'),
    ('accuracy', 'Accuracy'),
    ('precision', 'Precision'),
    ('recall', 'Recall'),
)

for ax, (metric_key, metric_title) in zip(axes.flat, panels):
    ax.plot(history.history[metric_key], label=f'Train {metric_title}')
    ax.plot(history.history[f'val_{metric_key}'], label=f'Val {metric_title}')
    ax.set_title(f'Model {metric_title}')
    ax.set_xlabel('Epoch')
    ax.set_ylabel(metric_title)
    ax.legend()
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()


## 5. Evaluasi Model


In [None]:
# Collect predictions and labels in ONE pass over the validation set.
# Predicting on the dataset and then iterating it again for the labels only
# gives aligned arrays when the pipeline order is identical on both passes;
# a pipeline that reshuffles per iteration would silently pair predictions
# with the wrong labels and corrupt every metric below.
batch_predictions = []
batch_labels = []
for batch_features, batch_targets in val_dataset:
    batch_predictions.append(prototype_model.predict_on_batch(batch_features))
    batch_labels.append(np.asarray(batch_targets))

# Flatten to 1-D so scikit-learn receives plain vectors.
# Assumes the model emits a single probability per sample -- TODO confirm.
val_predictions = np.concatenate(batch_predictions, axis=0).ravel()
val_labels = np.concatenate(batch_labels, axis=0).ravel()

# Default decision threshold
threshold = 0.5
pred_binary = (val_predictions >= threshold).astype(int)

# Classification report
print("Classification Report (Threshold = 0.5):")
print(classification_report(val_labels, pred_binary, target_names=['Normal', 'Gagal Panen']))

# Confusion matrix
cm = confusion_matrix(val_labels, pred_binary)
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=['Normal', 'Gagal Panen'],
            yticklabels=['Normal', 'Gagal Panen'])
plt.title('Confusion Matrix')
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.show()

# ROC curve; the per-point thresholds returned by roc_curve are not needed here.
fpr, tpr, _ = roc_curve(val_labels, val_predictions)
auc_score = roc_auc_score(val_labels, val_predictions)

plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, label=f'ROC Curve (AUC = {auc_score:.3f})')
plt.plot([0, 1], [0, 1], 'k--', label='Random')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve')
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()

print(f"\nAUC Score: {auc_score:.4f}")


## 6. Threshold Optimization


In [None]:
from sklearn.metrics import f1_score, precision_score, recall_score

# Search for the decision threshold that maximises F1 on the validation set.
# np.linspace fixes the number of grid points (0.30, 0.35, ..., 0.75), whereas
# np.arange with a fractional step can gain or lose the last point to rounding.
thresholds = np.linspace(0.3, 0.75, 10)
f1_scores = []
precision_scores = []
recall_scores = []

# Loop-local names keep the `threshold` and `pred_binary` values from the
# evaluation cell intact.
for candidate in thresholds:
    candidate_pred = (val_predictions >= candidate).astype(int)
    # zero_division=0 keeps the score at 0 (the library's fallback value) but
    # silences the undefined-metric warning when nothing is predicted positive.
    f1_scores.append(f1_score(val_labels, candidate_pred, zero_division=0))
    precision_scores.append(precision_score(val_labels, candidate_pred, zero_division=0))
    recall_scores.append(recall_score(val_labels, candidate_pred, zero_division=0))

# Plot all three metrics against the threshold grid.
plt.figure(figsize=(10, 6))
plt.plot(thresholds, f1_scores, label='F1-Score', marker='o')
plt.plot(thresholds, precision_scores, label='Precision', marker='s')
plt.plot(thresholds, recall_scores, label='Recall', marker='^')
plt.xlabel('Threshold')
plt.ylabel('Score')
plt.title('Metric Scores vs Threshold')
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()

# Best threshold = first grid point with the highest F1.
best_idx = int(np.argmax(f1_scores))
best_threshold = thresholds[best_idx]
print(f"Threshold optimal: {best_threshold:.3f}")
print(f"F1-Score: {f1_scores[best_idx]:.3f}")
print(f"Precision: {precision_scores[best_idx]:.3f}")
print(f"Recall: {recall_scores[best_idx]:.3f}")

# Re-evaluate with the selected threshold.
# NOTE(review): the threshold is tuned and reported on the same validation
# data, so these scores are optimistic; confirm on a held-out test set.
pred_optimal = (val_predictions >= best_threshold).astype(int)
print("\nClassification Report (Threshold Optimal):")
print(classification_report(val_labels, pred_optimal, target_names=['Normal', 'Gagal Panen']))
