In [1]:
# Install required packages if needed
!pip install optuna tensorflow scikit-learn pandas matplotlib --quiet

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/400.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━[0m [32m286.7/400.9 kB[0m [31m9.4 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m400.9/400.9 kB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import optuna
import time

In [5]:
# -------------------------
# 1. Load and preprocess data
# -------------------------

# Load CSV
df = pd.read_csv('/content/drive/MyDrive/Sample/AEP_hourly.csv', parse_dates=['Datetime'], index_col='Datetime')

# Check duplicates
print("Duplicate timestamps:", df.index.duplicated().sum())

# Drop duplicate timestamps (keep the first occurrence)
df = df[~df.index.duplicated(keep='first')]

# Rename column
df = df.rename(columns={'AEP_MW': 'target'})

# Use lowercase 'h' for hourly frequency and forward-fill missing values
df = df.asfreq('h').fillna(method='ffill')

# Scale
scaler = StandardScaler()
df['scaled'] = scaler.fit_transform(df[['target']])

LOOKBACK = 24
HORIZON = 1

Duplicate timestamps: 4


  df = df.asfreq('h').fillna(method='ffill')


In [6]:
# Create sequences
def create_sequences(data, lookback=24, horizon=1):
    X, y = [], []
    for i in range(len(data) - lookback - horizon + 1):
        X.append(data[i:i+lookback])
        y.append(data[i+lookback+horizon-1])
    return np.array(X), np.array(y)

X, y = create_sequences(df['scaled'].values, LOOKBACK, HORIZON)
split = int(0.8 * len(X))
x_train, x_val = X[:split], X[split:]
y_train, y_val = y[:split], y[split:]

x_train = np.expand_dims(x_train, -1)
x_val = np.expand_dims(x_val, -1)

In [7]:
# -------------------------
# 2. Define Optuna objective
# -------------------------
def objective(trial):
    # Suggest hyperparameters
    lr = trial.suggest_float("learning_rate", 1e-4, 1e-2, log=True)
    batch_size = trial.suggest_categorical("batch_size", [16, 32, 64, 128])
    num_units = trial.suggest_int("num_units", 32, 128, step=32)
    dropout = trial.suggest_float("dropout", 0.0, 0.5)

    # Build GRU model (Keras Sequential)
    model = keras.Sequential([
        layers.GRU(num_units, activation='tanh', dropout=dropout, input_shape=(LOOKBACK, 1)),
        layers.Dense(1)
    ])

    # Compile model
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=lr),
        loss='mse',
        metrics=[keras.metrics.RootMeanSquaredError()]
    )

    # Train model and track time
    start = time.time()
    history = model.fit(
        x_train, y_train,
        validation_data=(x_val, y_val),
        epochs=5,
        batch_size=batch_size,
        verbose=0
    )
    elapsed = time.time() - start

    # Evaluate validation RMSE
    val_rmse = history.history["val_root_mean_squared_error"][-1]

    # Save training time for analysis
    trial.set_user_attr("train_time_sec", elapsed)

    return val_rmse

In [9]:
# -------------------------
# 3. Run Optimization
# -------------------------
sampler = optuna.samplers.RandomSampler()
study = optuna.create_study(direction="minimize", sampler=sampler)
study.optimize(objective, n_trials=5, show_progress_bar=True)

print("Best trial:")
print(study.best_trial.params)
print(f"Validation RMSE: {study.best_value:.4f}")

[I 2025-10-22 14:00:51,519] A new study created in memory with name: no-name-c1c1dbd2-2599-487f-95c4-a308f511163a


  0%|          | 0/5 [00:00<?, ?it/s]

[I 2025-10-22 14:01:59,312] Trial 0 finished with value: 0.3518332839012146 and parameters: {'learning_rate': 0.008344492311715107, 'batch_size': 128, 'num_units': 32, 'dropout': 0.3435692592058902}. Best is trial 0 with value: 0.3518332839012146.
[I 2025-10-22 14:06:55,112] Trial 1 finished with value: 0.3879954218864441 and parameters: {'learning_rate': 0.008054951996876574, 'batch_size': 32, 'num_units': 64, 'dropout': 0.3740793025758298}. Best is trial 0 with value: 0.3518332839012146.
[I 2025-10-22 14:17:00,950] Trial 2 finished with value: 0.49318817257881165 and parameters: {'learning_rate': 0.006051106861343675, 'batch_size': 16, 'num_units': 128, 'dropout': 0.4761310333701748}. Best is trial 0 with value: 0.3518332839012146.
[I 2025-10-22 14:20:10,994] Trial 3 finished with value: 0.10399547219276428 and parameters: {'learning_rate': 0.0003467054968507029, 'batch_size': 128, 'num_units': 96, 'dropout': 0.010454076829338743}. Best is trial 3 with value: 0.10399547219276428.
[I 

In [10]:
# -------------------------
# 4. Optional: Try GP Sampler
# -------------------------
gp_study = optuna.create_study(direction="minimize",
                               sampler=optuna.samplers.GPSampler())
gp_study.optimize(objective, n_trials=5, show_progress_bar=True)

print("\nBest trial (GP Sampler):")
print(gp_study.best_trial.params)
print(f"Validation RMSE: {gp_study.best_value:.4f}")

  sampler=optuna.samplers.GPSampler())
[I 2025-10-22 14:33:35,588] A new study created in memory with name: no-name-8fad7c42-ce4d-4023-b64f-514a268b00b8


  0%|          | 0/5 [00:00<?, ?it/s]

[I 2025-10-22 14:40:00,664] Trial 0 finished with value: 0.35762348771095276 and parameters: {'learning_rate': 0.00014190600669799423, 'batch_size': 32, 'num_units': 128, 'dropout': 0.32843730761979345}. Best is trial 0 with value: 0.35762348771095276.
[I 2025-10-22 14:50:15,101] Trial 1 finished with value: 0.1473422646522522 and parameters: {'learning_rate': 0.0012240406469214043, 'batch_size': 16, 'num_units': 96, 'dropout': 0.1414671192925417}. Best is trial 1 with value: 0.1473422646522522.
[I 2025-10-22 14:55:07,572] Trial 2 finished with value: 0.42972418665885925 and parameters: {'learning_rate': 0.0067983330571376255, 'batch_size': 64, 'num_units': 96, 'dropout': 0.4376916938112634}. Best is trial 1 with value: 0.1473422646522522.
[I 2025-10-22 15:01:45,099] Trial 3 finished with value: 0.3300950527191162 and parameters: {'learning_rate': 0.0007657861205476051, 'batch_size': 16, 'num_units': 64, 'dropout': 0.322414412181582}. Best is trial 1 with value: 0.1473422646522522.
[I 

In [11]:
# ============================================================
# 6️⃣ Run Optuna for TPESampler
# ============================================================
print("\n🔹 Running TPESampler optimization...")
tpe_study = optuna.create_study(direction="minimize",
                                sampler=optuna.samplers.TPESampler())
tpe_study.optimize(objective, n_trials=5, show_progress_bar=True)

print("\nBest TPESampler Params:", tpe_study.best_trial.params)
print("Best TPESampler MSE:", tpe_study.best_value)

[I 2025-10-22 15:12:42,139] A new study created in memory with name: no-name-0bdcdeb0-13f2-4f9e-9718-fc2895005b13



🔹 Running TPESampler optimization...


  0%|          | 0/5 [00:00<?, ?it/s]

[I 2025-10-22 15:15:28,479] Trial 0 finished with value: 0.2510155737400055 and parameters: {'learning_rate': 0.005443640741195861, 'batch_size': 64, 'num_units': 64, 'dropout': 0.2444662716288114}. Best is trial 0 with value: 0.2510155737400055.
[I 2025-10-22 15:18:04,040] Trial 1 finished with value: 0.17858293652534485 and parameters: {'learning_rate': 0.0010035366670202866, 'batch_size': 128, 'num_units': 64, 'dropout': 0.15249021571876709}. Best is trial 1 with value: 0.17858293652534485.
[I 2025-10-22 15:23:30,931] Trial 2 finished with value: 0.10315074771642685 and parameters: {'learning_rate': 0.0019239529711284427, 'batch_size': 32, 'num_units': 96, 'dropout': 0.07273403844134307}. Best is trial 2 with value: 0.10315074771642685.
[I 2025-10-22 15:28:29,784] Trial 3 finished with value: 0.11911177635192871 and parameters: {'learning_rate': 0.0003857095770267632, 'batch_size': 32, 'num_units': 64, 'dropout': 0.10282645259677237}. Best is trial 2 with value: 0.10315074771642685.

In [14]:
# ============================================================
# 7️⃣ Compare all results
# ============================================================
print("\n==================== MSE COMPARISON ====================")
print(f"RandomSampler Best MSE: {study.best_value:.6f}")
print(f"GPSampler Best MSE:    {gp_study.best_value:.6f}")
print(f"TPESampler Best MSE:   {tpe_study.best_value:.6f}")

best_method = min(
    [('Random', study.best_value),
     ('GP', gp_study.best_value),
     ('TPE', tpe_study.best_value)],
    key=lambda x: x[1]
)
print(f"\n Best overall sampler: {best_method[0]} (MSE={best_method[1]:.6f})")


RandomSampler Best MSE: 0.103995
GPSampler Best MSE:    0.147342
TPESampler Best MSE:   0.103151

 Best overall sampler: TPE (MSE=0.103151)


In [15]:
import optuna.visualization as vis
vis.plot_optimization_history(study)
vis.plot_optimization_history(gp_study)
vis.plot_optimization_history(tpe_study)

# Comparison

Among the three Optuna sampling strategies - **RandomSampler**, **GPSampler, and TPESampler**—the results reveal clear differences in optimization performance. The **RandomSampler** explores the search space purely by chance, often requiring more trials to find good solutions, while the **GPSampler** uses a Gaussian Process to model the objective function and tends to perform better when the search space is smooth and continuous. However, in this experiment, the **TPESampler** achieved the **lowest Mean Squared Error (MSE)**, indicating its superior ability to balance exploration and exploitation in hyperparameter tuning for the GRU model. Overall, **TPE** demonstrated the most efficient convergence and produced the most accurate predictions on the validation set compared to the Random and GP methods.

In [16]:
#Saving Results
# results = pd.DataFrame({
#     "Sampler": ["Random", "GP", "TPE"],
#     "Best_MSE": [study.best_value, gp_study.best_value, tpe_study.best_value]
# })
# results.to_csv("optuna_mse_comparison.csv", index=False)