In [5]:
 #Install the missing 'optuna' library
!pip install optuna



In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
import optuna
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score


In [7]:
# Load
df = pd.read_csv("/content/energy_iter13.csv")

In [8]:
target_column = "Demand(MW)"

X = df.drop(columns=['Demand(MW)','Date','DemandGenGap(MW)']).values  # Features
y = df['Demand(MW)'].values  # Target

scaler = StandardScaler()
X = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

# Define Transformer Model
class TransformerModel(nn.Module):
    def __init__(self, input_dim, model_dim, num_heads, num_layers, dropout):
        super(TransformerModel, self).__init__()

        # Ensure model_dim is divisible by num_heads
        model_dim = (model_dim // num_heads) * num_heads

        self.input_layer = nn.Linear(input_dim, model_dim)
        self.encoder_layer = nn.TransformerEncoderLayer(
            d_model=model_dim, nhead=num_heads, dropout=dropout
        )
        self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=num_layers)
        self.output_layer = nn.Linear(model_dim, 1)

    def forward(self, x):
        x = self.input_layer(x)
        x = self.transformer_encoder(x)
        x = self.output_layer(x)
        return x

#Define Objective Function for Hyperparameter Tuning
def objective(trial):

    # Define hyperparameter search space
    num_heads = trial.suggest_categorical("num_heads", [2, 4, 8, 16])
    model_dim = trial.suggest_int("model_dim", 32, 256, step=num_heads)  # Ensure divisibility
    num_layers = trial.suggest_int("num_layers", 2, 6)
    dropout = trial.suggest_float("dropout", 0.1, 0.5)
    learning_rate = trial.suggest_float("learning_rate", 1e-5, 1e-3, log=True)

    # Initialize model
    model = TransformerModel(
        input_dim=X.shape[1], model_dim=model_dim, num_heads=num_heads,
        num_layers=num_layers, dropout=dropout
    )

    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.MSELoss()

    # Training loop
    for epoch in range(50):
        model.train()
        optimizer.zero_grad()
        predictions = model(X_train_tensor)
        loss = criterion(predictions, y_train_tensor)
        loss.backward()
        optimizer.step()

    # Evaluation
    model.eval()
    with torch.no_grad():
        predictions = model(X_test_tensor).numpy().flatten()

    r2 = r2_score(y_test, predictions)
    return r2  # We maximize R² score

# Hyperparameter Optimization
study = optuna.create_study(direction="maximize")  # Maximize R²
study.optimize(objective, n_trials=20)  # Run 20 trials

# Get best hyperparameters
best_params = study.best_params
print("Best Hyperparameters:", best_params)

# Train final model with best parameters
best_model = TransformerModel(
    input_dim=X.shape[1], model_dim=best_params["model_dim"],
    num_heads=best_params["num_heads"], num_layers=best_params["num_layers"],
    dropout=best_params["dropout"]
)

optimizer = optim.Adam(best_model.parameters(), lr=best_params["learning_rate"])
criterion = nn.MSELoss()

# Train Final Model
for epoch in range(50):
    best_model.train()
    optimizer.zero_grad()
    predictions = best_model(X_train_tensor)
    loss = criterion(predictions, y_train_tensor)
    loss.backward()
    optimizer.step()

# Evaluate Final Model
best_model.eval()
with torch.no_grad():
    final_predictions = best_model(X_test_tensor).numpy().flatten()

final_r2 = r2_score(y_test, final_predictions)
print(f"Final Model R² Score: {final_r2:.4f}")

[I 2025-03-20 21:53:17,102] A new study created in memory with name: no-name-24cfcab1-006f-46d5-ac30-e10dcde29107
[I 2025-03-20 21:57:23,531] Trial 0 finished with value: 0.8665233313289306 and parameters: {'num_heads': 16, 'model_dim': 240, 'num_layers': 3, 'dropout': 0.11949460335126948, 'learning_rate': 5.444892610089943e-05}. Best is trial 0 with value: 0.8665233313289306.
[I 2025-03-20 22:00:27,218] Trial 1 finished with value: 0.6549784446745301 and parameters: {'num_heads': 8, 'model_dim': 64, 'num_layers': 5, 'dropout': 0.46246390029752404, 'learning_rate': 2.156551679176122e-05}. Best is trial 0 with value: 0.8665233313289306.
[I 2025-03-20 22:02:48,708] Trial 2 finished with value: 0.877099015253712 and parameters: {'num_heads': 8, 'model_dim': 240, 'num_layers': 3, 'dropout': 0.2812264898020279, 'learning_rate': 2.3602694026180796e-05}. Best is trial 2 with value: 0.877099015253712.
[I 2025-03-20 22:04:43,276] Trial 3 finished with value: 0.5652374297517123 and parameters: {

Best Hyperparameters: {'num_heads': 16, 'model_dim': 176, 'num_layers': 4, 'dropout': 0.1311298992466105, 'learning_rate': 6.342427953433506e-05}
Final Model R² Score: 0.7639
