# Simple ML-OPF Colab Notebook

This notebook provides a simplified interface for training ML models for Optimal Power Flow (OPF) problems in Google Colab.

## 1. Setup

First, we need to set up the environment and clone the repository.

In [None]:
# Install required packages
!pip install torch>=2.0.0 numpy>=1.20.0 matplotlib>=3.5.0 pandas>=1.3.0 scikit-learn>=1.0.0 scipy>=1.7.0 pypower>=5.1.0 networkx>=2.6.0 torch-geometric>=2.0.0 tqdm>=4.60.0

In [None]:
# Clone the repository if not already done
import os
if not os.path.exists('ML_AC_OPF'):
    !git clone https://github.com/yourusername/ML_AC_OPF.git

# Change to the repository directory
%cd ML_AC_OPF

In [None]:
# Download required data.
# Runs the repository's download script for the 'case118' case. The relative
# script path assumes the working directory is the repository root (the
# previous cell changes into it). The case passed here should match the
# `case_name` used when the data is loaded later in the notebook.
!python scripts/download_data.py --case case118

## 2. Import Libraries

In [None]:
import os
import sys
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
import networkx as nx

# Make the current directory (expected to be the repository root) importable so
# the project imports below resolve. The membership check keeps the cell
# idempotent: re-running it does not pile up duplicate '.' entries in sys.path.
if '.' not in sys.path:
    sys.path.append('.')

# Import project modules
from utils.data_utils import load_pglib_data, load_case_network, prepare_data_loaders, create_power_network_graph, OPFDataset
from utils.training import Trainer, optimality_gap_metric
from models.feedforward import FeedForwardNN, WarmStartNN
from models.gnn import TopologyAwareGNN, HybridGNN, prepare_pyg_data

## 3. Load and Prepare Data

In [None]:
# Set parameters
case_name = 'case118'   # must match the case downloaded earlier in the notebook
data_dir = 'data'
batch_size = 32
seed = 42               # fixed seed so repeated runs are comparable
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Seed the torch and numpy random number generators before any data splitting
# or model construction, so that anything drawing from these generators
# (e.g. torch weight initialisation) is reproducible across Restart & Run All.
torch.manual_seed(seed)
np.random.seed(seed)

# Create log directory (no error if it already exists)
log_dir = 'logs'
os.makedirs(log_dir, exist_ok=True)

# Load data: `data` holds the samples (it exposes `.columns` and `len()`),
# `case_data` holds the network description (indexed by 'gencost' later on).
print(f"Loading data for {case_name}...")
data = load_pglib_data(case_name, data_dir)
case_data = load_case_network(case_name, data_dir)
print(f"Data loaded: {len(data)} samples")

# Extract input and output columns based on CSV column format.
# Primary convention: inputs start with 'load', outputs start with 'gen' or 'bus'.
input_cols = [col for col in data.columns if col.startswith('load')]
if not input_cols:
    # Try alternative naming pattern
    input_cols = [col for col in data.columns if ':pl' in col or ':ql' in col]

output_cols = [col for col in data.columns if col.startswith(('gen', 'bus'))]
if not output_cols:
    # Try alternative naming pattern
    output_cols = [
        col for col in data.columns
        if any(tag in col for tag in (':pg', ':qg', ':v_'))
    ]

# Fail early with a readable message: continuing with an empty feature or
# target list would build a zero-width model and only fail (or silently train
# on nothing) much later.
if not input_cols or not output_cols:
    raise ValueError(
        "Could not identify input/output columns "
        f"(inputs: {len(input_cols)}, outputs: {len(output_cols)}). "
        f"First columns in data: {list(data.columns)[:10]}"
    )

print(f"Input features: {len(input_cols)}")
print(f"Output features: {len(output_cols)}")

# Prepare data loaders (train / validation / test, in that order)
train_loader, val_loader, test_loader = prepare_data_loaders(
    data, input_cols, output_cols, batch_size=batch_size)

## 4. Train a Feedforward Neural Network

In [None]:
# Hyperparameters for the feedforward baseline
learning_rate = 0.001
epochs = 10
hidden_dims = [128, 256, 128]
input_dim, output_dim = len(input_cols), len(output_cols)

# Build the network and move it to the selected device
model = FeedForwardNN(
    input_dim=input_dim,
    output_dim=output_dim,
    hidden_dims=hidden_dims,
).to(device)

# Optional optimality-gap metric. It needs one cost coefficient per 'gencost'
# row (the entry at index 5); if that lookup fails, fall back to no extra
# metrics so training still runs with the loss only.
# NOTE(review): index 5 is presumably the linear cost term of a polynomial
# gencost row — confirm against the case file format.
try:
    cost_coeffs = torch.tensor(
        [row[5] for row in case_data['gencost']],
        dtype=torch.float32,
        device=device,
    )
except (IndexError, KeyError) as e:
    print(f"Warning: Error getting cost coefficients: {e}. Using MSE metrics only.")
    metrics = {}
else:
    def _opt_gap(pred, target):
        """Optimality gap between predictions and targets under `cost_coeffs`."""
        return optimality_gap_metric(pred, target, cost_coeffs)

    metrics = {'opt_gap': _opt_gap}

# Assemble the trainer: Adam optimiser + MSE loss, logging into `log_dir`
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
criterion = torch.nn.MSELoss()
trainer = Trainer(
    model=model,
    optimizer=optimizer,
    criterion=criterion,
    device=device,
    log_dir=log_dir,
)

# Run training; `history` keeps whatever the trainer returns for this run
print(f"Training feedforward model for {epochs} epochs...")
history = trainer.train(
    train_loader=train_loader,
    val_loader=val_loader,
    epochs=epochs,
    metrics=metrics,
    save_best=True,
    early_stopping=5,
)

## 5. Evaluate the Model

In [None]:
# Plot training history recorded by the trainer during the training cell above
trainer.plot_history()

# Load best model and evaluate on test set.
# Order matters: the best checkpoint is restored first so that the test
# numbers (and the predictions below) come from it rather than from the
# state at the end of training.
trainer.load_best_model()
# `metrics` is the same dict passed to trainer.train (it may be empty)
test_loss, test_metrics = trainer.validate(test_loader, metrics)
print(f"Test Loss: {test_loss:.6f}")
for name, value in test_metrics.items():
    print(f"Test {name}: {value:.6f}")

# Make predictions on test set.
# Both results are indexed as [:, i] by the plotting code that follows,
# i.e. one column per output.
predictions, targets = trainer.predict(test_loader)

# Plot predictions vs targets for a few outputs (at most the first five),
# one scatter panel per output with a dashed red y = x reference line.
fig = plt.figure(figsize=(15, 5))
output_indices = min(5, output_dim)  # number of panels to draw
for i in range(output_indices):
    target_col = targets[:, i]
    prediction_col = predictions[:, i]
    # Use the array's own min/max once per panel: the builtin min()/max()
    # iterate element by element in Python and were evaluated twice per bound.
    lo, hi = target_col.min(), target_col.max()

    ax = fig.add_subplot(1, output_indices, i + 1)
    ax.scatter(target_col, prediction_col, alpha=0.3)
    ax.plot([lo, hi], [lo, hi], 'r--')  # perfect-prediction diagonal
    ax.set_xlabel('Target')
    ax.set_ylabel('Prediction')
    ax.set_title(f'Output {i}')
fig.tight_layout()
fig.savefig(os.path.join(log_dir, 'predictions_vs_targets.png'))
plt.show()

## 6. Try a Graph Neural Network

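Now let's take a first step toward a GNN model that's aware of the power system topology: building and visualizing the network graph. GNN training itself is not run in this notebook; see `scripts/local_gnn.py` for a complete implementation.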

In [None]:
# Create a graph from the case data
G = create_power_network_graph(case_data)
print(f"Created graph with {G.number_of_nodes()} nodes and {G.number_of_edges()} edges")

# Visualize the graph.
# An explicit Axes is passed to nx.draw: without `ax`, nx.draw on an empty
# figure adds an axes spanning the whole figure, so a title placed above it
# can be clipped from the saved image.
fig, ax = plt.subplots(figsize=(10, 10))
pos = nx.spring_layout(G, seed=42)  # fixed seed -> same layout on every run
nx.draw(G, pos, ax=ax, with_labels=True, node_color='lightblue', node_size=500, font_size=10)
ax.set_title('Power System Network')
fig.savefig(os.path.join(log_dir, 'power_system_graph.png'))
plt.show()

# For a more detailed implementation of GNN training, you would need to:
# 1. Convert the NetworkX graph to PyTorch Geometric format using prepare_pyg_data
# 2. Create a custom dataset class that returns both features and graph data
# 3. Initialize a GNN model (TopologyAwareGNN or HybridGNN)
# 4. Train the model similar to how we trained the feedforward network

print("For a complete GNN implementation, please refer to scripts/local_gnn.py")

## 7. Save the Model

Finally, let's save the trained model for future use.

In [None]:
# Bundle the trained weights with the metadata needed to rebuild the model
# (column lists and hidden layer sizes) and the test loss measured above.
checkpoint = {
    'model_state_dict': model.state_dict(),
    'input_cols': input_cols,
    'output_cols': output_cols,
    'hidden_dims': hidden_dims,
    'test_loss': test_loss,
}

# Write the checkpoint into the log directory, named after the case
model_path = os.path.join(log_dir, f'{case_name}_model.pt')
torch.save(checkpoint, model_path)
print(f"Model saved to {model_path}")