# Construction of IO dataset for Power Flow BBMs


In [1]:
# Importing libraries
import os
from pathlib import Path
import dill as pickle
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tqdm

import greyboxmodels.cpsmodels.physical.electrical.PowerFlowPowerGrid as PG
import greyboxmodels.cpsmodels.Plant as Plant

# Run from the project root so that relative paths (e.g. the processed data folder) resolve against it
project_root = "D:/projects/IPTLC_BBMs"
os.chdir(project_root)
print(f"Current working directory: {os.getcwd()}")

Current working directory: D:\projects\IPTLC_BBMs


In [2]:
# Folder that holds the raw simulation results
data_folder = Path("D:/projects/Hierarchical_CPS_models/data/simulations/controlled_pg/20240227_195448")

# Processed data is stored under a folder named after the simulation batch;
# the folder (and any missing parent) is created when it does not exist yet
processed_data_folder = Path("data/PF/") / data_folder.name
processed_data_folder.mkdir(parents=True, exist_ok=True)

# Report both locations
print(f"Data folder: {data_folder}")
print(f"Processed data folder: {processed_data_folder}")

Data folder: D:\projects\Hierarchical_CPS_models\data\simulations\controlled_pg\20240227_195448
Processed data folder: data\PF\20240227_195448


In [8]:
# Create a function that receives a path to a simulation and returns the inputs and outputs
def get_pf_data(filepath: Path):
    """Load one pickled simulation and extract its power-flow inputs and outputs.

    Parameters
    ----------
    filepath : Path
        Path to a pickled simulation file (e.g. ``simulation_0.pkl``).

    Returns
    -------
    pf_inputs : np.ndarray
        The ``"power_flow_input"`` entry of every step, stacked in step order.
    pf_outputs : np.ndarray
        The ``"power_flow_output"`` entry of every step, stacked in step order.
    """
    # NOTE: unpickling can execute arbitrary code; only open trusted simulation files
    with open(filepath, "rb") as handle:
        simulation = pickle.load(handle)

    # Every step keeps its power-grid record under 'power_grid_step_data'
    grid_records = [step['power_grid_step_data'] for step in simulation['step_data']]

    # Stack the per-step vectors into one array each
    pf_inputs = np.array([record["power_flow_input"] for record in grid_records])
    pf_outputs = np.array([record["power_flow_output"] for record in grid_records])

    return pf_inputs, pf_outputs

# Test the function on a single simulation file
target_simulation = data_folder / "simulation_0.pkl"
# Announce the file before loading it, so a failing load is easy to attribute
print(f"Testing the file: {target_simulation}")
pf_inputs, pf_outputs = get_pf_data(target_simulation)
# The arrays hold power-flow (PF) data, so label them as such
print(f"PF inputs shape: {pf_inputs.shape}")
print(f"PF outputs shape: {pf_outputs.shape}")

Testing the file: D:\projects\Hierarchical_CPS_models\data\simulations\controlled_pg\20240227_195448\simulation_0.pkl
CC inputs shape: (384, 56)
CC outputs shape: (384, 105)


In [9]:
# Now, a function that iterates over all the simulations and returns the inputs and outputs in a single numpy array
def get_pf_data_all(data_folder: Path):
    """Collect the power-flow inputs and outputs of every simulation in a folder.

    Every file in ``data_folder`` whose name starts with ``"simulation"`` is read
    with ``get_pf_data`` and the per-simulation arrays are concatenated along
    axis 0. Files are processed in ascending order of their numeric suffix
    (``simulation_2`` before ``simulation_10``) so the row order of the result
    is reproducible regardless of the order in which the file system lists them.

    Parameters
    ----------
    data_folder : Path
        Folder containing the ``simulation_*.pkl`` files and ``plant.pkl``.

    Returns
    -------
    inputs_matrix : np.ndarray
        Inputs of all simulations, concatenated along axis 0.
    outputs_matrix : np.ndarray
        Outputs of all simulations, concatenated along axis 0.
    plant
        The object unpickled from ``plant.pkl``.

    Raises
    ------
    ValueError
        If the folder contains no simulation file.
    FileNotFoundError
        If ``plant.pkl`` is missing.
    """
    data_folder = Path(data_folder)

    def simulation_index(path: Path):
        # Numeric suffixes sort by value; names without one go last, by name
        suffix = path.stem.rpartition("_")[2]
        if suffix.isdecimal():
            return (0, int(suffix), path.name)
        return (1, 0, path.name)

    # The simulation files are called "simulation_0.pkl", "simulation_1.pkl", etc.
    # iterdir() yields entries in arbitrary order, hence the explicit sort.
    simulation_files = sorted(
        (f for f in data_folder.iterdir() if f.is_file() and f.name.startswith("simulation")),
        key=simulation_index,
    )
    if not simulation_files:
        raise ValueError(f"No simulation files found in {data_folder}")

    # Load the plant first: a missing plant.pkl then fails before the slow loop.
    # NOTE: unpickling can execute arbitrary code; only open trusted files.
    with open(data_folder / "plant.pkl", "rb") as f:
        plant = pickle.load(f)

    # Gather the inputs and outputs of each simulation
    inputs = []
    outputs = []
    for simulation_file in tqdm.tqdm(simulation_files):
        pf_inputs, pf_outputs = get_pf_data(simulation_file)
        inputs.append(pf_inputs)
        outputs.append(pf_outputs)

    # Stack all simulations on top of each other
    inputs_matrix = np.concatenate(inputs, axis=0)
    outputs_matrix = np.concatenate(outputs, axis=0)

    return inputs_matrix, outputs_matrix, plant

# Test the function on the whole data folder
pf_inputs, pf_outputs, plant = get_pf_data_all(data_folder)
# The arrays hold power-flow (PF) data, so label them as such
print(f"PF inputs shape: {pf_inputs.shape}")
print(f"PF outputs shape: {pf_outputs.shape}")

100%|██████████| 132/132 [02:09<00:00,  1.02it/s]

CC inputs shape: (50688, 56)
CC outputs shape: (50688, 105)





In [48]:
# Persist the raw (not yet normalized) inputs and outputs as .npy files
inputs_path = processed_data_folder / "pf_inputs.npy"
np.save(inputs_path, pf_inputs)

outputs_path = processed_data_folder / "pf_outputs.npy"
np.save(outputs_path, pf_outputs)

## Normalize the inputs and outputs

In [49]:
# Create a function that min-max normalizes an array column-wise (min and max are taken along axis 0)
def min_max_normalize(array: np.ndarray, min_array: np.ndarray = None, max_array: np.ndarray = None):
    """Min-max normalize ``array`` along axis 0.

    Computes ``(array - min_array) / (max_array - min_array)``. A bound that is
    not supplied is taken from ``array`` itself (minimum / maximum over axis 0).

    When the minimum is computed from the data, any position where it equals
    the maximum (a constant column) has its minimum lowered by 1. This avoids a
    division by zero and maps every value of such a column to 1.0. Bounds
    supplied by the caller are used exactly as given.

    Parameters
    ----------
    array : np.ndarray
        Data to normalize.
    min_array : np.ndarray, optional
        Lower bounds to use; computed from ``array`` when omitted.
    max_array : np.ndarray, optional
        Upper bounds to use; computed from ``array`` when omitted.

    Returns
    -------
    array_normalized : np.ndarray
        The normalized data.
    min_array : np.ndarray
        The lower bounds actually used (reusable on other data).
    max_array : np.ndarray
        The upper bounds actually used (reusable on other data).
    """
    array = np.asarray(array)

    # Each bound is resolved independently, so supplying only one of them works
    min_from_data = min_array is None
    if min_from_data:
        min_array = array.min(axis=0)
    if max_array is None:
        max_array = array.max(axis=0)

    if min_from_data:
        # Widen zero-width ranges; np.where also copes with the scalar bounds of 1-D input
        constant = min_array == max_array
        min_array = np.where(constant, min_array - 1, min_array)

    array_normalized = (array - min_array) / (max_array - min_array)

    return array_normalized, min_array, max_array

In [50]:
# Inputs: compute the min/max bounds from the full input dataset and normalize with them
pf_inputs_normalized, min_pf_input, max_pf_input = min_max_normalize(pf_inputs)

# Display the first five normalized rows as a quick check
pf_inputs_normalized[:5, :]

array([[2.99319685e-01, 3.78204287e-01, 2.60249355e-01, 3.02758484e-01,
        3.35735142e-01, 3.20384746e-01, 2.96569026e-01, 3.14446245e-01,
        3.38502320e-01, 5.29905072e-01, 3.08272698e-01, 3.77345490e-01,
        3.14815730e-01, 6.55954853e-01, 3.00686689e-01, 2.84565466e-01,
        3.94535675e-01, 2.48362937e-01, 3.04020025e-01, 3.21291795e-01,
        3.22426194e-01, 2.92110176e-01, 1.00000000e+00, 5.59964457e-01,
        4.14008139e-01, 3.50364095e-07, 5.43783963e-01, 5.24077810e-01,
        4.40589273e-01, 9.83800924e-01, 9.99904234e-01, 1.00000000e+00,
        1.00000000e+00, 1.00000000e+00, 1.00000000e+00, 1.00000000e+00,
        1.00000000e+00, 1.00000000e+00, 1.00000000e+00, 1.00000000e+00,
        1.00000000e+00, 1.00000000e+00, 1.00000000e+00, 1.00000000e+00,
        1.00000000e+00, 1.00000000e+00, 1.00000000e+00, 1.00000000e+00,
        1.00000000e+00, 1.00000000e+00, 1.00000000e+00, 1.00000000e+00,
        1.00000000e+00, 1.00000000e+00, 1.00000000e+00, 1.000000

In [51]:
# Outputs: compute the min/max bounds from the full output dataset and normalize with them
pf_outputs_normalized, min_pf_output, max_pf_output = min_max_normalize(pf_outputs)

# Display the first five normalized rows as a quick check
pf_outputs_normalized[:5, :]

array([[2.99319685e-01, 3.78204287e-01, 2.60249355e-01, 3.02758484e-01,
        3.35735142e-01, 3.20384746e-01, 2.96569026e-01, 3.14446245e-01,
        3.38502320e-01, 5.29905072e-01, 3.08272698e-01, 3.77345490e-01,
        3.14815730e-01, 6.55954853e-01, 3.00686689e-01, 2.84565466e-01,
        3.94535675e-01, 2.48362937e-01, 3.04020025e-01, 3.21291795e-01,
        3.22426194e-01, 2.92110176e-01, 4.14007920e-01, 3.50364095e-07,
        1.00000000e+00, 1.00000000e+00, 1.00000000e+00, 4.42832764e-05,
        4.00338571e-01, 4.18402881e-01, 6.50213492e-02, 6.46570572e-01,
        5.43783963e-01, 5.24077810e-01, 4.40589273e-01, 5.19044524e-01,
        5.25348764e-01, 9.83800924e-01, 6.41333558e-01, 9.99904234e-01,
        9.99971867e-01, 8.98090921e-01, 8.50038349e-01, 6.66406088e-01,
        6.68224295e-01, 7.52752894e-01, 1.00000000e+00, 5.59964912e-01,
        6.08943870e-01, 6.28145626e-01, 6.23777092e-01, 6.20881739e-01,
        6.30606700e-01, 6.30606700e-01, 6.31493121e-01, 6.303405

In [52]:
# Persist the normalized inputs and outputs as .npy files
inputs_normalized_path = processed_data_folder / "pf_inputs_minmax_normalized.npy"
np.save(inputs_normalized_path, pf_inputs_normalized)

outputs_normalized_path = processed_data_folder / "pf_outputs_minmax_normalized.npy"
np.save(outputs_normalized_path, pf_outputs_normalized)

# Keep the bounds used for the normalization next to the data,
# so other arrays can later be normalized with the same values
min_max_values_path = processed_data_folder / "norm_min_max_values.pkl"
min_max_values = {
    "min_pf_input": min_pf_input,
    "max_pf_input": max_pf_input,
    "min_pf_output": min_pf_output,
    "max_pf_output": max_pf_output
}

with min_max_values_path.open("wb") as f:
    pickle.dump(min_max_values, f)

## Get the ground truth data

In [53]:
# Simulation used as ground truth
gt_sim = data_folder / "simulation_5.pkl"

# Read back the normalization bounds stored on disk
with min_max_values_path.open("rb") as f:
    min_max_values = pickle.load(f)

min_pf_input = min_max_values["min_pf_input"]
max_pf_input = min_max_values["max_pf_input"]
min_pf_output = min_max_values["min_pf_output"]
max_pf_output = min_max_values["max_pf_output"]

# Extract the inputs and outputs of the chosen simulation
gt_inputs, gt_outputs = get_pf_data(gt_sim)

# Normalize the inputs with the stored bounds and save them
gt_inputs_normalized, _, _ = min_max_normalize(gt_inputs, min_pf_input, max_pf_input)
gt_inputs_normalized_path = processed_data_folder / "gt_inputs_minmax_normalized.npy"
np.save(gt_inputs_normalized_path, gt_inputs_normalized)

# Same for the outputs
gt_outputs_normalized, _, _ = min_max_normalize(gt_outputs, min_pf_output, max_pf_output)
gt_outputs_normalized_path = processed_data_folder / "gt_outputs_minmax_normalized.npy"
np.save(gt_outputs_normalized_path, gt_outputs_normalized)