# Construction of IO dataset for OPF BBMS

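In this notebook, we construct the input–output (IO) dataset for the OPF BBMs: we extract the OPF inputs and outputs recorded at every step of the controlled power grid simulations, stack them into single arrays, min–max normalize them, and save the results (together with the normalization bounds) to disk.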


In [5]:
# Importing libraries
import os
from pathlib import Path
import dill as pickle
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tqdm

import greyboxmodels.cpsmodels.physical.electrical.PowerFlowPowerGrid as PG
import greyboxmodels.cpsmodels.Plant as Plant

# Set the working directory to the project root, so that the relative paths used
# later in the notebook (e.g. "data/OPF/") resolve inside the project.
# NOTE(review): this is a machine-specific absolute path; adjust it before running elsewhere.
os.chdir("D:/projects/IPTLC_BBMs")
print(f"Current working directory: {os.getcwd()}")

Current working directory: D:\projects\IPTLC_BBMs


In [6]:
# Folder that holds the raw simulation results
# NOTE(review): machine-specific absolute path; adjust it before running elsewhere.
data_folder = Path("D:/projects/Hierarchical_CPS_models/data/simulations/controlled_pg/20240226_184851/")

# Simulation file used below to explore the structure of the data
target_simulation = data_folder / "simulation_0.pkl"

# Processed data is stored (relative to the working directory) in a folder named
# after the simulation batch; create it if it does not exist yet
processed_data_folder = Path("data/OPF/") / data_folder.name
processed_data_folder.mkdir(parents=True, exist_ok=True)

# Report the resolved locations
print(f"Data folder: {data_folder}")
print(f"Target simulation: {target_simulation}")
print(f"Processed data folder: {processed_data_folder}")

Data folder: D:\projects\Hierarchical_CPS_models\data\simulations\controlled_pg\20240226_184851
Target simulation: D:\projects\Hierarchical_CPS_models\data\simulations\controlled_pg\20240226_184851\simulation_0.pkl
Processed data folder: data\OPF\20240226_184851


In [7]:
# Open the data
# NOTE: `pickle` is `dill` here (see the imports). Unpickling can execute code
# embedded in the file, so only load simulation files from a trusted source.
with open(target_simulation, "rb") as f:
    sim_data = pickle.load(f)

# Display the top-level keys of the simulation record
sim_data.keys()

dict_keys(['time', 'external_stimuli', 'state', 'step_data', 'plant'])

In [8]:
# Keys recorded at each simulation step, inspected on the first step (CC = control center)
sim_data['step_data'][0].keys() # CC input: 'cc_input'. CC output: 'pg_control_input'

dict_keys(['time', 'state_pre_update', 'state_post_update', 'power_demands', 'cc_input', 'opf_cost', 'pg_control_input', 'pg_response', 'power_grid_step_data', 'control_center_step_data'])

In [9]:
# Open the plant: the pickled plant object is stored in the same folder as the simulations
with open(data_folder / "plant.pkl", "rb") as f:
    plant = pickle.load(f)

# Display the object to check what was loaded
plant

<greyboxmodels.cpsmodels.cyberphysical.ControlledPowerGrid.ControlledPowerGrid.ControlledPowerGrid at 0x271cf3d3d90>

In [10]:
# Get the power grid and the control center
# (convenience handles to the two sub-models held by the plant)
power_grid = plant.power_grid
control_center = plant.control_center

In [11]:
# Get the name of the states
# Optional mapping from the short variable prefixes to descriptive names.
# It is kept here for reference but disabled: with `name_mapping = None` the
# names printed below use the short prefixes (e.g. 'Pd 0', 'Vm 3').
# name_mapping = {'Pd': 'Active power demand',
#                 'Qd': 'Reactive power demand',
#                 'Pg': 'Active power generation',
#                 'Qg': 'Reactive power generation',
#                 'Vm': 'Voltage magnitude',
#                 'Va': 'Voltage angle',
#                 'ILine': 'Line current',
#                 'LTrafo': 'Transformer loading',
#                 'piGen': 'Generator status',
#                 'piLine': 'Line status',
#                 'piTrafo': 'Transformer status'}
name_mapping = None

# Power grid: names of the states, the uncontrolled inputs (external stimuli) and the controlled inputs
pg_states = Plant.get_variables_names(plant.power_grid.state_idx, name_mapping)
pg_ext_stims = Plant.get_variables_names(plant.power_grid.uncontrolled_inputs_idx, name_mapping)
pg_cont_inputs = Plant.get_variables_names(plant.power_grid.controlled_inputs_idx, name_mapping)

# Control center: names of the states and the controlled inputs
cc_states = Plant.get_variables_names(plant.control_center.state_idx, name_mapping)
cc_cont_inputs = Plant.get_variables_names(plant.control_center.controlled_inputs_idx, name_mapping)

# Print each list followed by a blank line to keep the long outputs readable
print(f"Power grid states: {pg_states}\n")
print(f"Power grid external stimuli: {pg_ext_stims}\n")
print(f"Power grid control inputs: {pg_cont_inputs}\n")

print(f"Control center states: {cc_states}\n")
print(f"Control center control inputs: {cc_cont_inputs}\n")

Power grid states: ['Pd 0', 'Pd 1', 'Pd 2', 'Pd 3', 'Pd 4', 'Pd 5', 'Pd 6', 'Pd 7', 'Pd 8', 'Pd 9', 'Pd 10', 'Qd 0', 'Qd 1', 'Qd 2', 'Qd 3', 'Qd 4', 'Qd 5', 'Qd 6', 'Qd 7', 'Qd 8', 'Qd 9', 'Qd 10', 'Pg 0', 'Pg 1', 'Pg 2', 'Pg 3', 'Pg 4', 'Qg 0', 'Qg 1', 'Qg 2', 'Qg 3', 'Qg 4', 'Vm 0', 'Vm 1', 'Vm 2', 'Vm 3', 'Vm 4', 'Vm 5', 'Vm 6', 'Vm 7', 'Vm 8', 'Vm 9', 'Vm 10', 'Vm 11', 'Vm 12', 'Vm 13', 'Va 0', 'Va 1', 'Va 2', 'Va 3', 'Va 4', 'Va 5', 'Va 6', 'Va 7', 'Va 8', 'Va 9', 'Va 10', 'Va 11', 'Va 12', 'Va 13', 'ILine 0', 'ILine 1', 'ILine 2', 'ILine 3', 'ILine 4', 'ILine 5', 'ILine 6', 'ILine 7', 'ILine 8', 'ILine 9', 'ILine 10', 'ILine 11', 'ILine 12', 'ILine 13', 'ILine 14', 'LTrafo 0', 'LTrafo 1', 'LTrafo 2', 'LTrafo 3', 'LTrafo 4', 'piGen 0', 'piGen 1', 'piGen 2', 'piGen 3', 'piGen 4', 'piLine 0', 'piLine 1', 'piLine 2', 'piLine 3', 'piLine 4', 'piLine 5', 'piLine 6', 'piLine 7', 'piLine 8', 'piLine 9', 'piLine 10', 'piLine 11', 'piLine 12', 'piLine 13', 'piLine 14', 'piTrafo 0', 'piTraf

In [12]:
# Keys logged by the control center at each step; 'opf_input' and 'opf_output'
# are the entries extracted into the dataset below
sim_data['step_data'][0]['control_center_step_data'].keys()

dict_keys(['opf_options', 'opf_input', 'opf_output'])

In [13]:
# Get inputs and outputs
# Stack the per-step OPF input/output records of the loaded simulation into
# 2-D arrays with one row per simulation step
opf_inputs = np.array([x['control_center_step_data']["opf_input"] for x in sim_data['step_data']])
opf_outputs = np.array([x['control_center_step_data']["opf_output"] for x in sim_data['step_data']])

# Shapes (rows = steps, columns = OPF input/output variables)
print(f"CC inputs shape: {opf_inputs.shape}")
print(f"CC outputs shape: {opf_outputs.shape}")

CC inputs shape: (384, 51)
CC outputs shape: (384, 9)


In [14]:
# Create a function that receives a path to a simulation and returns the inputs and outputs
def get_opf_data(filepath: Path):
    """Extract the OPF inputs and outputs stored in one simulation file.

    Parameters
    ----------
    filepath : Path
        Pickled simulation record. It must contain a ``'step_data'`` sequence
        whose items hold ``'control_center_step_data'`` with the entries
        ``'opf_input'`` and ``'opf_output'``.

    Returns
    -------
    tuple of np.ndarray
        ``(opf_inputs, opf_outputs)``, each built by stacking the per-step
        records in step order.
    """
    # NOTE: unpickling can execute code embedded in the file; only load trusted files
    with open(filepath, "rb") as sim_file:
        simulation = pickle.load(sim_file)

    # Data logged by the control center, one record per simulation step
    cc_records = [step['control_center_step_data'] for step in simulation['step_data']]

    input_rows = [record["opf_input"] for record in cc_records]
    output_rows = [record["opf_output"] for record in cc_records]

    return np.array(input_rows), np.array(output_rows)

# Test the function on the target simulation; the shapes should match the ones
# obtained from the inline extraction (same file, same records)
opf_inputs, opf_outputs = get_opf_data(target_simulation)
print(f"Testing the file: {target_simulation}")
print(f"CC inputs shape: {opf_inputs.shape}")
print(f"CC outputs shape: {opf_outputs.shape}")

Testing the file: D:\projects\Hierarchical_CPS_models\data\simulations\controlled_pg\20240226_184851\simulation_0.pkl
CC inputs shape: (384, 51)
CC outputs shape: (384, 9)


In [15]:
# Now, a function that iterates over all the simulations and returns the inputs and outputs in a single numpy array
def get_opf_data_all(data_folder: Path):
    """Collect the OPF inputs and outputs of every simulation in a folder.

    Parameters
    ----------
    data_folder : Path
        Folder containing the simulation files (regular files whose name
        starts with ``"simulation"``, e.g. ``simulation_0.pkl``) and the
        pickled plant ``plant.pkl``.

    Returns
    -------
    tuple
        ``(inputs_matrix, outputs_matrix, plant)``: the per-simulation arrays
        concatenated along the first axis, and the object loaded from
        ``plant.pkl``.

    Raises
    ------
    ValueError
        If the folder contains no simulation file.
    """
    # The simulations are files called "simulation_0.pkl", "simulation_1.pkl", etc.
    # `iterdir()` yields entries in arbitrary order, so sort them to make the row
    # order of the resulting dataset reproducible across runs and machines.
    simulation_files = sorted(
        f for f in data_folder.iterdir() if f.is_file() and f.name.startswith("simulation")
    )
    if not simulation_files:
        raise ValueError(f"No simulation files found in {data_folder}")

    # Per-simulation arrays, in the same order as `simulation_files`
    inputs = []
    outputs = []

    # Iterate over all the simulations and get the inputs and outputs for each one
    for sim_file in tqdm.tqdm(simulation_files):
        opf_inputs, opf_outputs = get_opf_data(sim_file)
        inputs.append(opf_inputs)
        outputs.append(opf_outputs)

    # Stack all the simulations: one row per simulation step
    inputs_matrix = np.concatenate(inputs, axis=0)
    outputs_matrix = np.concatenate(outputs, axis=0)

    # Get the plant
    # NOTE: unpickling can execute code embedded in the file; only load trusted files
    with open(data_folder / "plant.pkl", "rb") as f:
        plant = pickle.load(f)

    return inputs_matrix, outputs_matrix, plant

# Test the function: build the full dataset from every simulation in the data folder.
# This rebinds `opf_inputs`, `opf_outputs` and `plant` to the full-dataset results.
opf_inputs, opf_outputs, plant = get_opf_data_all(data_folder)
print(f"CC inputs shape: {opf_inputs.shape}")
print(f"CC outputs shape: {opf_outputs.shape}")

100%|██████████| 21/21 [00:21<00:00,  1.02s/it]

CC inputs shape: (8064, 51)
CC outputs shape: (8064, 9)





In [18]:
# Persist the raw (un-normalized) OPF dataset as .npy files in the processed-data folder
inputs_path = processed_data_folder / "opf_inputs.npy"
np.save(inputs_path, opf_inputs)

outputs_path = processed_data_folder / "opf_outputs.npy"
np.save(outputs_path, opf_outputs)

## Normalize the inputs and outputs

In [27]:
# Min-max normalization helper, used for both the OPF inputs and the OPF outputs
def min_max_normalize(array: np.ndarray, min_array: np.ndarray = None, max_array: np.ndarray = None):
    """Scale ``array`` along its first axis with ``(array - min) / (max - min)``.

    Parameters
    ----------
    array : np.ndarray
        Data to normalize; the bounds are taken along ``axis=0``.
    min_array, max_array : np.ndarray, optional
        Bounds to use. A bound left as ``None`` is computed from ``array``.
        Pass both (e.g. the bounds returned by a previous call) to normalize
        new data consistently with an existing dataset; in that case they are
        used and returned unchanged.

    Returns
    -------
    tuple of np.ndarray
        ``(array_normalized, min_array, max_array)`` where the bounds are the
        ones actually used, so they can be stored and reused.

    Notes
    -----
    When a bound is computed here, entries with ``min == max`` (constant
    columns) would divide by zero. Their lower bound is shifted down by 1, so
    the range becomes 1 and a constant column normalizes to 1.0.
    """
    # The zero-range fix is only applied when at least one bound is derived here;
    # bounds supplied in full by the caller are trusted as they are.
    bounds_derived = min_array is None or max_array is None

    # Each missing bound is computed independently, so supplying only one of
    # them neither fails nor silently discards the supplied one.
    if min_array is None:
        min_array = array.min(axis=0)
    if max_array is None:
        max_array = array.max(axis=0)

    if bounds_derived:
        lower = np.asarray(min_array)
        degenerate = lower == np.asarray(max_array)
        # np.where builds a new array: the caller's arrays are never modified
        # in place, and scalar bounds (1-D input) are handled as well.
        min_array = np.where(degenerate, lower - 1, lower)

    array_normalized = (array - min_array) / (max_array - min_array)

    return array_normalized, min_array, max_array

In [28]:
# Inputs
# Normalize the full input dataset and keep the bounds so they can be reused later.
# Columns that are constant over the dataset come out as 1.0 (see `min_max_normalize`).
opf_inputs_normalized, min_opf_input, max_opf_input = min_max_normalize(opf_inputs)

# Show the first five rows as a sanity check
opf_inputs_normalized[:5, :]

array([[0.28839297, 0.35232922, 0.33112368, 0.27924059, 0.28473637,
        0.31066581, 0.29018455, 0.27880947, 0.18634716, 0.25922208,
        0.29725575, 0.34607905, 0.35242928, 0.76018325, 0.33308821,
        0.29025568, 0.34156017, 0.31093007, 0.29491433, 0.39408146,
        0.39448969, 0.24109994, 0.5081227 , 0.48963792, 1.        ,
        0.64723805, 1.        , 1.        , 1.        , 1.        ,
        1.        , 1.        , 1.        , 1.        , 1.        ,
        1.        , 1.        , 1.        , 1.        , 1.        ,
        1.        , 1.        , 1.        , 1.        , 1.        ,
        1.        , 1.        , 1.        , 1.        , 1.        ,
        1.        ],
       [0.15365078, 0.28729384, 0.29119459, 0.32318096, 0.33462806,
        0.34998045, 0.39808701, 0.21850338, 0.21137091, 0.30809828,
        0.25546785, 0.34659303, 0.29735035, 0.7886959 , 0.3398552 ,
        0.29303315, 0.23151253, 0.24514833, 0.15331842, 0.3579526 ,
        0.43636306, 0.28110

In [29]:
# Outputs
# Normalize the full output dataset and keep the bounds so they can be reused later
opf_outputs_normalized, min_opf_output, max_opf_output = min_max_normalize(opf_outputs)

# Show the first five rows as a sanity check
opf_outputs_normalized[:5, :]

array([[1.00000000e+00, 5.92387418e-01, 3.84754534e-01, 4.97965425e-07,
        5.08122700e-01, 4.89637924e-01, 4.03956782e-01, 9.74935497e-01,
        9.99861988e-01],
       [1.00000000e+00, 6.35593351e-01, 3.45320446e-01, 4.62315130e-07,
        4.08972795e-01, 3.84161545e-01, 3.01613386e-01, 9.61610694e-01,
        9.82675872e-01],
       [1.00000000e+00, 6.11895847e-01, 3.62392382e-01, 5.69677301e-07,
        4.22266387e-01, 3.91848290e-01, 2.97497895e-01, 9.56283251e-01,
        9.72825696e-01],
       [1.00000000e+00, 6.27605492e-01, 3.47120120e-01, 4.73392495e-07,
        3.97871180e-01, 3.67400091e-01, 2.90577697e-01, 9.59560696e-01,
        9.73511779e-01],
       [1.00000000e+00, 6.65867405e-01, 3.16406307e-01, 2.60514300e-07,
        3.93423510e-01, 3.73797146e-01, 3.08376837e-01, 9.83157293e-01,
        9.99748788e-01]])

In [25]:
# Write the normalized datasets to .npy files in the processed-data folder
# NOTE(review): this cell shows execution count 25 while the normalization cells
# above show 27-29, i.e. it was last run before they were re-executed. Re-run it
# so that the files on disk match the arrays displayed above.
inputs_normalized_path = processed_data_folder / "opf_inputs_minmax_normalized.npy"
np.save(inputs_normalized_path, opf_inputs_normalized)

outputs_normalized_path = processed_data_folder / "opf_outputs_minmax_normalized.npy"
np.save(outputs_normalized_path, opf_outputs_normalized)

# Store the normalization bounds as well: they are needed to normalize new data
# in the same way as this dataset
min_max_values = {
    "min_opf_input": min_opf_input,
    "max_opf_input": max_opf_input,
    "min_opf_output": min_opf_output,
    "max_opf_output": max_opf_output,
}

min_max_values_path = processed_data_folder / "norm_min_max_values.pkl"
with min_max_values_path.open("wb") as f:
    pickle.dump(min_max_values, f)

# Ground truth data

In [31]:
# Simulation used as the ground-truth trajectory
# NOTE(review): this file lives in `data_folder`, the same folder that
# `get_opf_data_all` reads, so it is presumably also part of the dataset built
# above. Confirm that this overlap is intended.
gt_sim = data_folder / "simulation_5.pkl"

# Reload the normalization bounds from disk, so the ground truth is scaled with
# exactly the values that were saved next to the dataset
with open(min_max_values_path, "rb") as f:
    min_max_values = pickle.load(f)

min_opf_input, max_opf_input = min_max_values["min_opf_input"], min_max_values["max_opf_input"]
min_opf_output, max_opf_output = min_max_values["min_opf_output"], min_max_values["max_opf_output"]

# Extract the OPF inputs/outputs of the chosen simulation
gt_inputs, gt_outputs = get_opf_data(gt_sim)

# Normalize with the dataset bounds (not with bounds recomputed from this simulation)
gt_inputs_normalized, _, _ = min_max_normalize(gt_inputs, min_array=min_opf_input, max_array=max_opf_input)
gt_outputs_normalized, _, _ = min_max_normalize(gt_outputs, min_array=min_opf_output, max_array=max_opf_output)

# Save the normalized ground truth next to the dataset
gt_inputs_normalized_path = processed_data_folder / "gt_inputs_minmax_normalized.npy"
np.save(gt_inputs_normalized_path, gt_inputs_normalized)

gt_outputs_normalized_path = processed_data_folder / "gt_outputs_minmax_normalized.npy"
np.save(gt_outputs_normalized_path, gt_outputs_normalized)