In [1]:
import pandas as pd
import numpy as np
import copy
from typing import Tuple, List
import pre_process_data_utils
import state_extractor
import grid2op
from tqdm import tqdm
from numba.core.errors import NumbaWarning
import warnings
warnings.simplefilter('ignore', category=NumbaWarning)

<a class="anchor" id="nutshell"></a>
<h2 style="font-family:'Verdana',sans-serif; color:#1D7874;">1. Convert observations to features (data frame) </h2>

In [2]:
# Create a dataframe of features from several files containing stored observations.
# Runs 0-4 are stored under the "uni01" prefix, all later runs under "uni005".
idx = [11, 12, 13, 14, 16, 17, 18, 19]  # files no. 10 and 15 were corrupted
fname_list = [f"data/raw/obs_java_uni01_v4_{i}.pickle" for i in range(0, 5)]
fname_list += [
    f"data/raw/obs_java_uni005_v4_{i}.pickle"
    for i in [*range(5, 10), *idx, *range(20, 60)]
]

# Extract one feature frame per observation file.
X_list = []
for fname in tqdm(fname_list):
    X = pre_process_data_utils.get_features_from_obs_list(
        fname,
        state_extractor_class=state_extractor.RhoMaintenanceDates,
        # state_extractor_kwargs={"area": 1}
    )
    X_list.append(X)

# Stack all frames row-wise; ignore_index=True renumbers the rows 0..n-1 in
# the same step instead of rebuilding the index by hand afterwards.
X_comb = pd.concat(X_list, axis=0, ignore_index=True)
X_comb.to_csv("data/X_java_minimal.csv", sep=",")


  0%|          | 0/58 [00:00<?, ?it/s]numba cannot be imported and numba functions are disabled.
Probably the execution is slow.
Please install numba to gain a massive speedup.

numba cannot be imported and numba functions are disabled.
Probably the execution is slow.
Please install numba to gain a massive speedup.

numba cannot be imported and numba functions are disabled.
Probably the execution is slow.
Please install numba to gain a massive speedup.

100%|██████████| 8000/8000 [01:42<00:00, 77.97it/s]
  2%|▏         | 1/58 [01:51<1:46:20, 111.94s/it]numba cannot be imported and numba functions are disabled.
Probably the execution is slow.
Please install numba to gain a massive speedup.

numba cannot be imported and numba functions are disabled.
Probably the execution is slow.
Please install numba to gain a massive speedup.

numba cannot be imported and numba functions are disabled.
Probably the execution is slow.
Please install numba to gain a massive speedup.

100%|██████████| 8000

<a class="anchor" id="nutshell"></a>
<h2 style="font-family:'Verdana',sans-serif; color:#1D7874;">2. Combine target data into one file </h2>

In [4]:
# Merge the per-run target CSV files into a single combined file.
# Runs 0-4 use the "uni01" prefix and every later run uses "uni005";
# runs 10 and 15 are not part of the list.
idx = [11, 12, 13, 14, 16, 17, 18, 19]
uni005_runs = [*range(5, 10), *idx, *range(20, 60)]
fname_list = (
    [f"data/raw/Y_java_uni01_v4_{i}.csv" for i in range(0, 5)]
    + [f"data/raw/Y_java_uni005_v4_{i}.csv" for i in uni005_runs]
)
pre_process_data_utils.combine_data(fname_list, "data/Y_java_new.csv")


In [5]:
# Combine the two maintenance feature files into one.
# NOTE(review): the output path is identical to the first input path, so this
# cell presumably overwrites its own input and is not safe to re-run -- confirm.
maintenance_parts = [
    "data/X_java_comp_maintenance.csv",
    "data/X_java_comp_maintenance_pt2.csv",
]
maintenance_out = "data/X_java_comp_maintenance.csv"
pre_process_data_utils.combine_data(maintenance_parts, maintenance_out)

<a class="anchor" id="nutshell"></a>
<h2 style="font-family:'Verdana',sans-serif; color:#1D7874;"> 3. Create target data. It's sufficient to choose one of a-d </h2>

<a class="anchor" id="nutshell"></a>
<h2 style="font-family:'Verdana',sans-serif; color:#1D7874;">a) Create targets for predicting agent survival</h2>

In [5]:
# Extract the per-contingency survival targets from the combined target file.
load_path = "data/Y_java_new.csv"
save_path = "data/Y_java_survival_new.csv"

Y = pd.read_csv(load_path, index_col=0)
# Keep only the columns whose name contains the "survival_cont_" marker.
cols = list(filter(lambda name: "survival_cont_" in name, Y.columns))
Y = Y.loc[:, cols]
Y.to_csv(save_path, sep=",")

<a class="anchor" id="nutshell"></a>
<h2 style="font-family:'Verdana',sans-serif; color:#1D7874;">b) Create targets for predicting failure timestep </h2>

In [None]:
# Build the targets for predicting the grid failure timestep.
# NOTE(review): this reads "data/Y_java.csv", while the combine cell of this
# notebook writes "data/Y_java_new.csv" -- confirm which file is intended.
load_path = "data/Y_java.csv"
save_path = "data/Y_java_fail_t.csv"
horizon = 12  # forwarded to process_Y_failure_t_data; presumably a number of timesteps

Y = pd.read_csv(load_path, index_col=0)
# Select the columns whose name contains "fail_t_cont".
cols = [name for name in Y.columns if "fail_t_cont" in name]
Y_fail_t = Y.loc[:, cols]
Y_new = pre_process_data_utils.process_Y_failure_t_data(Y_fail_t, horizon)
Y_new.to_csv(save_path, sep=",")

<a class="anchor" id="nutshell"></a>
<h2 style="font-family:'Verdana',sans-serif; color:#1D7874;">c) Create targets for predicting survival in specific area </h2>

In [None]:
# Write out the survival targets that belong to one area of the grid.
area = 1  # position of the area in the environment's lines-by-area mapping

Y_survival = pd.read_csv("data/Y_java_survival.csv", index_col=0)

# Look up the line ids of the chosen area from the environment's game rules.
env = grid2op.make("l2rpn_idf_2023")
lines_by_area = [*env._game_rules.legal_action.lines_id_by_area.values()]
lines_area = lines_by_area[area]

Y_processed = pre_process_data_utils.get_Y_survival_for_area(Y_survival, lines_area)
Y_processed.to_csv(f"data/Y_java_survival_area_{area}.csv", sep=",")

<a class="anchor" id="nutshell"></a>
<h2 style="font-family:'Verdana',sans-serif; color:#1D7874;">d) Create targets for predicting survival in 1D format (untested) </h2>

In [None]:
# Process X and Y for predicting survival in 1D format
load_path_x = "data/X_java_maintenance.csv"
load_path_y = "data/Y_java_comb.csv"
save_path_x = "data/X_java_maintenance_1D.csv"
save_path_y = "data/Y_java_comb_1D.csv"

X = pd.read_csv(load_path_x, index_col=0)
Y = pd.read_csv(load_path_y, index_col=0)

# Keep only the per-contingency survival columns of the target frame.
cols = [col for col in Y.columns if "survival_cont_" in col]
Y_survival = Y[cols]

# Restrict the targets to the rows listed in X's index, and bind the result to
# the name that is actually passed on below (a misspelled target name would
# silently leave Y_survival unaligned).
# NOTE(review): .iloc treats X's index values as row positions in Y -- confirm
# that X's index holds positions rather than labels.
Y_survival = Y_survival.iloc[X.index]

X_new, Y_new = pre_process_data_utils.make_Y_1D(X, Y_survival)
X_new.to_csv(save_path_x, sep=",")
Y_new.to_csv(save_path_y, sep=",")

<a class="anchor" id="nutshell"></a>
<h2 style="font-family:'Verdana',sans-serif; color:#1D7874;">4. Create training and testing set using feature and target data </h2>

In [3]:
# Create the train/test files from one feature file and one target file.
# Alternative input set, currently disabled:
#   x_path="data/X_java_comp_main.csv"
#   y_path="data/Y_java_survival.csv"
#   save_dirname="data/java_comp_maintenance_survival"
split_kwargs = {
    "x_path": "data/X_java.csv",
    "y_path": "data/Y_java_survival.csv",
    "save_dirname": "data/test",
    "seed": 15,
}
pre_process_data_utils.make_train_test_files(**split_kwargs)