### This notebook contains the codes to extract and process the necessary simulation data for constructing the machine learning model. The contents must be incorporated to the resilience_metrics.py once finalized.

In [1]:
%load_ext autoreload
%autoreload 2

import warnings
warnings.filterwarnings('ignore')

from IPython.display import clear_output

import os
import pandas as pd
from pathlib import Path
from sklearn import metrics
import statistics
import copy

import infrarisk.src.network_sim_models.interdependencies as interdependencies
from infrarisk.src.network_sim_models.integrated_network import *

In [2]:

network_dir = Path('../../data/networks/micropolis')
water_folder = network_dir/'water'
power_folder = network_dir/'power'

micropolis_network = IntegratedNetwork(name = 'Micropolis', 
                                       water_folder= water_folder,
                                       power_folder = power_folder,
                                       water_sim_type = 'PDA',
                                       power_sim_type='1ph')

Water network successfully loaded from ..\..\data\networks\micropolis\water/water.inp. The analysis type is set to PDA.
initial simulation duration: 60s; hydraulic time step: 60s; pattern time step: 3600s

Power system successfully loaded from ..\..\data\networks\micropolis\power\power.json. Single phase power flow simulation will be used.



In [14]:
# Set scenarios folder
folder = Path('../../data/networks/micropolis/scenarios')
#scenarios = [f for f in sorted(os.listdir(folder))]
scenarios = ['beatrice_sim']

#list of recovery strategies to be considered
#strategies = ['capacity', 'centrality', 'crewdist', 'zone']
strategies = ['capacity']

#create the empty dataframe for ML dataset
ml_df = pd.DataFrame(columns =["scenario",
         "strategy",
         'water_perf_ecs',
         'water_perf_pcs',
         'power_perf_ecs',
         'power_perf_pcs',
         'water_mains',
         "water_pumps", 
         "water_tanks",
         "power_lines", 
         "transpo_links",
         "all_compons"])

abnormal_results = []

junc_list = micropolis_network.wn.junction_name_list
base_water_demands = micropolis_network.base_water_node_supply
base_power_demands = micropolis_network.base_power_supply


In [15]:
for index, scenario in enumerate(scenarios):
    print(index, ". ", scenario)
    ml_df_new = {"scenario": scenario,
         "strategy": None,
         'water_perf_ecs': None,
         'water_perf_pcs': None,
         'power_perf_ecs': None ,
         'power_perf_pcs': None,
         "water_mains": 0, "water_pumps": 0, "water_tanks":0,
         "power_lines": 0, "transpo_links": 0, "all_compons": 0}
    
    disruption_file = pd.read_csv(f"{folder}/{scenario}/disruption_file.csv")
    ml_df_new["all_compons"] = disruption_file.shape[0]
    for _, row in disruption_file.iterrows():
        if row['components'].startswith('P_L'):
            ml_df_new['power_lines'] += 1
        elif row['components'].startswith('W_PMA'):
            ml_df_new['water_mains'] += 1
        elif row['components'].startswith('W_T'):
            ml_df_new['water_tanks'] += 1
        elif row['components'].startswith('W_WP'):
            ml_df_new['water_pumps'] += 1
        elif row['components'].startswith('T_L'):
            ml_df_new['transpo_links'] += 1
        else:
            print("Component type not detectable.")            
    
    for strategy in strategies:  
        ml_df_new['strategy'] = strategy
        water_demands_file = f"{folder}/{scenario}/{strategy}/water_junc_demand.csv"
        power_demands_file = f"{folder}/{scenario}/{strategy}/power_load_demand.csv"
        water_pressure_file = f"{folder}/{scenario}/{strategy}/water_node_pressure.csv"
        
        if os.path.isfile(water_demands_file):
            water_demands = pd.read_csv(water_demands_file, sep = "\t")
            water_time_list = water_demands.time/60
            water_time_list = water_time_list.tolist()
            rel_time_list = water_demands['time'] % (24*3600)
            index_list = [int(x/60) for x in rel_time_list]
            water_demands = water_demands[junc_list]
            
            water_pressures = pd.read_csv(water_pressure_file, sep = "\t")
            water_pressures = water_pressures[junc_list]
            water_press_corrections = copy.deepcopy(water_pressures)
            
            # if micropolis_network.water_sim_type == "DDA":
            #     for index, column in enumerate(water_pressures.columns):
            #         water_press_corrections[column] = water_pressures[column].apply(lambda x: 0 if x <= 0 
            #                 else ((x/wn.options.hydraulic.threshold_pressure)**0.5 if x <= wn.options.hydraulic.threshold_pressure else x))
            #     water_demands = water_demands * water_press_corrections
            
            power_demands = pd.read_csv(power_demands_file, sep = "\t")
            power_time_list = power_demands.time/60
            power_time_list= power_time_list.tolist()
            
            base_water_demands_new = base_water_demands.iloc[index_list].reset_index(drop=True)
            base_water_demands_new = base_water_demands_new[junc_list]
            
            water_demands_ratio = water_demands/ base_water_demands_new
            water_demands_ratio = water_demands_ratio.clip(upper=1)
            
            water_ecs_list = water_demands_ratio.mean(axis = 1, skipna = True).tolist()
            
            if water_ecs_list[-1] < 0.9:
                abnormal_results.append(scenario)
                
            water_pcs_list = pd.concat([water_demands, base_water_demands_new]).min(level=0).sum(axis=1, skipna = True)/base_water_demands_new.sum(axis=1, skipna = True)
            water_pcs_list = water_pcs_list.tolist()

            base_load_demands = pd.DataFrame(base_power_demands.load.p_mw.tolist() + base_power_demands.motor.pn_mech_mw.tolist()).transpose()
            base_load_demands.columns = base_power_demands.load.name.tolist() + base_power_demands.motor.name.tolist()
            base_load_demands = pd.concat([base_load_demands]*(power_demands.shape[0])).reset_index(drop=True)

            power_demand_ratio = power_demands.iloc[:,1:] / base_load_demands
            power_demand_ratio = power_demand_ratio.clip(upper=1)

            power_ecs_list = power_demand_ratio.mean(axis = 1, skipna = True).tolist()
            power_pcs_list = pd.concat([power_demands.iloc[:,1:], base_load_demands]).min(level=0).sum(axis=1, skipna = True)/base_load_demands.sum(axis=1, skipna = True)
            power_pcs_list = power_pcs_list.tolist()
            
            ml_df_new['water_perf_ecs'] = round(metrics.auc(water_time_list, water_ecs_list), 3)
            ml_df_new['water_perf_pcs'] = round(metrics.auc(water_time_list, water_pcs_list), 3)
            ml_df_new['power_perf_ecs'] = round(metrics.auc(power_time_list, power_ecs_list), 3)
            ml_df_new['power_perf_pcs'] = round(metrics.auc(power_time_list, power_pcs_list), 3)
            
            power_auc_df = pd.DataFrame(data = {'time': power_time_list, 
                                                'ecs': power_ecs_list, 
                                                'pcs': power_pcs_list})
            power_auc_df.to_csv(f"{folder}/{scenario}/{strategy}/power_auc.csv", index = False)
            
            water_auc_df = pd.DataFrame(data = {'time': water_time_list, 
                                                'ecs': water_ecs_list, 
                                                'pcs': water_pcs_list})
            water_auc_df.to_csv(f"{folder}/{scenario}/{strategy}/water_auc.csv", index = False)
            
            ml_df = ml_df.append(ml_df_new, ignore_index=True)
            print(ml_df.iloc[-1,:].tolist())
            
        else:
            pass
    clear_output(wait=True)
    
abnormal_results = list(set(abnormal_results))

0 .  beatrice_sim
['beatrice_sim', 'capacity', 1702.945, 1673.978, 2892.0, 2892.0, 11, 0, 0, 0, 0, 11]


In [6]:
abnormal_results

[]

In [5]:
ml_df

Unnamed: 0,scenario,strategy,water_perf_ecs,water_perf_pcs,power_perf_ecs,power_perf_pcs,water_mains,water_pumps,water_tanks,power_lines,transpo_links,all_compons


In [18]:
ml_df.to_csv("auc_df.csv", index = False)

In [4]:
ml_df = pd.read_csv("auc_df.csv")
ml_df.head()

Unnamed: 0,scenario,strategy,water_perf_ecs,water_perf_pcs,power_perf_ecs,power_perf_pcs,water_mains,water_pumps,water_tanks,power_lines,transpo_links,all_compons
0,point130,capacity,101.0,101.0,100.0,100.0,0,0,0,0,1,1
1,point130,centrality,101.0,101.0,100.0,100.0,0,0,0,0,1,1
2,point130,crewdist,101.0,101.0,100.0,100.0,0,0,0,0,1,1
3,point130,zone,101.0,101.0,100.0,100.0,0,0,0,0,1,1
4,point17,capacity,415.0,415.0,414.0,414.0,0,0,0,1,0,1


In [5]:
ml_df['ecs_auc'] = ml_df['water_perf_ecs']*0.5 + ml_df['power_perf_ecs']*0.5
ml_df['pcs_auc'] = ml_df['water_perf_pcs']*0.5 + ml_df['power_perf_pcs']*0.5
ml_df.head()

Unnamed: 0,scenario,strategy,water_perf_ecs,water_perf_pcs,power_perf_ecs,power_perf_pcs,water_mains,water_pumps,water_tanks,power_lines,transpo_links,all_compons,ecs_auc,pcs_auc
0,point130,capacity,101.0,101.0,100.0,100.0,0,0,0,0,1,1,100.5,100.5
1,point130,centrality,101.0,101.0,100.0,100.0,0,0,0,0,1,1,100.5,100.5
2,point130,crewdist,101.0,101.0,100.0,100.0,0,0,0,0,1,1,100.5,100.5
3,point130,zone,101.0,101.0,100.0,100.0,0,0,0,0,1,1,100.5,100.5
4,point17,capacity,415.0,415.0,414.0,414.0,0,0,0,1,0,1,414.5,414.5


In [7]:
ml_df.columns

Index(['scenario', 'strategy', 'water_perf_ecs', 'water_perf_pcs',
       'power_perf_ecs', 'power_perf_pcs', 'water_mains', 'water_pumps',
       'water_tanks', 'power_lines', 'transpo_links', 'all_compons', 'ecs_auc',
       'pcs_auc'],
      dtype='object')

In [2]:

import numpy as np
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.feature_selection import SelectFromModel
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn import tree
from sklearn.linear_model import LinearRegression

## Decision Tree Model

In [10]:
lab = 'ecs_auc'

features = ml_df[['strategy', 'water_pumps', 'water_mains', 'water_tanks', 'power_lines',
                  'transpo_links', lab]]
features = features.replace('nan', np.NaN)
features = features.dropna()

labels = features[lab]
del features[lab]
features = pd.get_dummies(features)

feature_list = list(features.columns)
features = np.array(features)


KeyError: "['ecs_auc'] not in index"

In [36]:
clf = ExtraTreesRegressor(n_estimators=500)
clf = clf.fit(features, labels)

print(feature_list)
print(clf.feature_importances_)

pd.DataFrame(data = {'features': feature_list,
                     'feature importance': clf.feature_importances_})

['water_pumps', 'water_mains', 'water_tanks', 'power_lines', 'transpo_links', 'strategy_capacity', 'strategy_centrality', 'strategy_crewdist', 'strategy_zone']
[0.    0.329 0.    0.362 0.24  0.015 0.018 0.022 0.013]


Unnamed: 0,features,feature importance
0,water_pumps,0.0
1,water_mains,0.329332
2,water_tanks,0.0
3,power_lines,0.3619
4,transpo_links,0.240288
5,strategy_capacity,0.015152
6,strategy_centrality,0.01781
7,strategy_crewdist,0.022448
8,strategy_zone,0.013071


In [18]:
model = SelectFromModel(clf, prefit=True, threshold = '0.5*median')
features_new = model.transform(features)
features_new.shape

(1168, 7)

In [19]:
feature_list_new = []

for i, feature in enumerate(feature_list):
    if model.get_support()[i]:
        feature_list_new.append(feature)

feature_list_new

['water_mains',
 'power_lines',
 'transpo_links',
 'strategy_capacity',
 'strategy_centrality',
 'strategy_crewdist',
 'strategy_zone']

In [21]:
train_features, test_features, train_labels, test_labels = train_test_split(features_new, labels, test_size = 0.25, random_state = 43)
print('Training Features Shape:', train_features.shape)
print('Training Labels Shape:', train_labels.shape)
print('Testing Features Shape:', test_features.shape)
print('Testing Labels Shape:', test_labels.shape)

Training Features Shape: (876, 7)
Training Labels Shape: (876,)
Testing Features Shape: (292, 7)
Testing Labels Shape: (292,)


In [24]:
parameters = {'max_depth':range(2,10), 'min_samples_leaf': range(3,10)}
clf = GridSearchCV(tree.DecisionTreeRegressor(), parameters, n_jobs=5, scoring = 'accuracy')
clf.fit(X=train_features, y=train_labels)
tree_model = clf.best_estimator_
print (clf.best_score_, clf.best_params_) 

nan {'max_depth': 2, 'min_samples_leaf': 3}


In [32]:
clf = tree.DecisionTreeRegressor(max_depth = 2, random_state = 0, min_samples_leaf=3)
# Train the model on training data
clf.fit(train_features, train_labels)

DecisionTreeRegressor(max_depth=2, min_samples_leaf=3, random_state=0)

In [35]:
y_pred=clf.predict(train_features)   

def measure_performance(y_test, y_pred, X_train):
    from sklearn.metrics import mean_squared_error, r2_score 

    rmse = np.sqrt(mean_squared_error(y_test,y_pred))
    r2 = r2_score(y_test,y_pred)

    # Scikit-learn doesn't have adjusted r-square, hence custom code
    n = y_pred.shape[0]
    k = X_train.shape[1]
    adj_r_sq = 1 - (1 - r2)*(n-1)/(n-1-k)

    print(rmse, r2, adj_r_sq)
    
        
measure_performance(train_labels,y_pred, train_features)

613.8122049313879 0.5847773949808732 0.5814288255855576


## Linear regression

In [39]:
linear_regressor = LinearRegression()  # create object for the class
linear_regressor.fit(train_features, train_labels)  # perform linear regression
y_pred = linear_regressor.predict(train_features)  # make predictions

In [44]:
print(feature_list_new)
print(linear_regressor.coef_)

['water_mains', 'power_lines', 'transpo_links', 'strategy_capacity', 'strategy_centrality', 'strategy_crewdist', 'strategy_zone']
[ 92.546  98.866 151.999  20.597 -68.527  33.737  14.193]


In [43]:
linear_regressor.score(train_features, train_labels, sample_weight=None)

0.7054579748908203