In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import pandas as pd

import pickle

import wntr

In [3]:
from src.config import config

In [4]:
from src.preprocess.preprocess_heads import (HeadSimsLeakyPreprocessor,
                                             HeadSimsLeakFreePreprocessor,
                                             PressureMeasurementsPreprocessor)
from src.preprocess.helper_funcs import preprocess_exp_leaks_info
from src.tdpbc.helper_funcs import calc_tdpbc_for_daytime_range
from src.datamatrix.helper_funcs import (construct_train_and_test_matrix_for_leak_exp,
                                         construct_ungrouped_leak_labels)

from src.datamatrix.config import PRESSURE_LOGGERS_KEEP_IN_DATAMAT_MC_3

# Load network info and data

In [5]:
# Root directory for all data used in this notebook, taken from the project configuration.
data_dir = config.DATA_DIR

#### Load network info

In [6]:
# Directory with the simulation inputs of run "HPC_run_05": the network .inp file
# and the asset-id tables for pressure loggers and hydrants are read from here.
simulations_input_dir = data_dir / "input/BKTown/HPC_run_05/"

In [7]:
# Build the WNTR water network model from the network .inp file.
# `wn` is later handed to the pressure-measurement preprocessor.
inp_file_name = simulations_input_dir / "BKTown_original_WDN_8Dec2020_17Dec2020_calibrated.inp"
wn = wntr.network.WaterNetworkModel(inp_file_name=inp_file_name)

In [8]:
# Asset-id table of the pressure loggers; its 'asset_id' column is used further down.
# dtype=str makes pandas read every column as text, so ids are not coerced to numbers.
pressure_loggers_asset_id_file = simulations_input_dir / "pressure_loggers_asset_id.csv"
pressure_loggers_asset_id = pd.read_csv(pressure_loggers_asset_id_file, dtype=str)

# Asset-id table of the hydrants; its 'ID' column provides the leak candidates further down.
hydrants_asset_id_file = simulations_input_dir / "hydrants_asset_id.csv"
hydrants_asset_id = pd.read_csv(hydrants_asset_id_file, dtype=str)

#### Load leak scenarios

In [9]:
# Directory with the simulation results of run "HPC_run_05"
# (leak scenarios and the matching head results are loaded from here).
simulated_data_dir = data_dir / "simulated/BKTown/HPC_run_05/"

In [10]:
# Load the pickled leak scenarios.
# The context manager closes the file handle as soon as loading is done
# (a bare `open(...)` passed to pickle.load is never closed explicitly).
# NOTE: unpickling can execute arbitrary code - only load files from a trusted source.
filename = simulated_data_dir / "BKTown_scenarios.pkl"
with open(filename, "rb") as scenarios_file:
    scenarios = pickle.load(scenarios_file)

#### Load head results for leak scenarios

In [11]:
# Load the pickled head results of the leak scenarios.
# The context manager closes the file handle as soon as loading is done
# (a bare `open(...)` passed to pickle.load is never closed explicitly).
# NOTE: unpickling can execute arbitrary code - only load files from a trusted source.
filename = simulated_data_dir / "head_results.pkl"
with open(filename, "rb") as head_results_file:
    head_results = pickle.load(head_results_file)

#### Load leak-free heads

In [12]:
# Directory with the leak-free simulation results (run "local_run_04").
# NOTE(review): this rebinds `simulated_data_dir`, which earlier pointed at "HPC_run_05".
# Re-running the earlier scenario/head loading cells after this one would read from
# the wrong directory - consider a dedicated variable name for the leak-free run.
simulated_data_dir = data_dir / "simulated/BKTown/local_run_04/"

In [13]:
# Load the pickled head results of the leak-free simulation.
# The context manager closes the file handle as soon as loading is done
# (a bare `open(...)` passed to pickle.load is never closed explicitly).
# NOTE: unpickling can execute arbitrary code - only load files from a trusted source.
filename = simulated_data_dir / "head_results_leakfree.pkl"
with open(filename, "rb") as head_results_leakfree_file:
    head_results_leakfree = pickle.load(head_results_leakfree_file)

#### Load measured pressure data

In [14]:
# Raw pressure measurements of the mobile loggers.
# Everything is read as text (dtype=str); conversion is left to the
# PressureMeasurementsPreprocessor applied further down.
measured_data_dir = data_dir / "raw/BKTown/pressure_meas/"

pressure_data_file = measured_data_dir / "mobile_pressure_data-08122020_15022021-UTCp01h00.csv"
pressure_data = pd.read_csv(pressure_data_file, dtype=str)

#### Load info on experiments

In [15]:
# In-field log of the leak experiments, read as text (dtype=str).
# The columns '3GE FID', 'start_leak_datetime' and 'end_leak_datetime' are used further down
# (after preprocessing).
# NOTE(review): `measured_data_dir` is rebound here; before this cell it pointed at the
# pressure measurement directory.
measured_data_dir = data_dir / "raw/BKTown/experiments/"

infield_logging_file = measured_data_dir / "infield_logging_leaks_MC_3.csv"
exp_leaks_info = pd.read_csv(infield_logging_file , dtype=str)

# Configuration

#### Define pressure loggers

In [16]:
# Asset ids of every pressure logger, as a plain Python list.
all_pressure_loggers = pressure_loggers_asset_id['asset_id'].tolist()

#### Define leak candidates

In [17]:
# Every hydrant is a leak candidate: collect the hydrant asset ids as a plain Python list.
all_leak_locs = hydrants_asset_id['ID'].tolist()

#### Preprocess info on experimental leaks

In [18]:
# Preprocess the experiment log; leak datetimes are not rounded to 5 minutes
# (round_leak_datetimes_5min=False).
# NOTE(review): the raw frame is overwritten, so re-running this cell feeds already
# preprocessed data back into the helper - confirm it is idempotent, or keep the raw
# frame under a separate name.
exp_leaks_info = preprocess_exp_leaks_info(exp_leaks_info, round_leak_datetimes_5min=False)

#### Configure preprocessing

In [19]:
# Time window handed to the leaky-simulation preprocessor as sims_start / sims_end.
leaky_sims_start = "2020-12-16 00:00:00"
leaky_sims_end = "2020-12-18 00:00:00"

# Time window handed to the leak-free simulation and measurement preprocessors.
leakfree_sims_start = "2020-12-08 00:00:00"
leakfree_sims_end = "2020-12-18 00:00:00"

# Sub-window handed to the leaky-simulation preprocessor as sims_extract_start / sims_extract_end.
leaky_sims_write_to_disk_start = "2020-12-16 00:00:00"
leaky_sims_write_to_disk_end = "2020-12-18 00:00:00"

#### Configure TDPBC

In [20]:
# Date range used for the TDPBC calculation (the end date is not included, see the call below).
tdpbc_start_date, tdpbc_end_date = (
    "2020-12-08 00:00:00",
    "2020-12-16 00:00:00",
)

In [21]:
# Weekdays that contribute to the TDPBC calculation (working days only).
weekdays_to_include_tdpbc = [
    'Monday',
    'Tuesday',
    'Wednesday',
    'Thursday',
    'Friday',
]

In [22]:
# Forwarded as `lin_weighting` to calc_tdpbc_for_daytime_range.
# NOTE(review): presumably False turns off a linear weighting of the samples - confirm in the helper.
lin_weighting = False

#### Configure construction of datamatrices

In [23]:
# Pressure loggers used as features of the data matrices (project-level constant for MC_3).
p_logs_as_features = PRESSURE_LOGGERS_KEEP_IN_DATAMAT_MC_3
# Forwarded as `nr_of_train_samples_per_leak` when the train/test matrices are constructed.
nr_of_train_samples_per_leak = 40

#### Drop a leak experiment from the analysis

In [24]:
#leak_experiment_to_drop = ""
#exp_leaks_info = exp_leaks_info[exp_leaks_info['3GE FID'] != leak_experiment_to_drop]

# Preprocess head simulations and measurements

In [25]:
# Preprocess the simulated heads of the leak scenarios.
head_sims_leaky_preproc = HeadSimsLeakyPreprocessor(
    sims_start=leaky_sims_start,
    sims_end=leaky_sims_end,
    sims_extract_start=leaky_sims_write_to_disk_start,
    sims_extract_end=leaky_sims_write_to_disk_end,
    delete_last_row=False,
)
head_res = head_sims_leaky_preproc.transform(head_results)

In [26]:
# Preprocess the simulated heads of the leak-free run.
head_sims_leakfree_preproc = HeadSimsLeakFreePreprocessor(
    sims_start=leakfree_sims_start,
    sims_end=leakfree_sims_end,
)
head_res_leakfree = head_sims_leakfree_preproc.transform(head_results_leakfree)

In [27]:
# Preprocess the measured pressures of all loggers over the leak-free simulation window.
# No one-hour shift is applied and loggers containing NaNs are kept.
pressure_meas_preproc = PressureMeasurementsPreprocessor(
    sims_start=leakfree_sims_start,
    sims_end=leakfree_sims_end,
    pressure_loggers_to_convert=all_pressure_loggers,
    wn=wn,
    shift_one_hour=False,
    drop_nan_p_logger=False,
)
head_meas = pressure_meas_preproc.transform(pressure_data)

# Calculate TDPBC

In [28]:
# TDPBC statistics per experimental leak and per pressure logger:
#   tdpbc_per_exp_leak_location[leak_location][pressure_logger] -> {'mean': ..., 'std': ...}
# e.g. tdpbc_per_exp_leak_location['775125'] contains the TDPBC for the experimental leak at '775125'
tdpbc_per_exp_leak_location = {}

for leak_location in exp_leaks_info['3GE FID']:

    # Select the log rows of this leak once; the first matching row provides the leak period.
    leak_rows = exp_leaks_info[exp_leaks_info['3GE FID'] == leak_location]
    leak_start_datetime = leak_rows['start_leak_datetime'].values[0]
    leak_end_datetime = leak_rows['end_leak_datetime'].values[0]

    # Only the time of day of the leak period is needed for the TDPBC.
    leak_start_time = str(pd.to_datetime(leak_start_datetime).time())
    leak_end_time = str(pd.to_datetime(leak_end_datetime).time())

    tdpbc_by_pressure_logger = {}

    for pressure_logger in all_pressure_loggers:
        tdpbc_mean, tdpbc_std = calc_tdpbc_for_daytime_range(
            head_meas,
            head_res_leakfree,
            pressure_logger,
            weekdays_to_include_tdpbc,
            start_date=tdpbc_start_date,
            end_date=tdpbc_end_date,      # end date not included
            start_daytime=leak_start_time,
            end_daytime=leak_end_time,    # end daytime included
            lin_weighting=lin_weighting,
        )
        tdpbc_by_pressure_logger[pressure_logger] = {'mean': tdpbc_mean, 'std': tdpbc_std}

    tdpbc_per_exp_leak_location[leak_location] = tdpbc_by_pressure_logger


Mean of empty slice


Degrees of freedom <= 0 for slice.



In [85]:
#for key in list(tdpbc_per_exp_leak_location.keys())[0:10]:
#    print(key)
#    print('\n')
#    for p in p_logs_as_features:
#        print(p)
#        print(tdpbc_per_exp_leak_location[key][p])
#    print('\n')

# Construct datamatrices

In [29]:
# Train and test data matrix per experimental leak, keyed by the leak's '3GE FID'.
X_train_per_leak_exp = {}
X_test_per_leak_exp = {}

# Inputs that are identical for every leak experiment.
shared_matrix_inputs = dict(
    head_meas=head_meas,
    head_res=head_res,
    head_res_leakfree=head_res_leakfree,
    tdpbc_per_exp_leak_location=tdpbc_per_exp_leak_location,
    scenarios=scenarios,
    p_logs_as_features=p_logs_as_features,
    all_leak_locs=all_leak_locs,
    nr_of_train_samples_per_leak=nr_of_train_samples_per_leak,
)

for leak_location in exp_leaks_info['3GE FID']:

    # Leak period of this experiment, taken from the first matching row of the log.
    leak_rows = exp_leaks_info[exp_leaks_info['3GE FID'] == leak_location]
    leak_start_datetime = str(pd.to_datetime(leak_rows['start_leak_datetime'].values[0]))
    leak_end_datetime = str(pd.to_datetime(leak_rows['end_leak_datetime'].values[0]))

    X_train, X_test = construct_train_and_test_matrix_for_leak_exp(
        leak_location=leak_location,
        leak_start_datetime=leak_start_datetime,
        leak_end_datetime=leak_end_datetime,
        **shared_matrix_inputs,
    )
    X_train_per_leak_exp[leak_location] = X_train
    X_test_per_leak_exp[leak_location] = X_test

In [30]:
# Training labels: one label index per leak candidate, built once and shared by all experiments.
y = construct_ungrouped_leak_labels(np.arange(len(all_leak_locs)))

# Every experiment gets the same training labels; its test label is the leak location itself.
y_train_per_leak_exp = {leak_location: y for leak_location in exp_leaks_info['3GE FID']}
y_test_per_leak_exp = {leak_location: leak_location for leak_location in exp_leaks_info['3GE FID']}

# Write to disk

In [32]:
# Persist the train/test matrices and labels of all leak experiments as pickle files.
processed_data_dir = data_dir / "processed/HPC_run_05/experiment_00"
# Create the output directory on a fresh checkout instead of failing with FileNotFoundError.
processed_data_dir.mkdir(parents=True, exist_ok=True)

X_train_per_leak_exp_file_name = processed_data_dir / "X_train_per_leak_exp.p"
X_test_per_leak_exp_file_name = processed_data_dir / "X_test_per_leak_exp.p"
y_train_per_leak_exp_file_name = processed_data_dir / "y_train_per_leak_exp.p"
y_test_per_leak_exp_file_name = processed_data_dir / "y_test_per_leak_exp.p"

objects_to_write = [
    (X_train_per_leak_exp, X_train_per_leak_exp_file_name),
    (X_test_per_leak_exp, X_test_per_leak_exp_file_name),
    (y_train_per_leak_exp, y_train_per_leak_exp_file_name),
    (y_test_per_leak_exp, y_test_per_leak_exp_file_name),
]

for object_to_write, output_file_name in objects_to_write:
    # The context manager flushes and closes each file deterministically;
    # a bare `open(...)` passed to pickle.dump is never closed explicitly.
    with open(output_file_name, "wb") as output_file:
        pickle.dump(object_to_write, output_file)