In [1]:
import numpy as np
import pandas as pd
import os
from itertools import combinations, combinations_with_replacement, product
import wntr
import random
import networkx as nx
import copy
import time

In [2]:
# Getting path for the 'parent folder' (OS to call the folder)
path_cwd = os.getcwd()
path_parent = os.path.abspath(os.path.join(path_cwd, os.pardir))
#path_parent
path_cwd

'C:\\Users\\Lenovo\\Desktop\\test jupyter'

In [3]:
# Getting path for the input file
inputfiles_folder_name = '00_Input_files_EPANET'
filename = 'D:/Vineet code/DPTrans-main/Input_files_EPANET/Hanoi_base_demand.inp'
path_file = os.path.join(path_parent,filename)
path_file

'D:/Vineet code/DPTrans-main/Input_files_EPANET/Hanoi_base_demand.inp'

In [4]:
#Reading the input file into EPANET
inp_file = path_file
wn = wntr.network.WaterNetworkModel(inp_file)
wn1 = wntr.network.WaterNetworkModel(inp_file)

In [5]:
# Getting path for the 'No leak datafile' that contains expected states of given nodal demands and resulting flows and pressures
# across the WDN
data_folder_name = 'Data_files'
data_filename = 'data_base_demand_leakgen.csv'
path_data_file = os.path.join(path_parent,data_folder_name,data_filename)
path_data_file

'C:\\Users\\Lenovo\\Desktop\\Data_files\\data_base_demand_leakgen.csv'

In [6]:
expected_states_all = pd.read_csv(path_data_file)
expected_states_all.shape

(10000, 98)

In [7]:
# Here we run an EPANET simulation only to create sample output from a simulation
# and store the column headers of those results. These headers will be used in a file to store results from the 
# "leak experiment" 

sim_leak = wntr.sim.EpanetSimulator(wn1)
results = sim_leak.run_sim() #epanet run
df_demand = results.node['demand']
df_head = results.node['head']
df_flow = results.link['flowrate']

In [8]:
print(results)

<wntr.sim.results.SimulationResults object at 0x000002A51E53DAC0>


In [9]:
# initializing some key values
num_nodes = 32
num_links = 34
num_total_leaks = 1 # For current set of experiments

In [10]:
# create list of names of all nodes and links
link_name = wn1.link_name_list
node_name = wn1.node_name_list

In [11]:
# below code creates a list of nCp combinations for 'n' links with leak nodes and 'p' total leaks in the system. 
# Here we have assumed that EACH PIPE CAN HAVE ONLY ONE LEAK AT A TIME. Since we have only one leak, 
# its a single element list.
def leak_combs(num_tot_leaks):
         
    combs = list(combinations(leak_node_name,num_tot_leaks))
    
    return combs                 

In [12]:
# For a given combination of leak nodes, say 2 leak nodes, each node may have a range of leak areas. The code
# below first creates a list of areas and then creates nPc combinations where 'n' is the no of areas and 'c' 
# is no of leaks
def leak_area_combs(area_list):    
    combs = list(combinations_with_replacement(area_list,num_total_leaks))    #to create the area list dataframe
    return combs

In [13]:
# A hole of radius 1 cm would have an area of around 0.0003 m2 and hole with radius 4 cm would have 
# around 0.005 m2. We are assuming a leak in this area range. Default emmitter coefficient is 0.75 

area_list_train = [0.0005,0.002,0.003,0.004]
area_list_test = [0.0001,0.001,0.005]
split_point_train = [.5]
#split_point_test_1 = [.3,.7]
leak_areas = np.random.uniform(low=0.01, high=0.1, size=3) 
split_point= 0.5

In [14]:
idx=pd.IndexSlice

In [15]:
def leak_cases_generation(area_list,split_point,samples_each_link):
    
    # Use the column names derived from single simulation done in one of the early cells
    #and create empty dataframes to store results of leak experiment 
    
    #demand_data = pd.DataFrame(columns = df_demand.columns)
    leak_demand_data = pd.DataFrame(columns = df_demand.columns)
    head_data = pd.DataFrame(columns = df_demand.columns)
    flow_data = pd.DataFrame(columns = df_flow.columns)

    # 'Expected states' stand for 'No Leak' demand cases that are being used for leak simulation.
    # The underlying demand, resulting flow and pressure without any leak are stored along with the 
    # resulting flow and pressure with leak.
    
    # Selecting only a fraction of the input demand cases, to keep the leak cases dataset of reasonable size
    frac = samples_each_link/expected_states_all.shape[0]
    expected_states = expected_states_all.sample(frac=frac)
    
    expected_states = expected_states.reset_index(drop=True)
    expected_state_data = pd.DataFrame(columns=expected_states.columns)
    expected_states_np = np.array(expected_states)
    train_samples = expected_states.shape[0]
    
    #leak_node_name = 'leak_node'
    leaking_node = pd.DataFrame(columns = ['leak_link','split','leak_area','leak_demand'])
    #leaking_node_data = [] # to store leak link, split_ratio and leak area 
    demand_data_in = pd.DataFrame(columns = df_demand.columns[:-1])
    #area_in = pd.DataFrame(columns=leak_node_name)

    print(train_samples)
    k=0
    for i in range(train_samples):
        #print(i)
        wn = wntr.network.WaterNetworkModel(inp_file)
        
        # set up the reservoir head as per 'No leak datafile'
        wn.get_node(1).head_timeseries.base_value = expected_states_np[i,63]
        # set up demands across junction nodes as per 'No leak datafile'
        for n in range(2,num_nodes+1):
            wn.get_node(n).demand_timeseries_list[0].base_value = expected_states_np[i,n-2]
            
        leak_link = np.random.randint(low=1, high=35, size=1)[0]
       
        leak_areas = np.random.uniform(low=0.005, high=0.1, size=1)[0]
        print(leak_link)          

        wn_leak = copy.deepcopy(wn)
        wn_leak = wntr.morph.link.split_pipe(wn_leak,leak_link,'leak_pipe','leak_node',split_at_point=split_point)
            
        leak_node = wn_leak.get_node('leak_node')
        leak_node.add_leak(wn_leak, area=leak_areas, start_time=0)
        leaking_node_data = []              
        leaking_node_data.append(leak_link)
        leaking_node_data.append(split_point)
        leaking_node_data.append(leak_areas)

        sim = wntr.sim.WNTRSimulator(wn_leak)
        results = sim.run_sim()
        
        leaking_node_data.append(results.node['leak_demand'].loc[0]['leak_node'])
        head_data.loc[k] = results.node['head'].loc[0]
        flow_data.loc[k] = results.link['flowrate'].loc[0]
        expected_state_data.loc[k] = expected_states.loc[i] 
        leaking_node.loc[k] = leaking_node_data
                    
        k=k+1
                
    # Above part of the function generated the required data. The part below saves this data among 
    # datasets with appropriately named columns
    
    #leak_demand = demand_data.copy()
    leak_demand_leak = leak_demand_data.copy()
    leak_flow = flow_data.copy()
    leak_head = head_data.copy()
    
    leak_expected = expected_state_data.copy()
    
    leakheadnames = []
    for x in list(leak_head.columns):
        y = 'leak_head_'+x
        leakheadnames.append(y)

    leak_head.columns = leakheadnames

    leakflownames = []
    for x in list(leak_flow.columns):
        y = 'leak_flow_'+x
        leakflownames.append(y)

    leak_flow.columns=leakflownames


    leak_combined = pd.concat((leaking_node,leak_head,leak_flow,leak_expected),axis=1)
    
    return leak_combined            

In [16]:
leak_train = leak_cases_generation(leak_areas,split_point,10000)

10000
11
23
21
1
33
31
13
30
8
1
4
24
24
22
2
7
13
13
24
15
18
8
24
30
19
14
9
6
4
7
30
20
6
22
34
26
3
16
34
23
34
31
12
27
21
20
3
7
2
16
34
20
15
22
30
27
6
23
5
11
16
19
31
17
22
11
22
30
12
23
6
8
12
27
24
23
7
5
23
2
17
34
4
33
4
24
9
17
18
16
15
26
34
25
8
31
11
24
5
31
15
4
11
20
18
34
1
16
4
33
20
31
4
27
30
14
21
1
2
22
22
32
23
5
33
8
3
20
7
18
7
13
10
34
27
19
29
6
18
27
34
13
2
23
27
26
29
31
21
10
33
30
17
33
25
20
22
34
16
15
18
17
23
32
8
22
25
18
16
19
29
22
8
33
14
23
34
26
33
29
26
32
17
7
5
16
16
17
12
13
10
33
27
30
31
22
30
34
17
26
15
7
5
11
24
12
32
24
27
25
32
12
18
10
34
27
11
33
2
3
15
13
20
4
14
24
29
28
19
26
3
6
13
3
32
19
4
21
19
32
24
21
2
20
17
28
34
32
32
4
33
26
27
22
3
26
29
26
7
14
7
18
23
20
18
28
7
33
13
5
4
3
6
14
18
30
9
5
22
6
28
13
27
19
5
10
21
24
16
29
33
15
16
33
2
29
6
4
2
25
7
20
22
23
31
5
3
8
19
24
11
34
15
25
12
33
23
6
33
32
27
21
29
9
30
4
16
3
1
1
22
11
16
24
19
20
5
14
24
4
15
19
12
2
10
29
28
1
3
16
10
2
16
10
33
29
12
5
29
26
9
2

KeyboardInterrupt: 

In [None]:
leak_test = leak_cases_generation(leak_areas,split_point,10000)

In [53]:
# Output folder name defined
datafiles_folder_name = 'Data_files'

# Output file names defined

#name_train = 'leak_train.csv'
name_test = 'leak_test.csv'

# Creating file paths. Note that 'path_parent' has been defined earlier
#path_train = os.path.join(path_parent,datafiles_folder_name,name_train)
path_test = os.path.join(path_parent,datafiles_folder_name,name_test)

# Creating the 'Data_files' folder, if it doesn't already exists
os.makedirs(os.path.dirname(path_train), exist_ok=True)

# Saving the output datasets as csv files whose paths have been defined above
#leak_train.to_csv(path_train,index=None)
leak_test.to_csv(path_test,index=None)