In [14]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns                                                                                                                                                                                                                                                       

In [15]:
import os
import re

In [16]:
experiment_series ="systems_pop8_local57"
precipitation_setting = "Rain"
decay_setting = "decay"

data_path = f"/media/iru-mls/inside/pop8_results/raw/{experiment_series}/{decay_setting}_{precipitation_setting}"
# data_path = f"/media/iru-mls/inside/{experiment_series}/{precipitation_setting}"

experiment_series = "pop8_local57"
result_path = f"preprocessed_data/{experiment_series}/"

## substances

In [17]:
def tidy_substances(data_path, memilio_id):
    # Path to text file
    file_name = f'INSIDe_substances_results{memilio_id}_output_v4.txt'
    file_path = f"{data_path}/{file_name}"

    # Initialize containers
    records = []
    current_variable = None
    current_manhole = None

    # Regular expressions
    pattern_str = fr'INSIDe_substances_results_{memilio_id}_output_v4_manhole_(MUC\d+)\.txt'
    manhole_pattern = re.compile(pattern_str)
    header_pattern = re.compile(r'time\[min\]\s+(\w+)\([^)]+\) concentration')
    data_pattern = re.compile(r'^(\d+)\s+([-\d.eE]+)$')

    # Read file line by line
    with open(file_path, 'r') as file:
        for line in file:
            line = line.strip()

            # Skip empty lines
            if not line or line == '##':
                continue

            # Match manhole ID
            manhole_match = manhole_pattern.match(line)
            if manhole_match:
                current_manhole = manhole_match.group(1)
                continue

            # Match variable name
            header_match = header_pattern.match(line)
            if header_match:
                current_variable = header_match.group(1)
                continue

            # Match data lines
            data_match = data_pattern.match(line)
            if data_match and current_variable and current_manhole:
                time = int(data_match.group(1))
                value = float(data_match.group(2))
                records.append({
                    "time_in_minutes": time,
                    "variable": current_variable,
                    "value": value,
                    "manhole": current_manhole
                })
    df = pd.DataFrame(records)
    df = df.loc[df.variable!="T"]
    df["time_in_days"] = df["time_in_minutes"]/(24*60)
    df["simulation_id"] = memilio_id
    return df

In [18]:
df = pd.DataFrame()
for memilio_id in range(1, 101):
    print(f"Processing Memilio ID: {memilio_id}")
    df_temp = tidy_substances(data_path, memilio_id)
    df = pd.concat([df, df_temp], ignore_index=True)

os.makedirs(f"{result_path}/substances/", exist_ok=True)
df.to_csv(f"{result_path}/substances/{decay_setting}_{precipitation_setting}_output.csv", index=False)

Processing Memilio ID: 1
Processing Memilio ID: 2
Processing Memilio ID: 3
Processing Memilio ID: 4
Processing Memilio ID: 5
Processing Memilio ID: 6
Processing Memilio ID: 7
Processing Memilio ID: 8
Processing Memilio ID: 9
Processing Memilio ID: 10
Processing Memilio ID: 11
Processing Memilio ID: 12
Processing Memilio ID: 13
Processing Memilio ID: 14
Processing Memilio ID: 15
Processing Memilio ID: 16
Processing Memilio ID: 17
Processing Memilio ID: 18
Processing Memilio ID: 19
Processing Memilio ID: 20
Processing Memilio ID: 21
Processing Memilio ID: 22
Processing Memilio ID: 23
Processing Memilio ID: 24
Processing Memilio ID: 25
Processing Memilio ID: 26
Processing Memilio ID: 27
Processing Memilio ID: 28
Processing Memilio ID: 29
Processing Memilio ID: 30
Processing Memilio ID: 31
Processing Memilio ID: 32
Processing Memilio ID: 33
Processing Memilio ID: 34
Processing Memilio ID: 35
Processing Memilio ID: 36
Processing Memilio ID: 37
Processing Memilio ID: 38
Processing Memilio ID

In [58]:
"""
memilio_id = 1  # Example Memilio ID for individual processing
df = tidy_substances(data_path, memilio_id)

os.makedirs(f"{result_path}/substances/", exist_ok=True)
df.to_csv(f"{result_path}/substances/{decay_setting}_{precipitation_setting}_{memilio_id}_output.csv", index=False)
"""

'\nmemilio_id = 1  # Example Memilio ID for individual processing\ndf = tidy_substances(data_path, memilio_id)\n\nos.makedirs(f"{result_path}/substances/", exist_ok=True)\ndf.to_csv(f"{result_path}/substances/{decay_setting}_{precipitation_setting}_{memilio_id}_output.csv", index=False)\n'

In [19]:
df.manhole.nunique() == 17

True

In [20]:
df.groupby("manhole")["time_in_minutes"].count().describe()["std"]==0.0

np.True_

In [21]:
df.loc[df.variable=="PMMoV", "value"].describe()

count    1.501610e+07
mean     7.568071e+01
std      1.843700e+01
min      0.000000e+00
25%      8.149966e+01
50%      8.321230e+01
75%      8.332656e+01
max      8.333334e+01
Name: value, dtype: float64

In [23]:
df.loc[df.variable=="PMMoV"].groupby("simulation_id")["value"].mean().min()

np.float64(75.67203671958764)

## hydraulic results

In [28]:
def tidy_hydraulics(memilio_id, data_path):
    # Path to text file
    file_name = f'INSIDe_hydraulic_results{memilio_id}_output_v4.txt'
    file_path = f"{data_path}/{file_name}"

    # Containers
    records = []
    current_pipe = None
    current_variable = "flow_rate"  # Only one variable here

    # Regex patterns
    pattern_str = fr'INSIDe_hydraulic_results_{memilio_id}_output_v4_pipe_(MUC\d+)'
    pipe_pattern = re.compile(pattern_str)
    data_pattern = re.compile(r'^([\d.,]+)\s+([\d.,]+)$')

    # Read and parse the file
    with open(file_path, 'r') as file:
        for line in file:
            line = line.strip()
            if not line or line == "##":
                continue

            # Match pipe ID
            pipe_match = pipe_pattern.match(line)
            if pipe_match:
                current_pipe = pipe_match.group(1)
                continue

            # Skip headers
            if line.startswith("t [min]"):
                continue

            # Match data lines
            data_match = data_pattern.match(line)
            if data_match and current_pipe:
                time_str = data_match.group(1).replace(',', '.')
                value_str = data_match.group(2).replace(',', '.')
                try:
                    time = float(time_str)
                    value = float(value_str)
                    records.append({
                        "time_in_minutes": time,
                        "value": value,
                        "pipe_number": current_pipe
                    })
                except ValueError:
                    continue  # skip lines that don't convert properly

    # Convert to DataFrame
    df = pd.DataFrame(records)
    df["time_in_days"] = df["time_in_minutes"]/(24*60)
    df["simulation_id"] = memilio_id
    return df

In [29]:
df = pd.DataFrame()
for memilio_id in range(1, 101):
    print(f"Processing Memilio ID: {memilio_id}")
    df_temp = tidy_hydraulics(memilio_id, data_path)
    df = pd.concat([df, df_temp], ignore_index=True)

os.makedirs(f"{result_path}/hydraulics/", exist_ok=True)
df.to_csv(f"{result_path}/hydraulics/{decay_setting}_{precipitation_setting}_output.csv", index=False)

Processing Memilio ID: 1
Processing Memilio ID: 2
Processing Memilio ID: 3
Processing Memilio ID: 4
Processing Memilio ID: 5
Processing Memilio ID: 6
Processing Memilio ID: 7
Processing Memilio ID: 8
Processing Memilio ID: 9
Processing Memilio ID: 10
Processing Memilio ID: 11
Processing Memilio ID: 12
Processing Memilio ID: 13
Processing Memilio ID: 14
Processing Memilio ID: 15
Processing Memilio ID: 16
Processing Memilio ID: 17
Processing Memilio ID: 18
Processing Memilio ID: 19
Processing Memilio ID: 20
Processing Memilio ID: 21
Processing Memilio ID: 22
Processing Memilio ID: 23
Processing Memilio ID: 24
Processing Memilio ID: 25
Processing Memilio ID: 26
Processing Memilio ID: 27
Processing Memilio ID: 28
Processing Memilio ID: 29
Processing Memilio ID: 30
Processing Memilio ID: 31
Processing Memilio ID: 32
Processing Memilio ID: 33
Processing Memilio ID: 34
Processing Memilio ID: 35
Processing Memilio ID: 36
Processing Memilio ID: 37
Processing Memilio ID: 38
Processing Memilio ID

In [None]:
"""
df = tidy_hydraulics(memilio_id, data_path)

os.makedirs(f"{result_path}/hydraulics/", exist_ok=True)
df.to_csv(f"{result_path}/hydraulics/{decay_setting}_{precipitation_setting}_{memilio_id}_output.csv", index=False)
"""

'\ndf = tidy_hydraulics(memilio_id, data_path)\n\nos.makedirs(f"{result_path}/hydraulics/", exist_ok=True)\ndf.to_csv(f"{result_path}/hydraulics/{decay_setting}_{precipitation_setting}_{memilio_id}_output.csv", index=False)\n'

In [30]:
df.pipe_number.nunique()

18

In [31]:
df.value.describe()

count    1.911298e+08
mean     9.857949e-01
std      2.820718e+00
min      0.000000e+00
25%      3.036367e-02
50%      1.909300e-01
75%      1.305500e+00
max      7.449310e+01
Name: value, dtype: float64