# Processing of outputs from water resources models with reservoir level, turbine flow and spill recorders

### Tomasz Janus
### 11/04/2024

### TODO
1. fetch excel outputs from pywr
2. fetch a list of hp sites with ror/sto status from data generated in the previous notebook
3. parse excel output columns to obtain reservoir names
4. fetch reservoir/turbine parameters from the models so that we can calculate composite features such as the ratio of head to max head, etc.
5. summarise, group and save in a json/yaml/toml file

In [567]:
from typing import Dict
import pathlib
import os
import re
import json
import pandas as pd
import geopandas as gpd
pd.set_option('display.max_columns', None)

In [178]:
# Locations of the pywr simulation output workbooks, one per river basin
file_folder = pathlib.Path("outputs/pywr_sim_aligned_models_with_levels/")
file_irrawaddy = file_folder.joinpath("outputs_irrawaddy_hist_with_flow_rec.xlsx")
file_salween = file_folder.joinpath("outputs_salween_hist_with_flow_rec.xlsx")
file_sittaung = file_folder.joinpath("outputs_sittaung_hist_with_flow_rec.xlsx")


def _mean_recorder_values(workbook: pathlib.Path) -> pd.Series:
    """Return the mean of every numeric recorder column stored in ``workbook``.

    Line 2 (0-indexed) of the sheet is skipped on reading and the row labelled 0
    is dropped before averaging (presumably non-data header rows of the pywr
    output - TODO confirm against the workbooks).
    """
    recorders = pd.read_excel(workbook, skiprows=[2])
    recorders = recorders.drop(index=0)
    return recorders.mean(numeric_only=True)


# Series of mean values of all recorded columns - required to find mean HP, spill and
# level for each reservoir
df_irrawaddy = _mean_recorder_values(file_irrawaddy)
df_salween = _mean_recorder_values(file_salween)
df_sittaung = _mean_recorder_values(file_sittaung)

In [231]:
# Regular expression matching the word "turbine" or "Turbine"; it is passed to re.sub
# in get_data_from_pywr_outputs to strip that word from recorder names
turbine_pattern = r'[tT]urbine'

def megam3d_to_m3s(flow: float) -> float:
    """Convert a flow given in Mm3/d (as produced by the pywr water resources model) to m3/s."""
    cubic_metres_per_day = flow * 1e6
    cubic_metres_per_hour = cubic_metres_per_day / 24
    return cubic_metres_per_hour / 3_600

def reservoir_name_from_turbine_node(node_name: str) -> str | None:
    """ """
    pattern = r'([\w\s()]+?)_(?:turbine|Turbine)(?:_|$)|(?:turbine|Turbine)_(.+)$'
    match = re.search(pattern, node_name)
    if match:
        return match.group(1).strip() if match.group(1) else match.group(2).strip()
    return None

def add_with_summation(data: Dict[str, float], key: str, value: float) -> None:
    """Accumulate ``value`` under ``key`` in ``data``, modifying ``data`` in place.

    A key that is not yet present is created with ``value``; otherwise
    ``value`` is added to the entry already stored.
    """
    if key not in data:
        data[key] = value
        return
    data[key] += value
    
def get_data_from_pywr_outputs(results: pd.DataFrame) -> Dict:
    """Collect energy, level and flow recorder values keyed by reservoir and quantity.

    ``results`` is iterated with ``.items()`` as (recorder name, value) pairs;
    in this notebook it is the Series of column means of a pywr output workbook.

    Recorder names are classified by the marker they contain:

    * ``:energy``  -> key ``<reservoir>_hp``
    * ``LevelRec`` -> key ``<reservoir>_level``
    * ``FlowRec``  -> key ``<reservoir>_spillflow`` when the name contains
      "spill" (case-insensitive), otherwise ``<reservoir>_flow``

    ``<reservoir>`` is what remains of the recorder name after removing the
    marker, the word "turbine" (energy and flow recorders), the spill/control
    spill word (spill recorders) and every underscore. Values that map to the
    same key are summed.

    Returns:
        Dictionary mapping ``<reservoir>_<quantity>`` to the accumulated value.
    """
    # Both "ControlSpill" and "Controlspill" spellings must be removed as a
    # whole, otherwise a stray "Control" would stay glued to the reservoir name
    spill_pattern = r'([cC]ontrol[sS]pill|[sS]pill)'
    outputs_dict = dict()
    for name, value in results.items():

        if ":energy" in name:
            processed_energy_recorder = name.replace(":energy", "")
            processed_energy_recorder = re.sub(turbine_pattern, '', processed_energy_recorder)\
                .replace("_", "").strip()
            processed_energy_recorder += "_hp"
            add_with_summation(outputs_dict, processed_energy_recorder, value)

        if "LevelRec" in name:
            processed_level_recorder = name.replace("LevelRec", "").replace("_", "").strip()
            processed_level_recorder += "_level"
            add_with_summation(outputs_dict, processed_level_recorder, value)

        if "FlowRec" in name:
            processed_flow_recorder = name.replace("FlowRec", "")
            processed_flow_recorder = re.sub(turbine_pattern, '', processed_flow_recorder)\
                .replace("_", "").strip()
            # "controlspill" contains "spill", so a single test covers both
            if "spill" in processed_flow_recorder.lower():
                suffix = "_spillflow"
                processed_flow_recorder = re.sub(spill_pattern, '', processed_flow_recorder)\
                    .replace("_", "").strip()
            else:
                suffix = "_flow"
            processed_flow_recorder += suffix
            add_with_summation(outputs_dict, processed_flow_recorder, value)
    return outputs_dict

def convert_output_dict_to_df(output_dict: Dict) -> pd.DataFrame:
    """Reshape a flat ``<reservoir>_<quantity>`` dictionary into a table.

    The keys are split on ``_`` into reservoir and quantity. The returned
    frame has one row per reservoir and one column per quantity; reservoirs
    whose name contains "irrigation" (case-insensitive) are left out.
    """
    wide = pd.DataFrame.from_dict(output_dict, orient="index").transpose()
    # Turn every "<reservoir>_<quantity>" label into a two-level column label
    wide.columns = wide.columns.str.split('_', expand = True)
    wide.columns.names = [None, 'Reservoir']
    # Move the last column level (the quantity) into the rows and index by it
    by_quantity = wide.stack(-1)
    by_quantity = by_quantity.reset_index()
    by_quantity = by_quantity.set_index("Reservoir")
    unwanted = [col for col in by_quantity.columns if 'irrigation' in col.lower()]
    # 'level_0' is the leftover of the original single-row index
    unwanted.append('level_0')
    return by_quantity.drop(columns=unwanted).transpose()

def extract_data_from_pywr_model(model_path: pathlib.Path) -> pd.DataFrame:
    """Collect reservoir and turbine parameters from a pywr model JSON file.

    Every node of type ``"storage"`` contributes the attributes

    * ``status``  - the node's ``comment`` (``"unknown"`` when absent)
    * ``min_vol`` - the node's ``min_volume`` (``0.0`` when absent)
    * ``max_vol`` - the node's ``max_volume``

    and, from the model parameters named ``__<reservoir>__:<suffix>``:

    * ``:max_power_flow`` -> ``turbine_elevation``
    * ``:power_capacity`` -> ``capacity`` (the parameter's ``value``)
    * ``:level``          -> ``max_level`` (largest entry of ``values``)

    Args:
        model_path: Path of the pywr model file.

    Returns:
        DataFrame with one column per reservoir and one row per attribute.

    Raises:
        KeyError: If a storage node lacks ``name``/``max_volume`` or a matched
            parameter lacks the field read from it.
    """
    # JSON text is UTF-8; do not rely on the platform's default encoding
    with open(model_path, 'r', encoding="utf-8") as json_file:
        model = json.load(json_file)
    # Reservoir (storage node) attributes
    res_dict = {
        node['name']: {
            'status': node.get('comment', "unknown"),
            'min_vol': node.get('min_volume', 0.0),
            'max_vol': node['max_volume'],
        }
        for node in model['nodes']
        if node['type'] == "storage"
    }
    # Names of the parameters of interest -> (reservoir, suffix), so that the
    # parameters are scanned once instead of once per reservoir
    suffixes = ("max_power_flow", "power_capacity", "level")
    wanted = {
        "__" + reservoir + "__:" + suffix: (reservoir, suffix)
        for reservoir in res_dict
        for suffix in suffixes
    }
    for par_name, par_data in model['parameters'].items():
        if par_name not in wanted:
            continue
        reservoir, suffix = wanted[par_name]
        if suffix == "max_power_flow":
            res_dict[reservoir]["turbine_elevation"] = par_data['turbine_elevation']
        elif suffix == "power_capacity":
            res_dict[reservoir]["capacity"] = par_data['value']
        else:
            # The maximum level is the largest value of the level parameter
            res_dict[reservoir]["max_level"] = max(par_data["values"])
    # Convert res_dict to a dataframe and return it
    return pd.DataFrame(res_dict)

### 1. Get the flow, hp production and level data from water resources models

In [373]:
# Aggregate the mean recorder values of each basin into "<reservoir>_<quantity>" entries
sittaung_dict = get_data_from_pywr_outputs(df_sittaung)
salween_dict = get_data_from_pywr_outputs(df_salween)
irrawaddy_dict = get_data_from_pywr_outputs(df_irrawaddy)

# 1. Sittaung: the same reservoir appears under two spellings,
# `Paung Laung (Upper)` and `Paung Laung (upper)`. The spill flow of the
# lower-case row is added to the other row and the lower-case row is removed.
output_df_sittaung = convert_output_dict_to_df(sittaung_dict)
doubled_reservoir_sittaung = "Paung Laung (upper)"
output_df_sittaung.loc['Paung Laung (Upper)', 'spillflow'] = (
    output_df_sittaung.loc['Paung Laung (Upper)', 'spillflow']
    + output_df_sittaung.loc[doubled_reservoir_sittaung, 'spillflow'])
output_df_sittaung = output_df_sittaung.drop(index=doubled_reservoir_sittaung)

# 2. Salween
output_df_salween = convert_output_dict_to_df(salween_dict)

# 3. Irrawaddy: the "Mandalay" and "YangonDomesticWater" rows are removed
output_df_irrawaddy = convert_output_dict_to_df(irrawaddy_dict)
output_df_irrawaddy = output_df_irrawaddy.drop(index=["Mandalay", "YangonDomesticWater"])

# Reservoir names to be renamed in the combined table
out_res_map = {
    "MongTon" : "Mong Ton"
}

# Stack the three basin tables on top of each other
out_combined = pd.concat(
    [output_df_irrawaddy, output_df_salween, output_df_sittaung], axis=0)
out_combined = out_combined.rename(index=out_res_map)

In [323]:
# Preview the first rows of the combined output table of the three basins
out_combined.head()

Reservoir,flow,hp,level,spillflow
Buywa,4.835465,23.934569,303.503479,12.310699
Chipwi,130.647939,2016.700247,391.16899,96.85536
Chipwi Nge,1.088361,48.58278,736.912768,1.225186
Dapein 1,16.11395,107.03212,250.0,2.771483
Dapein 2,15.204059,69.804121,175.539568,5.949399


### 2. Get the reservoir/turbine parameters from the pywr water resources file(s)
**IMPORTANT NOTE:** The water resources models are not shared in this repository. Therefore, the code below is private and only works on the author's computer. We keep this code for reference but, normally, it will be skipped and the data will be read from the pre-saved Excel file instead.

In [533]:
# Source of the reservoir/turbine parameter table:
#   True  - extract the parameters from the pywr model files and save them to Excel
#   False - read the previously saved Excel file
extract_from_pywr_models: bool = False # ONLY SET TO TRUE IF YOU HAVE ACCESS TO THE PYWR MODELS,
                                       # OTHERWISE SET TO FALSE
# Folder and file name of the saved parameter table
file_path = pathlib.Path("inputs/pywr_res_turbine_parameters")
file_name: str = "res_turbine_parameters_pywr.xlsx"

# We need to map some reservoir names because of inconsistent reservoir naming in the pywr models
# of Myanmar. The names in nodes do not correspond to the names 'embedded' in the parameter/recorder
# names. Therefore we need to rename some of the reservoirs so that there are no doubled rows after
# merging with the dataframe with pywr model outputs.
# Keys: names as found in the model nodes; values: names used in the outputs table.
par_res_map = {
    "Lemro_1": "Lemro1",
    "Lemro_2": "Lemro2",
    "ManTong": "Mantong",
    "Nam_Paw": "Nam Paw",
    "Mi_Chaung": "MiChaung",
    "Saing_Din": "SaingDin"
}

if extract_from_pywr_models:
    os.makedirs(file_path, exist_ok = True)
    ### Get the information from the model - max_hp_capacity, max_level, turbine_elevation
    # Produce the table and erase the code!!
    irr_model_path = pathlib.Path(
        "/home/lepton/Documents/git_projects/myanmar_hydro/models/" +
        "sim_hp_recorders_new_pywr_aligned_flow_level_recorders/irrawaddy/" +
        "Irrawaddy_pywr_historical_southampton_new_Kc.json")
    sal_model_path = pathlib.Path(
        "/home/lepton/Documents/git_projects/myanmar_hydro/models/" +
        "sim_hp_recorders_new_pywr_aligned_flow_level_recorders/salween/" +
        "Salween_pywr_historical_southampton_new_Kc.json")
    sit_model_path = pathlib.Path(
        "/home/lepton/Documents/git_projects/myanmar_hydro/models/" +
        "sim_hp_recorders_new_pywr_aligned_flow_level_recorders/sittaung/" +
        "Sittaung_pywr_historical_southampton_new_Kc.json")
    # Remove reservoirs in the models that are not HP
    par_data_irr = extract_data_from_pywr_model(irr_model_path).drop(
        columns=['Yangon_Domestic_Water','Mandalay_Domestic_Water'])
    par_data_sal = extract_data_from_pywr_model(sal_model_path).drop(
        columns=['Moe Byal_Irrigation_Reservoir'])
    par_data_sit = extract_data_from_pywr_model(sit_model_path).drop(
        columns=['Sinthe_Irrigation_Reservoir','Ngalaik_Irrigation_Reservoir', 
                 'Yezin_Irrigation_Reservoir','Chaungmange_Irrigation_Reservoir',
                 'Ngamoeyeik_Irrigation_Reservoir'])
    # One column per reservoir across the three basins, saved for later runs
    pd.concat([par_data_irr, par_data_sal, par_data_sit], axis=1)\
        .to_excel(file_path/file_name)
# Always load the table from the Excel file, also right after extracting it.
# The file stores the attribute names in an unnamed first column ("Unnamed: 0"
# once read back), which the reshaping step below relies on; the in-memory
# frame keeps them in its index instead and would make that step fail.
par_combined = pd.read_excel(file_path/file_name)
# Reshape the table to one row per reservoir and one column per attribute,
# using the reservoir names of the outputs table
par_combined = par_combined\
    .rename(columns={"Unnamed: 0" : "Reservoir"})\
    .set_index("Reservoir")\
    .transpose()\
    .rename(index=par_res_map)
# Manually specified turbine elevations that replace the values in the table
turbine_elevation_overrides = {
    "Paung Laung (Lower)": 104,
    "Paung Laung (Middle)": 174,
    "Paung Laung (Upper)": 273,
    "Mong Ton": 220,
    "Nam Paw": 756.0,
}
# `.loc[row, column] = value` would silently append a row for an unknown
# reservoir, so fail loudly instead
missing_reservoirs = [
    name for name in turbine_elevation_overrides if name not in par_combined.index]
if missing_reservoirs:
    raise KeyError(f"Reservoirs missing from the parameter table: {missing_reservoirs}")
for name, elevation in turbine_elevation_overrides.items():
    # Single-step indexing writes into the frame itself; the chained form
    # `.loc[name]['turbine_elevation'] = ...` may only modify a temporary copy
    par_combined.loc[name, 'turbine_elevation'] = elevation

In [534]:
# Join the model outputs with the reservoir/turbine parameters on the reservoir
# name (the index of both tables); the outer join keeps reservoirs that are
# present in only one of the two tables
out_par = out_combined.merge(
    par_combined, how="outer", left_index=True, right_index=True)

In [402]:
# Preview the first rows of the merged outputs/parameters table
out_par.head()

Reservoir,flow,hp,level,spillflow,status,min_vol,max_vol,max_level,turbine_elevation
Baluchaung (Upper),1.058157,15.114256,1140.0,0.882495,Under-Construction,2.2,2.2,1140,992
Baluchaung1,3.315697,22.4,867.0,101.294577,Built,2.0,2.0,867,797
Baluchaung2,3.299983,134.4,787.0,102.168026,Built,2.0,2.0,787,365
Baluchaung3,3.781058,41.6,339.0,101.686951,Built,2.0,2.0,339,225
Bawgata,1.765632,36.177379,227.288369,0.020921,LocMoU,557.0,835.0,313,60


### 3. Get emission estimates from intermediate data pre-calculated in the earlier steps

In [541]:
# Read the emission estimates pre-calculated earlier and index them by reservoir name
emissions_df = pd.read_excel(
    pathlib.Path('intermediate/hp_multi_ror_emissions_and_generation.xlsx'))
emissions_df = emissions_df.drop(columns=['geometry'])
emissions_df = emissions_df.rename(columns={"name": "Reservoir"})
emissions_df = emissions_df.set_index("Reservoir")
emissions_df.head()

Unnamed: 0_level_0,reservoir_type,r_area_km2,Status,status_2_ifc,ro_r_or_sto_ifc,hp_type_reem,res_area,mean,pctile_2,pctile_3,pctile_5,pctile_10,ann_gen,em_intensity,hp_fraction,co2_net,ch4_net,tot_em_net,tot_em,em_intensity_range
Reservoir,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
Bawgata,hydroelectric,10.046,Future Hydroelectric,P,S,sto,10.046,36.0,2.1,2.2,2.4,3.0,315.576,29.247852,1.0,672.8369,244.5322,918.7657,9.22992,20 - 50
Belin,hydroelectric,329.727,Future Hydroelectric,P,S,sto,329.727,131.0,64.0,67.0,70.0,77.0,1148.346,185.6013,1.0,247.9838,398.0138,646.3969,213.134511,150 - 300
Buywa,multipurpose,34.904,Future Hydroelectric,C,S,sto,34.904,24.0,1.6,1.6,1.7,1.9,210.384,194.13676,1.0,334.1893,835.9579,1170.1601,40.843268,150 - 300
Buywa (upper),multipurpose,66.257,Future Hydroelectric,P,S,sto,66.257,79.0,3.8,3.9,4.1,4.7,692.514,69.749587,1.0,395.9809,333.0209,729.0183,48.302566,50 - 150
Dum Ban,hydroelectric,3.053,Future Hydroelectric,P,S,sto,3.053,32.0,0.0,0.0,0.0,0.0,280.512,6.058718,1.0,192.3563,364.2737,556.6797,1.699543,5 - 20


### 4. Combine the three datasets and save to excel/yaml/json

In [542]:
# Reservoir names of the pywr tables -> names used in the emissions table
pywr_to_emission_names = {
    "Baluchaung1": "Baluchaung 1",
    "Baluchaung2": "Baluchaung 2",
    "Baluchaung3": "Baluchaung 3",
    "Baluchaung (Upper)": "Baluchaung (upper)",
    "Shweli1" : "Shweli 1",
    "Shweli2" : "Shweli 2",
    "Shweli3" : "Shweli 3",
    "Upper Yeywa": "Yeywa (upper)",
    "Middle Yeywa": "Yeywa (middle)",
    'Upper Buywa' : "Buywa (upper)",
    'Upper Sedawgyi' : "Sedawgyi (upper)",
    'SaingDin': "Saing Din",
    "Paung Laung (Lower)": "Lower Paunglaung",
    "Paung Laung (Middle)": "Paung Laung (middle)",
    "Paung Laung (Upper)": "Upper Paunglaung",
    "Hawkham (Upper)": "Hawkham (upper)",
    'Zawgyi1' : "Zawgyi I",
    'Zawgyi2' : "Zawgyi II",
    "Nam Pawn (Lower)" : "Nam Pawn (lower)",
    "Nam Pawn (Upper)": "Nam Pawn (upper)",
    "Lemro1": "Lemro 1",
    "Lemro2": "Lemro 2",
    "Thapanzeik": "Thaphanseik",
    'Keng Tawng (Upper)' : "Keng Tawng (upper)",
    "MiChaung": "Mi Chaung"
}
out_par_merge = out_par.rename(index=pywr_to_emission_names)
# The inner join keeps only the reservoirs present in both tables
out_par_em = out_par_merge.merge(
    emissions_df, how="inner", left_index=True, right_index=True)
out_par_em.head()

Unnamed: 0,flow,hp,level,spillflow,status,min_vol,max_vol,max_level,turbine_elevation,reservoir_type,...,pctile_5,pctile_10,ann_gen,em_intensity,hp_fraction,co2_net,ch4_net,tot_em_net,tot_em,em_intensity_range
Baluchaung (upper),1.058157,15.114256,1140.0,0.882495,Under-Construction,2.2,2.2,1140,992,hydroelectric,...,0.0,0.0,131.49,3.0,1.0,,,,0.39447,< 5
Baluchaung 1,3.315697,22.4,867.0,101.294577,Built,2.0,2.0,867,797,hydroelectric,...,22.0,22.0,192.852,3.0,1.0,,,,0.578556,< 5
Baluchaung 2,3.299983,134.4,787.0,102.168026,Built,2.0,2.0,787,365,hydroelectric,...,134.0,134.0,1174.644,3.0,1.0,,,,3.523932,< 5
Baluchaung 3,3.781058,41.6,339.0,101.686951,Built,2.0,2.0,339,225,hydroelectric,...,42.0,42.0,368.172,3.0,1.0,,,,1.104516,< 5
Bawgata,1.765632,36.177379,227.288369,0.020921,LocMoU,557.0,835.0,313,60,hydroelectric,...,2.4,3.0,315.576,29.247852,1.0,672.8369,244.5322,918.7657,9.22992,20 - 50


In [550]:
# Report how many rows of the pywr table remain after the inner join with the emissions table
print(f"Processed HP sites: {len(out_par_em)} out of {len(out_par)} in pywr")

Processed HP sites: 99 out of 104 in pywr


In [544]:
# Check that there are no NaN values in the merged columns. NaNs would suggest that we either
# haven't got all the values from the model files or that we didn't sort out all the reservoir
# naming conventions prior to data merging and there are some repeated rows,
# e.g. Baluchaung 1 and Baluchaung1, etc.
nan_mask = pd.isnull(out_par_em[['flow', 'hp', 'level', 'spillflow', 'turbine_elevation']]).any(axis=1)
# A plain condition is used instead of `assert` inside try/except: assertions are
# removed when Python runs with -O, which would make the check always succeed
if nan_mask.any():
    print("There are Nan values in some of the rows, indicating missing data. See below.")
    print(out_par_em[nan_mask])
else:
    print("Everything is fine, we're good to go")

Everything is fine, we're good to go


In [522]:
# List the reservoirs whose hp_fraction is below 1
print(
    "Reservoir for which emissions are related to only a fraction of hp generation\n"
    "due to their multipurpose nature")
out_par_em.loc[out_par_em['hp_fraction'] < 1]

Reservoir for which emissions are related to only a fraction of hp generation
due to their multipurpose nature


Unnamed: 0,flow,hp,level,spillflow,status,min_vol,max_vol,max_level,turbine_elevation,reservoir_type,...,pctile_5,pctile_10,ann_gen,em_intensity,hp_fraction,co2_net,ch4_net,tot_em_net,tot_em,em_intensity_range
Kabaung,2.424973,12.12509,114.933787,0.721228,Built,1083.78,1468.0,119,66,multipurpose,...,0.87,0.87,105.192,424.622826,0.69,280.412136,728.58549,1010.701098,44.666924,300 - 450
Kinda,2.416788,18.110432,186.044712,0.523948,Built,207.0,1078.0,193,117,multipurpose,...,0.0,0.0,157.788,151.803768,0.67,322.611432,418.343578,741.320696,23.952813,150 - 300
Kun Chaung,2.789461,24.168129,189.099256,0.423864,unknown,333.0,1468.0,190,100,multipurpose,...,0.48,0.68,210.384,361.178044,0.82,271.636234,883.953358,1157.442218,75.986082,300 - 450
Lower Paunglaung,9.270211,103.657685,216.619801,0.149752,Built,340.0,690.0,225,104,multipurpose,...,23.0,25.0,911.664,21.303514,0.94,434.59631,682.729708,1117.599652,19.421647,20 - 50
Myittha,1.87726,13.196804,339.974503,0.962976,Built,153.0,325.0,352,279,multipurpose,...,0.33,0.52,113.958,185.737739,0.94,196.209396,1004.28848,1200.516206,21.166301,150 - 300
Myogyi,2.836352,21.5859,192.258611,119.326833,Built,170.0,444.0,193,118,multipurpose,...,15.0,19.0,192.852,96.944237,0.83,209.427841,1928.9117,2138.628464,18.69589,50 - 150
Phyu Chaung,3.258217,16.458614,122.637757,0.84721,Built,52.35,779.57,162,80,multipurpose,...,0.72,1.1,140.256,386.814232,0.74,326.96937,956.960822,1285.921236,54.253017,300 - 450
Sedawgyi,6.026666,17.439201,125.942229,31.679027,Built,104.0,448.0,129,100,multipurpose,...,0.9,1.5,149.022,509.077632,0.76,295.623888,1573.1487,1868.979992,75.863767,450 - 1500
Thaphanseik,3.46722,13.111874,156.852034,0.0,Built,481.0,3553.0,159,120,multipurpose,...,7.8,8.3,113.958,2206.872879,0.53,167.020437,1023.258174,1191.323718,251.49082,> 1500
Yenwe,3.354946,17.417822,99.281085,0.192037,Built,149.0,1089.0,103,46,multipurpose,...,0.0,0.0,149.022,620.583948,0.83,250.679671,960.392502,1212.988394,92.480661,450 - 1500


### 5. Join with IFC data such that we can compare HP generation against the installed capacity

In [575]:
# Read the IFC dam layer, keep the columns of interest and index it by dam name
ifc_db = gpd.read_file(pathlib.Path("bin/gis_layers/ifc_database/all_dams_replaced_refactored.shp"))
ifc_db = ifc_db.loc[:,["ID", "IFC_ID", 'DAM_NAME', 'DAM_HEIGHT', 'FSL (m)', 'LWL (m)', 'HRT',
           'Des_Head', 'Des_Disch', 'STOR_MCM',
           'Inst_cap', 'Annual Gen', 'RIV_ORD','geometry']]
ifc_db = ifc_db.rename(columns={"DAM_NAME": "Reservoir"})
ifc_db = ifc_db.set_index("Reservoir")
# Attach the IFC attributes to every processed reservoir (left join on the name)
out_par_em_ifc = pd.merge(out_par_em, ifc_db, how = "left", left_index=True, right_index=True)
out_par_em_ifc = out_par_em_ifc.astype({'ID': 'int', 'IFC_ID' : 'int', 'RIV_ORD': 'int'})
# Add new columns (variables) quantifying the factors driving emission factors in hydroelectric reservoirs

**Methodology explanation**
Hydropower generation $\begin{equation} G_{HP} \propto Q \times (H - z_t) \end{equation}$, where $Q$ is the flow via the turbine, $H$ is the reservoir water level and $z_t$ is the turbine elevation, so that $H - z_t$ is the water head.
Emission intensity of a hydroelectric plant $\begin{equation}E_{GHG} = \displaystyle\frac{(e_{CO_2} + e_{CH_4})*A}{G_{HP}} \end{equation}$.
In the absence of any limitations imposed by the limited capacity of the transmission network, the hydroelectric plant will produce the maximum amount of electricity if it is operating near the maximum design head at a maximum flow, such that the operation is near the turbines' maximum capacity at all times. Operational inefficiencies of a hydroelectric plant will result from sub-optimal operation, e.g. operating at low head or losing significant volumes of water via spills and overflow, as well as from the environmental conditions, i.e. reduced water inflows from the catchment and the river network, e.g. due to changes in the atmospheric conditions or land use, and increased flow variability, e.g. periods of excessively large flows necessitating water releases via overflows followed by periods of droughts. We quantify the potential sub-optimality of HP operation using the following indices.\
**1. Plant factor** :
$\begin{equation}PF = \cfrac{\bar{G_{HP}}}{G_{HP,max}} \end{equation}$ \
**2. Level Headroom** : 
$\begin{equation}1 - \displaystyle\frac{\bar{H}-z_t}{H_{max} - z_t}\end{equation}$ \
**3. Flow Headroom** :
$\begin{equation}1 - \displaystyle\frac{\bar{Q_t}}{Q_{t,max}}\end{equation}$ \
**4. Utility Flow Fraction** :
$\begin{equation} \displaystyle\frac{Q_t}{Q_t + Q_{spill}} \end{equation}$

**Estimate power production figure as**:

$\begin{equation}
    G_{HP} = k \times \displaystyle\left( Q_{t,max}\times\frac{\bar{Q_t}}{Q_{t,max}} \right)\,
    \left( \left(H_{max} - z_t\right)\times\frac{\bar{H}-z_t}{H_{max} - z_t} \right)
\end{equation}$

$\begin{equation} \dot{M}_{GHG} = \left(e_{CO_2} + e_{CH_4}\right) \times A\end{equation}$

$\begin{equation}
    E_{GHG} = \displaystyle\frac{\dot{M}_{GHG}}{G_{HP}}
\end{equation}$

**Factors for ML model:** $Q_{t,max}$ ,$\displaystyle\frac{\bar{Q_t}}{Q_{t,max}}$, $H_{max} - z_t$,
$\displaystyle\frac{\bar{H}-z_t}{H_{max} - z_t}$, $\left(e_{CO_2} + e_{CH_4}\right)$, $A$

**ISSUE** We do not have the information about the maximum flow, but we have information about the design power generation. Therefore, we calculate the maximum flow from the maximum head and the installed capacity using the following equation, where $k$ is the same proportionality constant as in the power production formula above:

$\begin{equation} Q_{t,max} = \displaystyle\frac{G_{HP,des}}{k \times \left(H_{max} - z_t\right)} \end{equation}$

In [588]:
# HP_des = Hmax * Qdes -> Qdes = HP_des/Hmax
# Calculate the conversion coefficient between installed capacity per unit head and flow
# Flow in Mm3/d
# HP in MW
# Head in m
# HP[MW] = flow[m3/s] * h[m] * 1000 kg/m3 * 9.81 m/s2 * eff / 1e6
# flow[m3/s] = HP[MW] / h[m] * 1000 / (9.81 * eff)
# flow[Mm3/d] = flow[m3/s] * 3600 * 24 / 1000000
# flow[Mm3/d] = HP[MW] / h[m] * 1000 / (9.81 * eff) * 3600 * 24 / 1000000 =
#               HP[MW] / h[m] * 24 * 3.6 / (9.81 * eff)
_GRAVITY = 9.81  # gravitational acceleration, m/s2
_TURBINE_EFFICIENCY = 0.8  # efficiency (eff) used in the conversion above
f_flow_hp = 24 * 3.6 / (_GRAVITY * _TURBINE_EFFICIENCY)

In [592]:
# Mean and maximum water level above the turbine, and the mean turbine flow in m3/s
mean_net_head = out_par_em_ifc['level'] - out_par_em_ifc['turbine_elevation']
max_net_head = out_par_em_ifc['max_level'] - out_par_em_ifc['turbine_elevation']
mean_flow_m3s = out_par_em_ifc['flow'].apply(megam3d_to_m3s)

# Indices quantifying the potential sub-optimality of the HP operation
out_par_em_ifc['level_headroom'] = 1 - mean_net_head / max_net_head
out_par_em_ifc['flow_headroom'] = 1 - mean_flow_m3s / out_par_em_ifc['Des_Disch']
out_par_em_ifc['plant_factor'] = out_par_em_ifc['hp'] / out_par_em_ifc['Inst_cap']
out_par_em_ifc['total_flow'] = out_par_em_ifc['flow'] + out_par_em_ifc['spillflow']
out_par_em_ifc['f_utility_flow'] = out_par_em_ifc['flow'] / out_par_em_ifc['total_flow']

# Factors for the ML model
# 1. des_flow - design flow derived from the installed capacity and the design head
# 3. des_head - maximum level above the turbine
out_par_em_ifc['des_head'] = max_net_head
out_par_em_ifc['des_flow'] = out_par_em_ifc['Inst_cap'] / out_par_em_ifc['des_head'] * f_flow_hp
# 2. q_mean_des - mean flow relative to the design flow
out_par_em_ifc['q_mean_des'] = out_par_em_ifc['flow'] / out_par_em_ifc['des_flow']
# 4. h_mean_des - mean head relative to the design head
out_par_em_ifc['h_mean_des'] = mean_net_head / out_par_em_ifc['des_head']
# 5. tot_em_net
# 6. res_area (cross-check against r_area_km2)

In [594]:
# Preview the first rows of the table including the IFC data and the derived indices
out_par_em_ifc.head()

Unnamed: 0,flow,hp,level,spillflow,status,min_vol,max_vol,max_level,turbine_elevation,reservoir_type,r_area_km2,Status,status_2_ifc,ro_r_or_sto_ifc,hp_type_reem,res_area,mean,pctile_2,pctile_3,pctile_5,pctile_10,ann_gen,em_intensity,hp_fraction,co2_net,ch4_net,tot_em_net,tot_em,em_intensity_range,ID,IFC_ID,DAM_HEIGHT,FSL (m),LWL (m),HRT,Des_Head,Des_Disch,STOR_MCM,Inst_cap,Annual Gen,RIV_ORD,geometry,level_headroom,flow_headroom,plant_factor,total_flow,f_utility_flow,des_head,des_flow,q_mean_des,h_mean_des
Baluchaung (upper),1.058157,15.114256,1140.0,0.882495,Under-Construction,2.2,2.2,1140,992,hydroelectric,,Future Hydroelectric,C,RoR,ror,,15.0,0.0,0.0,0.0,0.0,131.49,3.0,1.0,,,,0.39447,< 5,103,7,35.0,1140.0,1125.0,,148.0,16.0,2.0,30.0,90.0,5,POINT (96.78192 20.48459),0.0,0.234551,0.503809,1.940651,0.545258,148,2.231589,0.474172,1.0
Baluchaung 1,3.315697,22.4,867.0,101.294577,Built,2.0,2.0,867,797,hydroelectric,,Existing Reservoirs,E,RoR,ror,,22.0,22.0,22.0,22.0,22.0,192.852,3.0,1.0,,,,0.578556,< 5,100,2,11.0,867.0,864.0,1.0,70.0,46.0,2.0,28.0,200.0,4,POINT (97.28700 19.64800),0.0,0.165737,0.8,104.610274,0.031696,70,4.40367,0.752939,1.0
Baluchaung 2,3.299983,134.4,787.0,102.168026,Built,2.0,2.0,787,365,hydroelectric,,Existing Reservoirs,E,RoR,ror,,134.0,134.0,134.0,134.0,134.0,1174.644,3.0,1.0,,,,3.523932,< 5,101,4,,787.0,784.0,,422.0,48.0,2.0,168.0,1190.0,4,POINT (97.35800 19.55700),0.0,0.204287,0.8,105.468009,0.031289,422,4.382799,0.752939,1.0
Baluchaung 3,3.781058,41.6,339.0,101.686951,Built,2.0,2.0,339,225,hydroelectric,,Existing Reservoirs,E,RoR,ror,,42.0,42.0,42.0,42.0,42.0,368.172,3.0,1.0,,,,1.104516,< 5,102,6,,339.0,,,114.0,51.0,2.0,52.0,334.0,4,POINT (97.39700 19.54600),0.0,0.141917,0.8,105.468009,0.03585,114,5.021729,0.752939,1.0
Bawgata,1.765632,36.177379,227.288369,0.020921,LocMoU,557.0,835.0,313,60,hydroelectric,10.046,Future Hydroelectric,P,S,sto,10.046,36.0,2.1,2.2,2.4,3.0,315.576,29.247852,1.0,672.8369,244.5322,918.7657,9.22992,20 - 50,109,8,80.0,,,339.0,,,835.0,160.0,500.0,6,POINT (96.84850 18.25556),0.338781,,0.226109,1.786554,0.98829,253,6.962324,0.253598,0.661219


In [596]:
# Show the rows whose IFC 'ID' is missing, i.e. reservoirs for which the left join
# with the IFC table found no match
out_par_em_ifc[out_par_em_ifc['ID'].isna()]

Unnamed: 0,flow,hp,level,spillflow,status,min_vol,max_vol,max_level,turbine_elevation,reservoir_type,r_area_km2,Status,status_2_ifc,ro_r_or_sto_ifc,hp_type_reem,res_area,mean,pctile_2,pctile_3,pctile_5,pctile_10,ann_gen,em_intensity,hp_fraction,co2_net,ch4_net,tot_em_net,tot_em,em_intensity_range,ID,IFC_ID,DAM_HEIGHT,FSL (m),LWL (m),HRT,Des_Head,Des_Disch,STOR_MCM,Inst_cap,Annual Gen,RIV_ORD,geometry,level_headroom,flow_headroom,plant_factor,total_flow,f_utility_flow,des_head,des_flow,q_mean_des,h_mean_des


In [597]:
# Build a geo-referenced copy of the table from its 'geometry' column (EPSG:4326)
out_par_em_ifc_gdf = gpd.GeoDataFrame(
    out_par_em_ifc, geometry=out_par_em_ifc['geometry'], crs="EPSG:4326")
# Save the results as an Excel workbook, a shapefile and a GeoJSON file
out_par_em_ifc.to_excel(pathlib.Path("intermediate/out_par_em_ifc.xlsx"))
out_par_em_ifc_gdf.to_file(pathlib.Path('intermediate/out_par_em_ifc.shp'))
# `driver` is an argument of `to_file`, not of `pathlib.Path`: Path ignores
# unknown keyword arguments on older Python versions and rejects them on newer ones
out_par_em_ifc_gdf.to_file(
    pathlib.Path('intermediate/out_par_em_ifc.geojson'), driver="GeoJSON")

  out_par_em_ifc_gdf.to_file(pathlib.Path('intermediate/out_par_em_ifc.shp'))
