# Retrieval of data to put in the manuscript text/tables

In [None]:
import pathlib
import numpy as np
import pandas as pd
import geopandas as gpd

In [None]:
def filter_dataframe_by_indices(df, indices):
    """
    Select the rows of a dataframe whose index labels appear in ``indices``.

    Labels that are requested but absent from ``df.index`` are reported with a
    printed message and otherwise ignored, so a missing label never raises
    ``KeyError``.

    Args:
        df (pd.DataFrame): The input dataframe.
        indices (iterable): Index labels to keep (list, tuple, numpy array, ...).
            A label that is requested several times is considered once.

    Returns:
        pd.DataFrame: The rows of ``df`` whose label was requested, ordered by
        the first appearance of each label in ``indices``.
    """
    df_indices = set(df.index)
    # dict.fromkeys de-duplicates while keeping the requested order
    requested_indices = list(dict.fromkeys(indices))

    # Report the labels that cannot be found
    missing_indices = {label for label in requested_indices if label not in df_indices}
    if missing_indices:
        print(f"Missing indices: {missing_indices}")

    # .loc needs a list-like indexer: current pandas raises TypeError when given a set
    existing_indices = [label for label in requested_indices if label in df_indices]
    filtered_df = df.loc[existing_indices]

    return filtered_df

In [None]:
# Input files: reemission workbook (its 'inputs' sheet is read) and the emissions-comparison workbook
outputs_reemission = pathlib.Path("outputs/reemission/outputs_MIN_LOW_PRIM.xlsx")
outputs = pathlib.Path("outputs/emissions_comparison/emissions_comparison.xlsx")
reemission_inputs = pd.read_excel(outputs_reemission, sheet_name='inputs')
reemission_outputs = pd.read_excel(outputs)

# Total net emissions = net CO2 + net CH4
reemission_outputs['total_net'] = (
    reemission_outputs['co2_net'] + reemission_outputs['ch4_net']
)

# Volumes in km3 (source volume divided by 1e9)
reemission_outputs['res_volume_km3'] = reemission_outputs['res_volume'] / 1_000_000_000
reemission_inputs['res_volume_km3'] = reemission_inputs['res_volume'] / 1_000_000_000

# Net emissions for soued, using the regression from Almeida et al. 2017
reemission_outputs['co2_net_soued'] = (
    reemission_outputs['co2_soued'] * 0.25 * (1 + 0.17)
)
reemission_outputs['ch4_net_soued'] = (
    reemission_outputs['ch4_soued'] * 0.90 * (1 + 0.17)
)
reemission_outputs['total_net_soued'] = (
    reemission_outputs['co2_net_soued'] + reemission_outputs['ch4_net_soued']
)

# Split the inputs by reservoir type
hp_inputs = reemission_inputs[reemission_inputs['type'] == "hydroelectric"]
irr_inputs = reemission_inputs[reemission_inputs['type'] == "irrigation"]
multi_inputs = reemission_inputs[reemission_inputs['type'] == "multipurpose"]

# Split the outputs the same way, matching reservoirs by name
hp_names = hp_inputs['Name'].tolist()
irr_names = irr_inputs['Name'].tolist()
multi_names = multi_inputs['Name'].tolist()
hp_mask = reemission_outputs['Name'].isin(hp_names)
irr_mask = reemission_outputs['Name'].isin(irr_names)
multi_mask = reemission_outputs['Name'].isin(multi_names)
hp_outputs = reemission_outputs.loc[hp_mask]
irr_outputs = reemission_outputs.loc[irr_mask]
multi_outputs = reemission_outputs.loc[multi_mask]

# MOO inputs and the shapefile with emission intensities
moo_inputs_path = pathlib.Path("outputs/moo/all_hp.csv")
shp_path = pathlib.Path("intermediate/out_par_em_ifc.shp")

# MOO input units:
#   HP_mean      in MW
#   tot_em       in tonnes CO2 / annum
#   em_intensity in gCO2/kWh
moo_inputs = pd.read_csv(moo_inputs_path)

# Subsets with hp_type_reem == "sto", by reservoir type and status
moo_inputs_future_sto = moo_inputs[
    (moo_inputs['type'] == "hydroelectric")
    & (moo_inputs['status'] == "Future")
    & (moo_inputs['hp_type_reem'] == "sto")
]
moo_inputs_existing_sto = moo_inputs[
    (moo_inputs['type'] == "hydroelectric")
    & (moo_inputs['status'] == "Existing")
    & (moo_inputs['hp_type_reem'] == "sto")
]
moo_inputs_future_multi = moo_inputs[
    (moo_inputs['type'] == "multipurpose")
    & (moo_inputs['status'] == "Future")
    & (moo_inputs['hp_type_reem'] == "sto")
]
moo_inputs_existing_multi = moo_inputs[
    (moo_inputs['type'] == "multipurpose")
    & (moo_inputs['status'] == "Existing")
    & (moo_inputs['hp_type_reem'] == "sto")
]

# Same status split, regardless of reservoir type
moo_inputs_future = moo_inputs[
    (moo_inputs['status'] == "Future") & (moo_inputs['hp_type_reem'] == "sto")
]
moo_inputs_existing = moo_inputs[
    (moo_inputs['status'] == "Existing") & (moo_inputs['hp_type_reem'] == "sto")
]

def calc_em_intensity(data):
    """Return ``tot_em * 1e6`` divided by ``HP_mean * 24 * 365.25 * 1000``.

    Assuming ``tot_em`` is in tonnes CO2 per annum and ``HP_mean`` in MW, the
    numerator is grams per year, the denominator kWh per year, and the result
    is an emission intensity in gCO2/kWh.
    """
    emissions_g = data['tot_em'] * 1000000
    energy_kwh = data['HP_mean'] * 24 * 365.25 * 1000
    return emissions_g / energy_kwh

def calc_em(data):
    """Return the ``total_net`` entry of ``data`` unchanged."""
    total_net = data['total_net']
    return total_net

def calc_em_intensity_soued(data):
    """Return ``tot_em_soued * 1e6`` divided by ``HP_mean * 24 * 365.25 * 1000``.

    Assuming ``tot_em_soued`` is in tonnes CO2 per annum and ``HP_mean`` in MW,
    the result is an emission intensity in gCO2/kWh.
    """
    emissions_g = data['tot_em_soued'] * 1000000
    energy_kwh = data['HP_mean'] * 24 * 365.25 * 1000
    return emissions_g / energy_kwh

def calc_em_soued(data):
    """Return the ``total_net_soued`` entry of ``data`` unchanged."""
    total_net_soued = data['total_net_soued']
    return total_net_soued

# Asset ids per category: future sto, existing sto, future multi, existing multi
sto_future_ids = moo_inputs_future_sto['ifc_id'].to_numpy()
# NOTE(review): id 98 is appended by hand to the existing sto ids; the reason is not visible here - confirm
sto_existing_ids = np.append(moo_inputs_existing_sto['ifc_id'].to_numpy(), 98)
multi_future_ids = moo_inputs_future_multi['ifc_id'].to_numpy()
# NOTE(review): id 96 is likewise appended by hand to the existing multi ids - confirm
multi_existing_ids = np.append(moo_inputs_existing_multi['ifc_id'].to_numpy(), 96)
# Irrigation reservoir ids: every id in the outputs that is not one of the sto/multi ids
sto_multi_ids = np.concatenate([
    sto_future_ids,
    sto_existing_ids,
    multi_future_ids,
    multi_existing_ids,
])
irr_ids = np.array(list(set(reemission_outputs['id']) - set(sto_multi_ids)))

## 1. Statistics for Table 2 in Supplementary Information

### a. Future Hydroelectric

In [None]:
filter_dataframe_by_indices(reemission_outputs.set_index("id"),sto_future_ids).describe()

In [None]:
filter_dataframe_by_indices(reemission_outputs.set_index("id"),sto_future_ids).\
    loc[:,['res_area', 'res_volume_km3', 'em_net_total', 'em_net_total_soued']].sum()

In [None]:
filter_dataframe_by_indices(reemission_inputs.set_index("id"),sto_future_ids).\
    loc[:,['res_area', 'res_volume_km3', 'res_mean_depth']].mean()

### b. Existing Hydroelectric

In [None]:
filter_dataframe_by_indices(reemission_outputs.set_index("id"),sto_existing_ids).describe()

In [None]:
filter_dataframe_by_indices(reemission_outputs.set_index("id"),sto_existing_ids).\
    loc[:,['res_area', 'res_volume_km3', 'em_net_total', 'em_net_total_soued']].sum()

In [None]:
filter_dataframe_by_indices(reemission_inputs.set_index("id"),sto_existing_ids).\
    loc[:,['res_area', 'res_volume_km3', 'res_mean_depth']].mean()

### c. Future Multipurpose

In [None]:
filter_dataframe_by_indices(reemission_outputs.set_index("id"),multi_future_ids).\
    loc[:,['res_area', 'res_volume_km3', 'em_net_total', 'em_net_total_soued']].sum()

In [None]:
filter_dataframe_by_indices(reemission_inputs.set_index("id"),multi_future_ids).\
    loc[:,['res_area', 'res_volume_km3', 'res_mean_depth']].mean()

### d. Existing Multipurpose

In [None]:
filter_dataframe_by_indices(reemission_outputs.set_index("id"),multi_existing_ids).\
    loc[:,['res_area', 'res_volume_km3', 'em_net_total', 'em_net_total_soued']].sum()

In [None]:
filter_dataframe_by_indices(reemission_inputs.set_index("id"),multi_existing_ids).\
    loc[:,['res_area', 'res_volume_km3', 'res_mean_depth']].mean()

In [None]:
mp_hp = 844.0 / (844.0 + 369.5) * 1340.7
mp_ir = 369.5 / (844.0 + 369.5) * 1340.7
print(mp_hp, mp_ir)

### e. Irrigation

In [None]:
filter_dataframe_by_indices(reemission_outputs.set_index("id"),irr_ids).\
    loc[:,['res_area', 'res_volume_km3', 'em_net_total', 'em_net_total_soued']].sum()

In [None]:
filter_dataframe_by_indices(reemission_outputs.set_index("id"),irr_ids).\
    loc[:,['res_area', 'res_volume_km3', 'em_net_total', 'em_net_total_soued']].mean()

## Additional statistics: Hydroelectric reservoirs

In [None]:
# Areas in km2
hp_inputs['res_area'].describe()

In [None]:
# Provide volumes in km3
(hp_inputs['res_volume']/1e9).describe()

In [None]:
hp_inputs['res_mean_depth'].describe()

In [None]:
hp_outputs[['co2_net', 'co2_net_soued']].describe()

In [None]:
hp_outputs[['co2_net_total', 'co2_net_total_soued']].describe()

In [None]:
hp_outputs[['ch4_net', 'ch4_net']].describe()

In [None]:
hp_outputs[['total_net', 'total_net_soued']].describe()

## Irrigation reservoirs

In [None]:
irr_inputs['res_area'].describe()

In [None]:
# Provide volumes in km3
(irr_inputs['res_volume']/1e9).describe()

In [None]:
# sorted(irr_inputs['res_area'].to_list())
irr_inputs['res_mean_depth'].describe()

In [None]:
irr_outputs[['total_net', 'total_net_soued']].describe()

In [None]:
irr_outputs[['co2_net', 'co2_net_soued']].describe()

In [None]:
irr_outputs[['ch4_net', 'ch4_net_soued']].describe()

In [None]:
irr_inputs['res_mean_depth'].hist(bins=100)

## Multipurpose reservoirs

In [None]:
multi_inputs['res_area'].describe()

In [None]:
# Provide volumes in km3
(multi_inputs['res_volume']/1e9).describe()

In [None]:
# sorted(irr_inputs['res_area'].to_list())
multi_inputs['res_mean_depth'].describe()

In [None]:
multi_outputs[['total_net', 'total_net_soued']].describe()

In [None]:
multi_outputs[['co2_net', 'co2_net_soued']].describe()

In [None]:
multi_outputs[['ch4_net', 'ch4_net_soued']].describe()

## Data needed for the Emission intensities of existing and planned hydropower section

In [None]:
moo_inputs_future_sto

In [None]:
print("Emission intensities future storage hydro")
pd.DataFrame(
    {
        "G-res": calc_em_intensity(data=moo_inputs_future_sto).describe(),
        "Soued": calc_em_intensity_soued(data=moo_inputs_future_sto).describe()
    })

In [None]:
print("Emission intensities existing storage hydro")
pd.DataFrame(
    {
        "G-res": calc_em_intensity(data=moo_inputs_existing_sto).describe(),
        "Soued": calc_em_intensity_soued(data=moo_inputs_existing_sto).describe()
    })

In [None]:
print("Emission intensities future multipurpose")
pd.DataFrame(
    {
        "G-res": calc_em_intensity(moo_inputs_future_multi).describe(),
        "Soued": calc_em_intensity_soued(moo_inputs_future_multi).describe()
    })

In [None]:
print("Emission intensities existing multipurpose")
pd.DataFrame(
    {
        "G-res": calc_em_intensity(moo_inputs_existing_multi).describe(),
        "Soued": calc_em_intensity_soued(moo_inputs_existing_multi).describe()
    })

In [None]:
print("Emission intensities future assets")
pd.DataFrame(
    {
        "G-res": calc_em_intensity(moo_inputs_future).describe(),
        "Soued": calc_em_intensity_soued(moo_inputs_future).describe()
    })

In [None]:
print("Emission intensities existing assets")
pd.DataFrame(
    {
        "G-res": calc_em_intensity(moo_inputs_existing).describe(),
        "Soued": calc_em_intensity_soued(moo_inputs_existing).describe()
    })

In [None]:
moo_inputs.query('status=="Future"')["HP_mean"].sum()

In [None]:
moo_inputs.query('status=="Existing"')["HP_mean"].sum()

In [None]:
moo_inputs.query('type=="multipurpose"').head()

In [None]:
moo_inputs.head()

In [None]:
moo_inputs.query('status == "Future" and hp_type_ifc == "RoR"').count().values[0]

In [None]:
moo_inputs.query('status == "Existing" and hp_type_ifc == "RoR"').count().values[0]

In [None]:
moo_inputs.query('type == "hydroelectric" and status == "Existing" and hp_type_ifc == "S"').count().values[0]

In [None]:
moo_inputs.query('type == "multipurpose" and status == "Existing" and hp_type_ifc == "S"').count().values[0]

### Find emission intensities (note: does not include information about emissions from emission factors)

In [None]:
gdf = gpd.read_file(shp_path).rename(columns={'reservoir_': 'res_type'})

In [None]:
gdf.columns

In [None]:
gdf['status']

In [None]:
print(gdf[['hp_type_re', 'em_intensi']])

In [None]:
gdf.query('hp_type_re != "ror"')['em_intensi'].describe()

In [None]:
gdf_trimmed = gdf[gdf['em_intensi']>0.6]

In [None]:
gdf_trimmed.query('hp_type_re != "ror"')['em_intensi'].describe()

In [None]:
gdf.query('res_type == "multipurpose"')['em_intensi'].describe()

In [None]:
gdf[(gdf.ro_r_or_st == 'S') & (gdf.res_type == 'hydroelectric')]['em_intensi'].describe()

In [None]:
gdf.query('res_type == "multipurpose"')['des_head_1'].astype(float).describe()

In [None]:
gdf[(gdf.ro_r_or_st == 'S') & (gdf.res_type == 'hydroelectric')]['des_head_1'].astype(float).describe()

In [None]:
hp_sto = gdf[(gdf.ro_r_or_st == 'S') & (gdf.res_type == 'hydroelectric')]
hp_sto.query('status == "Built"')['em_intensi'].describe()

In [None]:
hp_sto.query('status != "Built"')['em_intensi'].describe()

In [None]:
gdf[gdf['index'] == "Lemro 2"]['em_intensi']

In [None]:
gdf[gdf['index'] == "Belin"]['em_intensi']

In [None]:
gdf[gdf['index'] == "Laza"]['em_intensi']

### Hydropower generation

In [None]:
current_gen = gdf.query('status == "Built"')['ann_gen'].sum()

In [None]:
planned_gen = gdf.query('status != "Built"')['ann_gen'].sum()

In [None]:
current_gen, planned_gen

In [None]:
current_gen + planned_gen

In [None]:
current_gen / (current_gen + planned_gen)

### Optimization

In [None]:
nondom = pd.read_csv('em_int_nondom_df.csv')

In [None]:
nobuilt = nondom[nondom['Scenario, [1/0]']==0]
built = nondom[nondom['Scenario, [1/0]']==1]

In [None]:
nobuilt[nobuilt['Mean annual HP, [MW]']>1814].head(1)

In [None]:
built.head(1)