# Calculation of input data required to generate bespoke input files for multiobjective dam placement study in Myanmar
### T. Janus
### Mui Ne, 28/12/2023

#### Update: 18/04/2024 and 03/11/2024
1. We need to discount emissions from multipurpose reservoirs when calculating emission intensity for hydropower generation. The discounting is proportional to the fraction of water that is abstracted from multipurpose reservoirs for purposes other than hydroelectric power generation. In our case those other purposes are agricultural abstraction because our multipurpose reservoirs have only two purposes - HP generation and irrigation for crop production. First, we need to find the fraction of total 'utility' accounted for by hydroelectric production in multipurpose reservoirs. We find this value from the column **hp_fraction** in the dataframe stored in file **hp_multi_ror_emissions_and_generation.xlsx**.

2. **RoR** generation has an emission intensity of 3 gCO$_{2e}$/kWh. This value is already represented in the tabular data generated in the previous notebooks.

**NOTE:** Use IFC names throughout!!!

In [1]:
from __future__ import annotations
from typing import List, Dict, Any
import pathlib
from dataclasses import dataclass
import numpy as np
import geopandas as gpd
import pandas as pd

In [2]:
@dataclass
class PywrIFCMapper:
    """Two-way name lookup between Pywr (water resources model) and IFC dam names.

    The wrapped table must provide the columns ``ifc_name`` and ``pywr_name``;
    every row pairs one IFC name with its Pywr counterpart.
    """
    # Table holding the name pairs (columns 'ifc_name' and 'pywr_name').
    name_map: pd.DataFrame

    @classmethod
    def from_file(cls, file_path: pathlib.Path) -> PywrIFCMapper:
        """Build a mapper from a CSV file readable by ``pandas.read_csv``."""
        table = pd.read_csv(file_path)
        return cls(name_map=table)

    def _lookup(self, key_column: str, value_column: str) -> Dict[str, str]:
        """Return a dict taking entries of `key_column` to entries of `value_column`."""
        keyed = self.name_map.set_index(key_column)
        return keyed[value_column].to_dict()

    @property
    def ifc_pywr_map(self) -> Dict[str, str]:
        """Dictionary translating IFC names into Pywr names."""
        return self._lookup('ifc_name', 'pywr_name')

    @property
    def pywr_ifc_map(self) -> Dict[str, str]:
        """Dictionary translating Pywr names into IFC names."""
        return self._lookup('pywr_name', 'ifc_name')

    def print_cls(self) -> None:
        """Print the underlying name table."""
        print(self.name_map)
        
def list_nan_rows(df: pd.DataFrame, column_name: str) -> List[Any]:
    """Return the index labels of all rows whose `column_name` entry is missing."""
    is_missing = df[column_name].isna().to_numpy()
    return df.index[is_missing].tolist()

In [3]:
# Rename map for the RE-Emission `res_area_fractions_<i>` columns used in this notebook.
# The column indices of the urban / crop / forest fractions (2, 5, 7) repeat with a
# stride of 9 across three blocks, which receive the suffixes _1, _2 and _3:
#   res_area_fractions_2  -> urban_area_fraction_1,  res_area_fractions_5  -> crop_area_fraction_1, ...
#   res_area_fractions_25 -> forest_area_fraction_3
col_name_map_reemission_inputs = {
    f"res_area_fractions_{first_index + 9 * block}": f"{landcover}_area_fraction_{block + 1}"
    for block in range(3)
    for first_index, landcover in ((2, "urban"), (5, "crop"), (7, "forest"))
}

# Emission intensity assumed for all run-of-river (RoR) plants, in gCO2e/kWh
ror_emission_intensity: float = 3.0

## 1. Import the required datasets

In [4]:
# Import tabular heet output data, pywr output hp summary data and shp data (for spatial inference)
# --- Input locations ---------------------------------------------------------
# 1. shape file of reservoirs where data has been calculated in geoCARET
reservoirs_shp_data_path = pathlib.Path("bin/heet_outputs_MIN_LOW_PRI/reservoirs_updated.shp")
# 2. Merged data with IFC database and RE-Emission outputs
hp_summary_data_path = pathlib.Path("intermediate/merged_table.xlsx")
# 3. Full output data table from RE-Emission, including all scenarios, etc.
reemission_output_data_path = pathlib.Path("outputs/reemission/combined/combined_outputs.csv")
# 4. Mapping between IFC and Pywr (water resources model) names
ifc_pywr_map_path = pathlib.Path("config/ifc_pywr_name_map.csv")
# 5. IFC database of dams (GIS)
ifc_db_shp_path = pathlib.Path("bin/gis_layers/ifc_database/all_dams_replaced_refactored.shp")
# 6. Data with additional emissions calculated according to Soued et al.'s parameterization vs. climatic zone
em_reemission_soued_path = pathlib.Path("outputs/emissions_comparison/emissions_comparison.csv")

pywr_ifc_mapper = PywrIFCMapper.from_file(ifc_pywr_map_path)

# 0. IFC database of dams: only the identifier and the dam geometry are kept
ifc_gdf = (
    gpd.read_file(ifc_db_shp_path)
    .loc[:, ['IFC_ID', 'geometry']]
    .rename(columns={'IFC_ID': 'ifc_id'})
)

# 1. Import spatial data. The shapefile is read once: `reservoirs_gdf_all` keeps every
#    column, `reservoirs_gdf` is an independent copy with only id, area and geometry.
# NOTE: the filter on type ('hydroelectric' | 'multipurpose') is disabled, so reservoirs
#       of all types are retained in both frames.
reservoirs_gdf_all = gpd.read_file(reservoirs_shp_data_path).rename(columns={"id": "ifc_id"})
reservoirs_gdf = reservoirs_gdf_all.loc[:, ['ifc_id', 'r_area_km2', 'geometry']].copy()

# 2. Import tabular data with emissions and hp production
hp_summary = pd.read_excel(hp_summary_data_path).loc[:, [
    'ifc_id', 'dam_name', 'res_area', 'status_2_ifc', 'ro_r_or_sto_ifc', 'mean', 'pctile_10',
    'co2_total_per_year', 'ch4_total_per_year', 'hp_type_reem']]
# Status code 'E' is labelled 'Existing'; every other code is labelled 'Future'
hp_summary['status'] = np.where(hp_summary['status_2_ifc'].isin(['E']), 'Existing', 'Future')
hp_summary = (
    hp_summary
    # Missing emissions / areas are set to 0 (downstream, tot_em == 0 triggers the RoR estimate)
    .assign(**{col: hp_summary[col].fillna(0) for col in
               ['co2_total_per_year', 'ch4_total_per_year', 'res_area']})
    .drop_duplicates()
    # Translate Pywr dam names into IFC names (names without a mapping stay unchanged)
    .assign(dam_name=hp_summary['dam_name'].replace(pywr_ifc_mapper.pywr_ifc_map))
    .sort_values(by="dam_name", ascending=True)
    .eval('tot_em = co2_total_per_year + ch4_total_per_year')
    .drop(columns=['co2_total_per_year', 'ch4_total_per_year', 'status_2_ifc', 'res_area'])
    .rename(columns={
        "mean": "HP_mean", "pctile_10": "HP_firm", 'ro_r_or_sto_ifc': 'hp_type_ifc',
        "dam_name": "name"})
)

# 3. Import the inputs tab from the outputs file from re-emission.
#    The area-fraction columns to read are exactly the keys of the rename map, so the
#    selection is derived from `col_name_map_reemission_inputs` instead of being repeated.
reemission_inputs = (
    pd.read_csv(reemission_output_data_path)
    .loc[:, ['id', 'type', 'Scenario', *col_name_map_reemission_inputs]]
    .query("Scenario == 'MIN_LOW_PRIM'")
    .rename(columns=col_name_map_reemission_inputs)
    .rename(columns={'id': 'ifc_id'})
    # Sum the three per-block fractions into one fraction per land cover class
    .eval('urban_area_fraction = urban_area_fraction_1 + urban_area_fraction_2 + urban_area_fraction_3')
    .eval('forest_area_fraction = forest_area_fraction_1 + forest_area_fraction_2 + forest_area_fraction_3')
    .eval('crop_area_fraction = crop_area_fraction_1 + crop_area_fraction_2 + crop_area_fraction_3')
    .drop(columns=["Scenario", *col_name_map_reemission_inputs.values()])
)

# 4. Emissions following Soued et al.
em_reemission_soued = pd.read_csv(em_reemission_soued_path)
# Unit conversion ktCO2e/yr -> tCO2e/yr, so that tot_em_soued is comparable with tot_em
# (tot_em is in tCO2eq/yr). NOTE(review): the previous comment said "Mtonnes to ktonnes";
# the Mali record (0.49 km2 at ~1727 gCO2e/m2/yr ~ 846 t, stored as 0.846) indicates kt -> t.
em_reemission_soued['tot_em_soued'] = em_reemission_soued['em_net_total_soued'] * 1_000
em_reemission_soued_trimmed = em_reemission_soued[['Name', 'id', 'tot_em_soued']]

# Merge total emissions from Soued et al into hp_summary, matching on the dam name.
# The Soued table's own `id` is not carried over; `Name` is kept next to `name`.
hp_summary = pd.merge(
    hp_summary, em_reemission_soued_trimmed[['Name', 'tot_em_soued']],
    left_on="name",
    right_on="Name",
    how="left"
)
# Dams without a Soued estimate get 0
hp_summary = hp_summary.assign(tot_em_soued=hp_summary['tot_em_soued'].fillna(0))

In [5]:
# Sanity check: the hp_summary record named 'Mali' must carry IFC id 236
mali = hp_summary.loc[hp_summary["Name"] == "Mali"]
assert mali["ifc_id"].iloc[0] == 236

In [6]:
# Inspect the Mali record in the Soued table: it's misclassified as type irrigation
em_reemission_soued[em_reemission_soued['Name'] == "Mali"]

Unnamed: 0,Name,coordinates_0,coordinates_1,id,co2_diffusion,co2_net,ch4_diffusion,ch4_net,ch4_ebullition,ch4_degassing,...,co2_net_total,ch4_net_total,type,res_area,res_volume,em_net_total,co2_net_total_soued,ch4_net_total_soued,em_net_total_soued,tot_em_soued
184,Mali,24.9848,97.6135,9161,444.6194,248.3867,195.8819,1478.3807,144.9913,1137.5075,...,0.121709,0.724407,irrigation,0.49,4076097.0,0.846116,0.089339,0.375941,0.46528,465.279674


In [7]:
# 4. Import the dataframe with the column hp_fraction in file hp_multi_ror_emissions_and_generation.xlsx
file_path_hp_multi = pathlib.Path("intermediate/hp_multi_ror_emissions_and_generation.xlsx")
hp_multi_hp_fraction = pd.read_excel(file_path_hp_multi)[['name', 'hp_fraction']]
# Add the missing Mali dam (it was wrongly classified as irrigation) plus four other
# hydroelectric units. All five are given hp_fraction = 1.0.
new_rows = pd.DataFrame(
    {'name': ["Mali", 'Keng Tawng', 'Lemro 1', 'Mi Chaung', 'Nam Paw']}
).assign(hp_fraction=1.0)
hp_multi_hp_fraction = pd.concat(
    [hp_multi_hp_fraction, new_rows],
    ignore_index=True,
)

In [8]:
# Check that the manually appended 'Lemro 1' entry is present
hp_multi_hp_fraction[hp_multi_hp_fraction['name'] == 'Lemro 1']

Unnamed: 0,name,hp_fraction
101,Lemro 1,1.0


In [9]:
# Hydroelectric storage reservoir (no RoR)
# Prints the `tot_em` column of the reservoir shapefile as one comma-separated line.
# NOTE(review): reservoirs_gdf_all is not filtered by type, so the list may also
# contain reservoirs that are neither hydroelectric nor multipurpose - confirm.
print(
    "Areal emissions in gCO2e/m2/year for hydroelectric and multipurpose dams"
    "\n        (before discounting for multipurpose reservoirs)"
)
print("------------------------------------------------------------------------\n")
print(*reservoirs_gdf_all['tot_em'], sep=", ")

Areal emissions in gCO2e/m2/year for hydroelectric and multipurpose dams
        (before discounting for multipurpose reservoirs)
------------------------------------------------------------------------

1188.0892000000001, 1588.9605999999999, 728.7407, 687.2817, 1276.4997, 1065.393, 1464.7842, 700.8770999999999, 3679.2146999999995, 1411.5149, 2442.1183, 1273.3301999999999, 556.6797, 2247.7806, 1171.9415000000001, 663.7312, 1358.3769999999997, 362.6175, 948.6897, 1883.6388, 1913.8798000000002, 3571.5829, 940.5544, 1277.1449000000002, 672.6514000000001, 965.2004999999999, 1327.4682, 1040.2435, 593.6146, 1106.4488000000001, 899.7382, 959.8141000000002, 646.3969000000001, 1347.8541, 1461.4318, 932.1362, 1184.3402999999998, 1189.0643, 1170.1600999999998, 2576.6608, 729.0183, 887.2598999999999, 1569.9877, 2225.8817, 1424.5793999999999, 1462.0267000000001, 1656.9551, 532.8104999999999, 1737.7314, 791.5833, nan, 661.2171999999999, 918.7657, 1188.9358, 855.4196, nan, 556.4868, 2459.18419999999

In [10]:
# Inspect the Mali record in the full reservoir table
reservoirs_gdf_all.query("name == 'Mali'")

Unnamed: 0,name,ifc_id,type,r_volume_m,r_area_km2,r_maximum_,r_mean_dep,r_msocs_kg,r_mghr_all,r_mghr_may,...,c_masm_mm,c_biome,c_soil_typ,c_mean_ols,ms_length,co2_net,ch4_net,n2o_mean,tot_em,geometry
96,Mali,9161,irrigation,4076097.0,0.49,19.0,8.3,6.106,4.57,4.166,...,227.0,Tropical & Subtropical Moist Broadleaf Forests,MINERAL,7.625,1.319,248.3867,1478.3807,0.034,1726.8014,"POLYGON ((97.61250 24.98250, 97.61250 24.98750..."


## 2. Join all three dataframes together

In [11]:
# First convert the classification of Mali dam from irrigation to hydroelectric
# NOTE(review): `reservoirs_gdf` (merged below) carries no `type` column, so this
# reclassification does not feed into `merged_df` - confirm where it should take effect.
mali_index = reservoirs_gdf_all.index[reservoirs_gdf_all['name'] == "Mali"][0]
reservoirs_gdf_all.at[mali_index, 'type'] = "hydroelectric"

# Step 1: every hp_summary record is kept (right join); RE-Emission inputs and
# reservoir area / geometry are attached where an ifc_id match exists.
merged_df = pd.merge(reemission_inputs, hp_summary, on='ifc_id', how='right')
merged_df = pd.merge(merged_df, reservoirs_gdf, on='ifc_id', how='left')

# Step 2: unmatched records get zero land cover fractions and zero area, then the
# IFC dam geometry is attached (reservoir geometry -> *_df1, IFC geometry -> *_ifc).
merged_df = (
    merged_df
    .fillna(dict.fromkeys(
        ['urban_area_fraction', 'forest_area_fraction', 'crop_area_fraction', 'r_area_km2'],
        0))
    .merge(ifc_gdf, on='ifc_id', suffixes=('_df1', '_ifc'))
)

# Step 3: fall back to the IFC geometry where no reservoir geometry exists, default
# the type to 'hydroelectric', and convert area fractions into areas lost [km2].
# The fraction columns and r_area_km2 are kept in merged_df.
merged_df = (
    merged_df
    .assign(
        geometry_df1=merged_df['geometry_df1'].fillna(merged_df['geometry_ifc']),
        type=merged_df['type'].fillna('hydroelectric'),
    )
    .drop(columns='geometry_ifc')
    .rename(columns={'geometry_df1': 'geometry'})
    .assign(
        urban_area_loss_km2=lambda d: d['urban_area_fraction'] * d['r_area_km2'],
        forest_area_loss_km2=lambda d: d['forest_area_fraction'] * d['r_area_km2'],
        crop_area_loss_km2=lambda d: d['crop_area_fraction'] * d['r_area_km2'],
    )
)

In [12]:
# Preview the first two merged records
merged_df.head(n=2)

Unnamed: 0,ifc_id,type,urban_area_fraction,forest_area_fraction,crop_area_fraction,name,hp_type_ifc,HP_mean,HP_firm,hp_type_reem,status,tot_em,Name,tot_em_soued,r_area_km2,geometry,urban_area_loss_km2,forest_area_loss_km2,crop_area_loss_km2
0,7,hydroelectric,0.0,0.0,0.0,Baluchaung (upper),RoR,15.0,0.0,ror,Future,0.0,,0.0,0.0,POINT (96.78192 20.48459),0.0,0.0,0.0
1,2,hydroelectric,0.0,0.0,0.0,Baluchaung 1,RoR,22.0,22.0,ror,Existing,0.0,,0.0,0.0,POINT (97.28700 19.64800),0.0,0.0,0.0


### `merged_df_plot` includes additional columns for visualisation that are not needed for optimization, e.g. emission intensities of hydroelectric dams and emission intensities of hydropower generation of multipurpose dams

In [13]:
# Join dataframes for plotting maps (visualising MOO results).
# `type`, `name` and `tot_em` exist on both sides of the second merge: the left-hand
# versions (from reemission_inputs / hp_summary) are kept under their plain names,
# the versions from the reservoir shapefile remain as `type_y`, `name_y`, `tot_em_y`.
merged_df_plot = (
    reemission_inputs
    .merge(hp_summary, on='ifc_id', how='right')
    .merge(reservoirs_gdf_all, on='ifc_id', how='left')
    .rename(columns={"type_x": "type", 'name_x': 'name', 'tot_em_x': 'tot_em'})
)

# Records without RE-Emission / reservoir data get zero fractions and zero area,
# then the IFC dam geometry is attached (reservoir geometry -> *_df1, IFC -> *_ifc).
merged_df_plot = (
    merged_df_plot
    .assign(**{col: merged_df_plot[col].fillna(0) for col in
               ['urban_area_fraction', 'forest_area_fraction', 'crop_area_fraction',
                'r_area_km2']})
    .merge(ifc_gdf, on='ifc_id', suffixes=('_df1', '_ifc'))
)

# Use the IFC geometry where no reservoir geometry exists and default the type to
# 'hydroelectric'. `geometry_ifc` is kept because the coordinates below are read from it.
merged_df_plot = (
    merged_df_plot
    .assign(
        geometry_df1=merged_df_plot['geometry_df1'].fillna(merged_df_plot['geometry_ifc']),
        type=merged_df_plot['type'].fillna('hydroelectric'))
    .rename(columns={'geometry_df1': 'geometry'})
    .eval('urban_area_loss_km2 = urban_area_fraction * r_area_km2')
    .eval('forest_area_loss_km2 = forest_area_fraction * r_area_km2')
    .eval('crop_area_loss_km2 = crop_area_fraction * r_area_km2')
    .drop(columns=['urban_area_fraction', 'forest_area_fraction', 'crop_area_fraction'])
)

# Calculate composite values for plotting
# coordinates_1 holds x and coordinates_0 holds y of the IFC geometry
merged_df_plot["coordinates_1"] = merged_df_plot.geometry_ifc.apply(lambda p: p.x)
merged_df_plot["coordinates_0"] = merged_df_plot.geometry_ifc.apply(lambda p: p.y)
# NOTE(review): the conversions below presume HP_mean / HP_firm are mean powers in MW - confirm
merged_df_plot['HP Production [GWh/year]'] = merged_df_plot["HP_mean"] * 365.25 * 24 / 1_000
merged_df_plot['Mean HP [GWh/d]'] = merged_df_plot["HP_mean"] * 24 / 1_000
merged_df_plot['Firm HP [GWh/d]'] = merged_df_plot['HP_firm'] * 24 / 1_000
merged_df_plot['Firm Power Ratio, [%]'] = merged_df_plot['Firm HP [GWh/d]'] / merged_df_plot['Mean HP [GWh/d]'] * 100

# Attach the hydropower share of each reservoir's utility (hp_fraction).
# NOTE(review): the outer join also keeps names that occur only in hp_multi_hp_fraction
# (all other columns NaN) - confirm that this is intended.
merged_df_plot = pd.merge(
    merged_df_plot, hp_multi_hp_fraction, how="outer", on="name")\
    .dropna(subset=['name'])

# Fill tot_em values for RoR HP
# Force the RoR emission intensity (gCO2eq/kWh) defined once at the top of the notebook
ror_em_intensity = ror_emission_intensity
# Emissions implied by that intensity and the mean generation; applied wherever the
# emission total is exactly 0 (i.e. no emissions were calculated for the record)
ror_emissions = merged_df_plot['HP_mean'] * ror_em_intensity / 1_000 * 365.25 * 24
for em_col in ('tot_em', 'tot_em_soued'):
    merged_df_plot[em_col] = merged_df_plot[em_col].mask(
        merged_df_plot[em_col] == 0, ror_emissions)

# Scale emissions of multipurpose reservoirs by the hydropower share
merged_df_plot['tot_em_hp'] = merged_df_plot['tot_em'] * merged_df_plot['hp_fraction']
merged_df_plot['tot_em_soued_hp'] = merged_df_plot['tot_em_soued'] * merged_df_plot['hp_fraction']

# Emission intensities: annual emissions divided by annual generation
for intensity_col, em_col in {
        'GHG intensity [gCO2,eq/kWh]': 'tot_em',
        'GHG intensity Soued [gCO2,eq/kWh]': 'tot_em_soued',
        'GHG intensity HP [gCO2,eq/kWh]': 'tot_em_hp',
        'GHG intensity Soued HP [gCO2,eq/kWh]': 'tot_em_soued_hp'}.items():
    merged_df_plot[intensity_col] = \
        merged_df_plot[em_col] / merged_df_plot['HP_mean'] * 1_000 / 365.25 / 24

merged_df_plot['Volume, Mm3'] = merged_df_plot['r_volume_m'] / 1_000_000
merged_df_plot['Area, km2'] = merged_df_plot['r_area_km2']
# Note: tot_em is in tCO2eq yr-1
for label_col, em_col in {
        'GHG, tCO2eq/yr': 'tot_em',
        'GHG Soued, tCO2eq/yr': 'tot_em_soued',
        'GHG HP, tCO2eq/yr': 'tot_em_hp',
        'GHG Soued HP, tCO2eq/yr': 'tot_em_soued_hp'}.items():
    merged_df_plot[label_col] = merged_df_plot[em_col]
merged_df_plot = merged_df_plot\
    .drop(columns=['r_volume_m', 'r_area_km2'])
# Fill NA volume values with default_ror_volume
default_ror_volume = 5 # Mm3
merged_df_plot['Volume, Mm3'] = merged_df_plot['Volume, Mm3'].fillna(default_ror_volume)
merged_df_plot.to_csv(pathlib.Path("intermediate/dams_for_plotting_moo.csv"), index=False)

In [14]:
# Specify the desired column order (this is also the set of columns kept downstream)
col_order = (
    # identification and classification
    ['ifc_id', 'name', 'type', 'status', 'hp_type_ifc', 'hp_type_reem']
    # hydropower output
    + ['HP_mean', 'HP_firm']
    # total emissions (RE-Emission and Soued et al.)
    + ['tot_em', 'tot_em_soued']
    # land lost to the reservoir
    + ['urban_area_loss_km2', 'forest_area_loss_km2', 'crop_area_loss_km2']
    + ['geometry']
)

In [15]:
# Keep only the columns needed downstream. The explicit copy detaches the subset from
# merged_df_plot so that later `.loc` assignments on it do not raise SettingWithCopyWarning.
merged_df_plot_trimmed = merged_df_plot[col_order].copy()

In [16]:
# Inspect the record with IFC id 73
merged_df_plot_trimmed[merged_df_plot_trimmed['ifc_id'] == 73]

Unnamed: 0,ifc_id,name,type,status,hp_type_ifc,hp_type_reem,HP_mean,HP_firm,tot_em,tot_em_soued,urban_area_loss_km2,forest_area_loss_km2,crop_area_loss_km2,geometry
58,73,Nam Paw,hydroelectric,Future,S,ror,11.0,1.4,289.278,289.278,0.0,0.0,0.0,POINT (97.91100 23.94434)


In [17]:
# Fill in some remaining tot_em values
# Work on an explicit copy so the `.loc` assignments below act on an independent frame
merged_df_plot_trimmed = merged_df_plot_trimmed.copy()
rows_with_null_tot_em = merged_df_plot_trimmed[merged_df_plot_trimmed['tot_em'].isnull()]
# Assume all those rows for which the emission total is null are ror sites for which
# emissions have not been calculated, and estimate them from the RoR emission intensity
# and the mean generation. Each column is filled from its OWN null mask: previously the
# rows with a null `tot_em_soued` were written into `tot_em`, which overwrote valid
# RE-Emission totals and left `tot_em_soued` unfilled.
for em_col in ('tot_em', 'tot_em_soued'):
    missing = merged_df_plot_trimmed[em_col].isnull()
    merged_df_plot_trimmed.loc[missing, em_col] = (
        ror_em_intensity
        * merged_df_plot_trimmed.loc[missing, 'HP_mean'] / 1_000
        * 365.25 * 24
    )

In [18]:
# Records that still lack an emission total after the fill (for inspection)
rows_with_null_tot_em = merged_df_plot_trimmed.loc[
    merged_df_plot_trimmed['tot_em'].isna()]
rows_with_null_tot_em_soued = merged_df_plot_trimmed.loc[
    merged_df_plot_trimmed['tot_em_soued'].isna()]

In [19]:
# Promote the trimmed table to a GeoDataFrame, using its `geometry` column as the
# active geometry (needed for the spatial join with the village points).
# NOTE(review): no `crs` is passed here - confirm that the geometry column carries the
# same CRS as the village layer before relying on the spatial join.
merged_gdf = gpd.GeoDataFrame(merged_df_plot_trimmed, geometry=merged_df_plot_trimmed['geometry'])

## 3. Intersect the merged dataframe with village data points

In [20]:
# Village locations: populated-places point layer (judging by the file name, the
# HOT OSM export for Myanmar)
villages_gdf = gpd.read_file(
    pathlib.Path("bin/gis_layers/hotosm_mmr_populated_places_points_shp/hotosm_mmr_populated_places_points.shp")
)

In [21]:
# Count, per dam, the village points that spatially match the dam's geometry.
# Villages without a match have no `name_right` and therefore drop out of the groupby.
flooded_villages = (
    gpd.sjoin(villages_gdf, merged_gdf, how='left')
    .groupby('name_right')['osm_id']
    .count()
    .reset_index()
    .rename(columns={"osm_id": "count", "name_right": "name"})
)

flooded_villages

Unnamed: 0,name,count
0,Belin,9
1,Laza,8
2,Lemro 2,9
3,Manipur,1
4,Mantong,1
5,Mong Ton,1
6,Myitsone,3
7,Renan,2
8,Suo Lwe,1
9,Tamanthi,15


## 4. Add information about flooded villages to `merged_gdf`

In [22]:
# Attach the village counts to every dam; dams without any matched village get 0
village_counts = flooded_villages.rename(columns={"count": "flooded_villages"})
final_gdf = merged_gdf.merge(village_counts, on="name", how='left')
final_gdf["flooded_villages"] = final_gdf["flooded_villages"].fillna(0)
final_gdf.head()

Unnamed: 0,ifc_id,name,type,status,hp_type_ifc,hp_type_reem,HP_mean,HP_firm,tot_em,tot_em_soued,urban_area_loss_km2,forest_area_loss_km2,crop_area_loss_km2,geometry,flooded_villages
0,7,Baluchaung (upper),hydroelectric,Future,RoR,ror,15.0,0.0,394.47,394.47,0.0,0.0,0.0,POINT (96.78192 20.48459),0.0
1,2,Baluchaung 1,hydroelectric,Existing,RoR,ror,22.0,22.0,578.556,578.556,0.0,0.0,0.0,POINT (97.28700 19.64800),0.0
2,4,Baluchaung 2,hydroelectric,Existing,RoR,ror,134.0,134.0,3523.932,3523.932,0.0,0.0,0.0,POINT (97.35800 19.55700),0.0
3,6,Baluchaung 3,hydroelectric,Existing,RoR,ror,42.0,42.0,1104.516,1104.516,0.0,0.0,0.0,POINT (97.39700 19.54600),0.0
4,8,Bawgata,hydroelectric,Future,S,sto,36.0,3.0,9215.8903,21316.404722,0.0,9.764712,0.0,"POLYGON ((96.85833 18.26250, 96.85833 18.27083...",0.0


## 5. Save dataframes for post-processing
1. Save Future reservoirs for optimization purposes
2. Save Existing reservoirs for statistics on existing hydroelectric reservoirs

In [23]:
output_folder = pathlib.Path("outputs/moo")
# Create the folder (and any missing parents); a no-op when it already exists
output_folder.mkdir(parents=True, exist_ok=True)

# Add status_int column: 1 for existing dams, 0 for future dams
final_gdf['status_int'] = final_gdf['status'].map({'Existing': 1, 'Future': 0})

# The CSV files are written without geometry. The dataframe index is written as the
# first (unnamed) column, as before.
# 1. All dams
(
    final_gdf
    .drop(columns=['geometry'])
    .to_csv(output_folder/'all_hp.csv')
)

# 2. Existing dams (for statistics on existing hydroelectric reservoirs)
(
    final_gdf
    .query('status == "Existing"')
    .drop(columns=['geometry'])
    .to_csv(output_folder/'existing_hp.csv')
)

# 3. Future dams (for optimization): the descriptive columns are dropped as well
(
    final_gdf
    .query('status == "Future"')
    .drop(columns=['geometry', 'type', 'status', 'hp_type_ifc', 'hp_type_reem'])
    .to_csv(output_folder/'future_hp.csv')
)

In [24]:
# Preview the final table, now including the status_int column
final_gdf.head(n=5)

Unnamed: 0,ifc_id,name,type,status,hp_type_ifc,hp_type_reem,HP_mean,HP_firm,tot_em,tot_em_soued,urban_area_loss_km2,forest_area_loss_km2,crop_area_loss_km2,geometry,flooded_villages,status_int
0,7,Baluchaung (upper),hydroelectric,Future,RoR,ror,15.0,0.0,394.47,394.47,0.0,0.0,0.0,POINT (96.78192 20.48459),0.0,0
1,2,Baluchaung 1,hydroelectric,Existing,RoR,ror,22.0,22.0,578.556,578.556,0.0,0.0,0.0,POINT (97.28700 19.64800),0.0,1
2,4,Baluchaung 2,hydroelectric,Existing,RoR,ror,134.0,134.0,3523.932,3523.932,0.0,0.0,0.0,POINT (97.35800 19.55700),0.0,1
3,6,Baluchaung 3,hydroelectric,Existing,RoR,ror,42.0,42.0,1104.516,1104.516,0.0,0.0,0.0,POINT (97.39700 19.54600),0.0,1
4,8,Bawgata,hydroelectric,Future,S,sto,36.0,3.0,9215.8903,21316.404722,0.0,9.764712,0.0,"POLYGON ((96.85833 18.26250, 96.85833 18.27083...",0.0,0
