In [None]:
import rioxarray
import pandas as pd
import xarray as xr
import geopandas as gpd
import os

# Read in data

In [None]:

def read_process_csv_to_gdf(csv):
    """Load a reservoir CSV and return it as a point GeoDataFrame in EPSG:4326.

    The base name of ``csv`` supplies two metadata columns: its first eight
    characters become ``satellite`` and characters 9-12 (0-based) are parsed
    as the integer ``year``.

    Parameters
    ----------
    csv : str
        Path to a CSV containing at least ``area``, ``longitude`` and
        ``latitude`` columns.

    Returns
    -------
    geopandas.GeoDataFrame
        The CSV rows plus ``satellite``, ``year``, ``area_ha``, ``area_km``
        and a point ``geometry`` built from longitude/latitude.

    Raises
    ------
    ValueError
        If characters 9-12 of the base name are not an integer.

    Notes
    -----
    No rows are filtered here; any thresholding on ``hydropoly_max`` or
    ``area_ha`` is left to the caller.
    """
    records = pd.read_csv(csv)
    file_name = os.path.basename(csv)

    # presumably `area` is a count of 10 m x 10 m pixels (100 m2 each) -- TODO confirm
    area_scaled = records['area'] * 100

    records = records.assign(
        satellite=file_name[:8],
        year=int(file_name[9:13]),
        area_ha=area_scaled / 10000,  # HA
        area_km=area_scaled / (1000 * 1000),  # km2
    )

    point_geometry = gpd.points_from_xy(records['longitude'], records['latitude'])
    return gpd.GeoDataFrame(records, geometry=point_geometry, crs='EPSG:4326')

def read_process_region_csv(csv):
    """Load a per-region CSV and tag it with metadata parsed from its file name.

    Parameters
    ----------
    csv : str
        Path to the CSV. The first eight characters of its base name are used
        as the ``satellite`` value and characters 9-12 (0-based) as the
        integer ``year``.

    Returns
    -------
    pandas.DataFrame
        The CSV contents with ``satellite`` and ``year`` columns added.

    Raises
    ------
    ValueError
        If characters 9-12 of the base name are not an integer.
    """
    region_df = pd.read_csv(csv)
    file_name = os.path.basename(csv)
    return region_df.assign(
        satellite=file_name[:8],
        year=int(file_name[9:13]),
    )

def sjoin_summarize(points_gdf, poly_gdf, poly_field):
    """Count the points that fall within each polygon group.

    Parameters
    ----------
    points_gdf : geopandas.GeoDataFrame
        Point features to be assigned to polygons.
    poly_gdf : geopandas.GeoDataFrame
        Polygon features; must contain the column ``poly_field``.
    poly_field : str
        Column of ``poly_gdf`` to group the joined points by.

    Returns
    -------
    pandas.DataFrame
        One row per ``poly_field`` value that received at least one point
        (inner join), holding the non-null count of every joined column.
    """
    return (
        gpd.sjoin(points_gdf, poly_gdf, how='inner', predicate='within')
        .groupby(poly_field)
        .count()
    )

def sjoin_summarize_nogroup(points_gdf, poly_gdf):
    """Summarize ``area_ha`` over all points that fall within any polygon.

    Parameters
    ----------
    points_gdf : geopandas.GeoDataFrame
        Point features; must contain an ``area_ha`` column.
    poly_gdf : geopandas.GeoDataFrame
        Polygon features used as the spatial filter.

    Returns
    -------
    pandas.DataFrame
        A single ``area_ha`` column indexed by ``sum``, ``count`` and
        ``median`` over the inner-joined points.
    """
    points_in_polys = gpd.sjoin(
        points_gdf, poly_gdf, how='inner', predicate='within'
    )
    summary_stats = ['sum', 'count', 'median']
    return points_in_polys[['area_ha']].agg(summary_stats)

In [None]:
# Load the 2021 Sentinel reservoir points. read_process_csv_to_gdf already adds
# the area_ha / area_km columns, so they are not recomputed here.
res_raw_gdf = read_process_csv_to_gdf('../clean_summarize/out/sentinel_2021_v6_wgs84_combined_merged.csv')

# Keep rows with hydropoly_max < 100 and an area below 100 ha. The explicit
# .copy() makes res_gdf an independent frame, so adding a column below does not
# trigger pandas' SettingWithCopyWarning, and the cell is idempotent on re-run.
res_gdf = res_raw_gdf.loc[
    (res_raw_gdf['hydropoly_max'] < 100) & (res_raw_gdf['area_ha'] < 100)
].copy()

# presumably area in m2 (area is assumed to be a count of 100 m2 pixels) -- TODO confirm
res_gdf['area_m'] = res_gdf['area'] * 100

In [None]:
# Read the GDW barriers layer and restrict it to Brazil.
gdw_source_gdf = gpd.read_file('../../../../reservoir-id-cnn/reservoir-id-cnn/analysis/other_dams/gdw/GDW_v1_0_shp/GDW_barriers_v1_0.shp')
gdw_brazil_gdf = gdw_source_gdf[gdw_source_gdf['COUNTRY'] == 'Brazil']

# Hydropower barriers (main or secondary use) are taken before the area filter,
# so they include barriers of any AREA_POLY.
gdw_df_allhydro = gdw_brazil_gdf[gdw_brazil_gdf['USE_ELEC'].isin(['Main', 'Sec'])]

# Keep barriers with -1 < AREA_POLY < 1.
# NOTE(review): AREA_POLY is presumably in km2 and the lower bound presumably
# drops negative no-data codes -- confirm against the GDW documentation.
gdw_poly_area = gdw_brazil_gdf['AREA_POLY']
gdw_df = gdw_brazil_gdf[(gdw_poly_area < 1) & (gdw_poly_area > -1)]

In [None]:
# Read the ANA water-body polygons and keep only the artificial ones.
ana_gdf = gpd.read_file('../compare_other_methods/data/ana/Massas_d_Agua.shp')
# .copy() gives an independent frame so the geometry assignment below does not
# write into a filtered slice (avoids pandas' SettingWithCopyWarning).
ana_gdf = ana_gdf.loc[ana_gdf['detipomass']=='Artificial'].copy()

# Reduce each polygon to its centroid so the layer can be used as points.
# NOTE(review): the centroid is computed in the file's native CRS before
# reprojecting; if that CRS is geographic, geopandas warns that centroids may
# be inaccurate -- confirm this is acceptable.
ana_gdf['geometry'] = ana_gdf['geometry'].centroid
ana_gdf = ana_gdf.to_crs('EPSG:4326')

# Hydropower water bodies are taken before the area filter, so they include
# water bodies of any size.
ana_gdf_allhydro = ana_gdf.loc[ana_gdf['usoprinc']=='Hidrelétrica']
# Keep water bodies with nuareakm2 < 1 (presumably area in km2 -- TODO confirm).
ana_gdf = ana_gdf.loc[ana_gdf['nuareakm2']<1]

# Reservoirs per watershed

In [None]:
# Watershed attribute used below to dissolve the drainage areas and to group
# the spatially joined points; it is also part of the cached shapefile name.
# NOTE(review): presumably the Otto-Pfafstetter level-6 code -- confirm.
level_name='nunivotto6'

In [None]:
# Build (once) and cache the watersheds dissolved by `level_name`, clipped to
# the Brazilian states, with their area in km2.
dissolved_wgs84_path = './data/watersheds_2017_{}.shp'.format(level_name)
if not os.path.isfile(dissolved_wgs84_path):
    state_gdf = gpd.read_file('./data/Brazilian_States.shp').to_crs('EPSG:4326')
    dissolved_gdf = (
        gpd.read_file('data/bho_2017_v_01_05_50k.gpkg', layer='pgh_output.geoft_bho_area_drenagem')
        .to_crs('EPSG:4326')
        .dissolve(by=level_name)
        .clip(state_gdf)
    )
    # Area is measured after reprojecting to ESRI:102033; m2 -> km2.
    dissolved_gdf['area_km2'] = dissolved_gdf.to_crs('ESRI:102033').area/(1000*1000)
    # dissolve() leaves `level_name` as the (named) index, which to_file writes
    # out as a regular column.
    dissolved_gdf.to_file(dissolved_wgs84_path)

# Always load from the cached file so the first run and every later run work
# with exactly the same frame (shapefile round-trips can alter column names
# and dtypes compared with the in-memory result).
watershed_gdf = gpd.read_file(dissolved_wgs84_path)

In [None]:
# Per-watershed point counts for every dataset, computed in the same order as
# the names on the left-hand side.
(
    res_watershed,
    gdw_watershed,
    gdw_hydro_watershed,
    ana_watershed,
    ana_hydro_watershed,
) = (
    sjoin_summarize(points_gdf, watershed_gdf, level_name)
    for points_gdf in (res_gdf, gdw_df, gdw_df_allhydro, ana_gdf, ana_gdf_allhydro)
)

In [None]:
print(watershed_gdf.shape)

# Fraction of watersheds that contain at least one point from each dataset.
# The watershed column comes from `level_name` (not a hard-coded string) so
# this cell stays correct if the level is changed above.
n_watersheds = watershed_gdf[level_name].unique().shape[0]
for watershed_summary in (
    res_watershed,
    gdw_watershed,
    gdw_hydro_watershed,
    ana_watershed,
    ana_hydro_watershed,
):
    print(watershed_summary.shape[0]/n_watersheds)

# Catchment Area Fragmentation Index (CAFI)

We did not end up using this index, because it weights downstream barriers much more heavily than headwater barriers.

For example, a single dam located at the outlet of a river would result in CAFI reporting the watershed as 100% fragmented.

Since most of our reservoirs are in headwaters, the resulting CAFI is low.

In [None]:
# Open the flow-accumulation raster; it is sampled at point locations in the
# cells below via its x/y coordinates.
# NOTE(review): presumably the HydroSHEDS South America 3 arc-second
# accumulation grid (values in upstream cell counts) -- confirm.
flow_acc = rioxarray.open_rasterio("data/sa_acc_3s.tif")

In [None]:
# Sample the flow-accumulation raster at the nearest cell to each detected
# reservoir; both indexers share the 'res' dimension so the lookup is
# point-wise rather than an outer product.
xr_indexer = dict(
    x=xr.DataArray(res_gdf['longitude'].values, dims=['res']),
    y=xr.DataArray(res_gdf['latitude'].values, dims=['res']),
)
res_upstream_area = flow_acc.sel(xr_indexer, method='nearest')
res_vals = res_upstream_area.values

# Total and maximum sampled accumulation over all reservoirs.
print(res_vals.sum(), res_vals.max(), sep='\n')

In [None]:
# Sample the flow-accumulation raster at the nearest cell to each GDW barrier,
# using the dam coordinates stored in the attribute table.
gdw_indexer = dict(
    x=xr.DataArray(gdw_df['LONG_DAM'].values, dims=['res']),
    y=xr.DataArray(gdw_df['LAT_DAM'].values, dims=['res']),
)
gdw_upstream_area = flow_acc.sel(gdw_indexer, method='nearest')
gdw_vals = gdw_upstream_area.values

# Total and maximum sampled accumulation over all GDW barriers.
print(gdw_vals.sum(), gdw_vals.max(), sep='\n')


In [None]:

# Sample the flow-accumulation raster at the nearest cell to each ANA water
# body, using the point geometry (centroid) coordinates.
ana_points = ana_gdf.geometry
ana_indexer = dict(
    x=xr.DataArray(ana_points.x, dims=['res']),
    y=xr.DataArray(ana_points.y, dims=['res']),
)
ana_upstream_area = flow_acc.sel(ana_indexer, method='nearest')
ana_vals = ana_upstream_area.values

# Total and maximum sampled accumulation over all ANA water bodies.
print(ana_vals.sum(), ana_vals.max(), sep='\n')