In [1]:
import geopandas as gpd
import glob
import pandas as pd
import os
import matplotlib.pyplot as plt
import matplotlib as mpl

# Read in Data

In [2]:
def read_process_csv_to_gdf(csv):
    """Load one merged reservoir CSV and return it as a filtered point GeoDataFrame.

    The first three characters of the file name are stored in ``satellite`` and
    characters 4-8 are parsed as the integer ``year``. Rows are kept only when
    ``hydropoly_max`` is below 100 and the derived ``area_ha`` is below 100.

    Parameters
    ----------
    csv : str
        Path to a CSV with at least the columns ``hydropoly_max``, ``area``,
        ``center_lon`` and ``center_lat``.

    Returns
    -------
    geopandas.GeoDataFrame
        The filtered rows plus ``satellite``, ``year``, ``area_ha`` and
        ``area_km``, with point geometry built from ``center_lon`` /
        ``center_lat`` and the CRS set to ``ESRI:102033``.
    """
    file_name = os.path.basename(csv)
    temp_df = pd.read_csv(csv)
    temp_df['satellite'] = file_name[:3]
    temp_df['year'] = int(file_name[4:8])
    # .copy() detaches the filtered rows from the parent frame so the column
    # assignments below do not raise SettingWithCopyWarning.
    temp_df = temp_df.loc[temp_df['hydropoly_max']<100].copy()
    # presumably `area` is a pixel count at 100 m^2 per pixel — TODO confirm
    temp_df['area_ha'] = temp_df['area']*100/10000 # HA
    temp_df['area_km'] = temp_df['area']*100/(1000*1000) # km2
    temp_df = temp_df.loc[temp_df['area_ha']<100].copy() # Keep only rows below 100 ha
    # NOTE(review): the column names suggest geographic lon/lat, yet the points
    # are tagged with the projected CRS ESRI:102033 — confirm the coordinates
    # are already expressed in that projection.
    temp_gdf = gpd.GeoDataFrame(
        temp_df, geometry=gpd.points_from_xy(temp_df.center_lon, temp_df.center_lat),
        crs='ESRI:102033'
    )
    return temp_gdf

def read_process_region_csv(csv):
    """Read a per-region summary CSV and tag every row with satellite and year.

    Both tags come from the file name: its first three characters become the
    ``satellite`` column and characters 4-8 are parsed as the integer ``year``.

    Parameters
    ----------
    csv : str
        Path to the summary CSV.

    Returns
    -------
    pandas.DataFrame
        The CSV contents with ``satellite`` and ``year`` columns added.
    """
    file_name = os.path.basename(csv)
    return pd.read_csv(csv).assign(
        satellite=file_name[:3],
        year=int(file_name[4:8]),
    )

In [3]:
# Every merged v3 result CSV in the v3_cloudfilt output folder, in sorted order.
all_csvs = sorted(glob.glob('../clean_summarize/out/v3_cloudfilt/ls*v3_merged.csv'))

In [4]:
def sjoin_summarize(points_gdf, poly_gdf, poly_field):
    """Summarise ``area_ha`` of the points falling within each polygon.

    Points are inner-joined to polygons with the ``within`` predicate, then
    grouped by ``poly_field``.

    Parameters
    ----------
    points_gdf : geopandas.GeoDataFrame
        Point layer carrying an ``area_ha`` column.
    poly_gdf : geopandas.GeoDataFrame
        Polygon layer carrying the ``poly_field`` column.
    poly_field : str
        Name of the polygon column to group by.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``poly_field`` with the columns ``sum``, ``count`` and
        ``median`` of ``area_ha``.
    """
    points_in_polys = gpd.sjoin(points_gdf, poly_gdf, how='inner', predicate='within')
    area_by_poly = points_in_polys.groupby(poly_field)['area_ha']
    return area_by_poly.agg(['sum', 'count', 'median'])
    

# River Basin/Hydrographic Region

In [5]:
# Hydrographic-region polygons, reprojected to the CRS used for the reservoir points.
basin_gdf = (
    gpd.read_file('./data/macro_RH.shp')
    .to_crs('ESRI:102033')
)

# Biome

In [6]:

# Biome polygons, reprojected to the CRS used for the reservoir points.
biome_gdf = (
    gpd.read_file('./data/lm_bioma_250.shp')
    .to_crs('ESRI:102033')
)

# State

In [7]:
# State polygons, reprojected to the CRS used for the reservoir points.
state_gdf = (
    gpd.read_file('./data/Brazilian_States.shp')
    .to_crs('ESRI:102033')
)

# Run them all by year

In [None]:
# Folder for the per-region summary tables. Created up front so the first
# to_csv call does not fail on a fresh checkout.
out_dir = './out'
os.makedirs(out_dir, exist_ok=True)

# One entry per summary: (polygon layer, grouping column, output-name suffix).
region_layers = [
    (basin_gdf, 'nm_macroRH', 'basin_stats.csv'),
    (state_gdf, 'UF_05', 'state_stats.csv'),
    (biome_gdf, 'Bioma', 'biome_stats.csv'),
]

for in_csv in all_csvs:
    in_name = os.path.basename(in_csv)
    # The point layer is loaded lazily: only when at least one of this
    # file's summaries is missing, and at most once per file.
    res_gdf = None
    for poly_gdf, poly_field, suffix in region_layers:
        out_csv = '{}/{}'.format(out_dir, in_name.replace('merged.csv', suffix))
        if os.path.isfile(out_csv):
            # Already computed on an earlier run; skip.
            continue
        if res_gdf is None:
            res_gdf = read_process_csv_to_gdf(in_csv)
        sjoin_summarize(res_gdf, poly_gdf, poly_field).to_csv(out_csv)
    print(in_name, "Done")

# Make some figures

## Basin

In [9]:
# Three-letter labels for the hydrographic regions, keyed by the full region
# name as it appears in the `nm_macroRH` column.
basin_shortname_dict = {
    'AMAZÔNICA': 'AMZ',
    'ATLÂNTICO LESTE': 'ALT',
    'ATLÂNTICO NORDESTE OCIDENTAL': 'AOC',
    'ATLÂNTICO NORDESTE ORIENTAL': 'AOR',
    'ATLÂNTICO SUDESTE': 'ASD',
    'ATLÂNTICO SUL': 'ASU',
    'PARAGUAI': 'PRG',
    'PARANÁ': 'PRN',
    'PARNAÍBA': 'PNB',
    'SÃO FRANCISCO': 'SFO',
    'TOCANTINS-ARAGUAIA': 'TOC',
    'URUGUAI': 'URU',
}

In [None]:
# Read and process
basin_csvs = sorted(glob.glob('./out/ls*cloudfilt*basin*.csv'))
basin_list = [read_process_region_csv(csv) for csv in basin_csvs]
basin_df = pd.concat(basin_list).set_index('year')
# Some filtering: drop Landsat 7 rows after 2019, then average whatever rows
# remain for the same year and hydrographic region.
basin_df = basin_df.loc[~((basin_df.index>2019)&(basin_df.satellite=='ls7'))]
# numeric_only=True keeps the string `satellite` column out of the mean;
# without it pandas >= 2.0 raises a TypeError on that column.
basin_df = (
    basin_df.groupby(['year', 'nm_macroRH'])
    .mean(numeric_only=True)
    .reset_index()
    .set_index('year')
)
basin_df['hydro_region'] = basin_df['nm_macroRH'].map(basin_shortname_dict)
# Wide layout: one row per year, columns keyed by (statistic, hydro_region).
basin_df_columns = basin_df.reset_index().set_index(['year','hydro_region']).unstack(level=1).drop(
    columns=['nm_macroRH'])

In [11]:
# Order the hydrographic-region columns by their 1984 count, smallest first.
basin_columns_sorted = (
    basin_df_columns.loc[1984, 'count']
    .sort_values()
    .index
)
basin_df_columns = basin_df_columns.reindex(columns=basin_columns_sorted, level=1)

In [None]:
# Stacked-area charts per hydrographic region: count (left) and summed area (right).
fig, axs = plt.subplots(1,2, figsize=(16,6))
for ax, stat, ylabel, title in zip(
        axs,
        ['count', 'sum'],
        ['Count', 'Area (ha)'],
        ['Reservoir Count', 'Reservoir Area']):
    basin_df_columns[stat].plot(ax=ax, kind='area', legend=False, colormap='tab20')
    ax.set_ylabel(ylabel)
    ax.set_title(title)

# One shared legend beside the right panel, reversed so its order matches
# the stacking order from top to bottom.
handles, labels = axs[0].get_legend_handles_labels()
axs[1].legend(handles[::-1], labels[::-1], loc='center left',bbox_to_anchor=(1, 0.5),
              ncol=1, title='Hydrographic Region')
fig.tight_layout()

## Biome

In [None]:
# Read and process
biome_csvs = sorted(glob.glob('./out/ls*cloudfilt*biome*.csv'))
biome_list = [read_process_region_csv(csv) for csv in biome_csvs]
biome_df = pd.concat(biome_list).set_index('year')
# Some filtering: drop Landsat 7 rows after 2019, then average whatever rows
# remain for the same year and biome.
biome_df = biome_df.loc[~((biome_df.index>2019)&(biome_df.satellite=='ls7'))]
# numeric_only=True keeps the string `satellite` column out of the mean;
# without it pandas >= 2.0 raises a TypeError on that column.
biome_df = (
    biome_df.groupby(['year', 'Bioma'])
    .mean(numeric_only=True)
    .reset_index()
    .set_index('year')
)
biome_df = biome_df.sort_index()
biome_df['biome'] = biome_df['Bioma']
# Wide layout: one row per year, columns keyed by (statistic, biome).
biome_df_columns = biome_df.reset_index().set_index(['year','biome']).unstack(level=1).drop(
    columns=['Bioma'])
# Fixed, hand-picked column order used for plotting.
biome_columns_sorted = ['Pampa','Pantanal', 'Amazônia', 'Cerrado',  'Mata Atlântica', 'Caatinga']
biome_df_columns = biome_df_columns.reindex(biome_columns_sorted, axis=1, level=1)
# Rescale for plotting: counts in thousands, summed area from ha to km^2.
biome_df_columns['count'] = biome_df_columns['count']/1000
biome_df_columns['sum'] = biome_df_columns['sum']/100

In [None]:

# Line charts per biome: count (left) and summed area (right). Each line is
# labelled directly at its last plotted year instead of using a legend.
fig, axs = plt.subplots(1,2, figsize=(16,6))
color_list = ["#bdcc99","#ab9c97","#b3c4b9","#e0bc86","#898b9e","#adc0cc"]
biome_cmap = mpl.colors.ListedColormap(color_list)
first_year, last_year = 1984, 2023

for ax, stat in zip(axs, ['count', 'sum']):
    biome_df_columns[stat].plot(ax=ax, kind='line', legend=False, colormap=biome_cmap, lw=1.75)
    # Add labels next to biome: just right of the axis, at the line's last value.
    for color, biome in zip(color_list, biome_columns_sorted):
        last_value = biome_df_columns[(stat, biome)].loc[last_year]
        ax.text(last_year + 0.5, last_value, biome.replace(' ', '\n'), color=color,
                fontweight='bold', size=12, va='center')
    ax.set_xlim(first_year, last_year)
    ax.set_xlabel('Year', size=12)

axs[0].set_ylabel('Count (thousands)', size=12)
axs[1].set_ylabel('Area (km$^2$)', size=12)
# Panel letters in the top-left corner of each axes.
for i, label in enumerate(['$(a)$', '$(b)$']):
    axs[i].annotate(
            label,
            xy=(0, 1), xycoords='axes fraction',
            xytext=(0.3, -1.5), textcoords='offset fontsize',
            fontsize=12, verticalalignment='bottom', fontfamily='serif')
fig.tight_layout()

In [None]:
# Stacked-area charts per biome: count (left) and summed area (right). Each
# band is labelled directly at its last plotted year instead of using a legend.
fig, axs = plt.subplots(1,2, figsize=(16,6))
color_list = ["#bdcc99","#ab9c97","#b3c4b9","#e0bc86","#898b9e","#adc0cc"]
biome_cmap = mpl.colors.ListedColormap(color_list)
first_year, last_year = 1984, 2023

for ax, stat in zip(axs, ['count', 'sum']):
    biome_df_columns[stat].plot(ax=ax, kind='area', legend=False, colormap=biome_cmap)
    # Add labels next to biome: each label sits at the vertical midpoint of
    # its band, i.e. the running total of the bands below plus half its height.
    stack_base = 0
    for color, biome in zip(color_list, biome_columns_sorted):
        band_height = biome_df_columns[(stat, biome)].loc[last_year]
        ax.text(last_year + 0.5, stack_base + band_height/2, biome.replace(' ', '\n'),
                color=color, fontweight='bold', size=12, va='center')
        stack_base += band_height
    ax.set_xlim(first_year, last_year)
    ax.set_xlabel('Year', size=12)

axs[0].set_ylabel('Count (thousands)', size=12)
axs[1].set_ylabel('Area (km$^2$)', size=12)
# Panel letters in the top-left corner of each axes.
for i, label in enumerate(['$(a)$', '$(b)$']):
    axs[i].annotate(
            label,
            xy=(0, 1), xycoords='axes fraction',
            xytext=(0.3, -1.5), textcoords='offset fontsize',
            fontsize=12, verticalalignment='bottom', fontfamily='serif')
fig.tight_layout()

## State

In [None]:
# Read and process
state_csvs = sorted(glob.glob('./out/ls*cloudfilt*state*.csv'))
state_list = [read_process_region_csv(csv) for csv in state_csvs]
state_df = pd.concat(state_list).set_index('year')
# Some filtering: drop Landsat 7 rows after 2019, then average whatever rows
# remain for the same year and state.
state_df = state_df.loc[~((state_df.index>2019)&(state_df.satellite=='ls7'))]
# numeric_only=True keeps the string `satellite` column out of the mean;
# without it pandas >= 2.0 raises a TypeError on that column.
state_df = (
    state_df.groupby(['year', 'UF_05'])
    .mean(numeric_only=True)
    .reset_index()
    .set_index('year')
)
# Keep years up to and including 2019 (label-based slice on the year index).
state_df = state_df.loc[:2019]
state_df = state_df.sort_index()
# Wide layout: one row per year, columns keyed by (statistic, state).
state_df_columns = state_df.reset_index().set_index(['year','UF_05']).unstack(level=1)

In [None]:
# Stacked-area charts per state: count (left) and summed area (right).
fig, axs = plt.subplots(1,2, figsize=(18,7))
count_ax, area_ax = axs

state_df_columns['count'].plot(ax=count_ax, kind='area', legend=False)
count_ax.set_ylabel('Count')
count_ax.set_title('Reservoir Count')

state_df_columns['sum'].plot(ax=area_ax, kind='area', legend=False)
area_ax.set_ylabel('Area (ha)')
area_ax.set_title('Reservoir Area')

# One shared legend beside the right panel, reversed so its order matches
# the stacking order from top to bottom.
handles, labels = count_ax.get_legend_handles_labels()
area_ax.legend(handles[::-1], labels[::-1], loc='center left',bbox_to_anchor=(1, 0.5),
              ncol=1, title='State')
fig.tight_layout()

### Region

In [18]:
# Lookup from the state code (`UF_05`) to its macro-region name (`REGIAO`),
# taken from the state polygon attributes.
reg_dict = state_gdf.set_index('UF_05')['REGIAO'].to_dict()

# Short labels for the macro-region names.
reg_shortname_dict = {
    'Sul': 'S',
    'Sudeste': 'SE',
    'Nordeste': 'NE',
    'Centro-Oeste': 'CO',
    'Norte': 'N',
}

In [19]:
# Map each state code to its macro-region name, then to the region's short label.
state_df['region'] = (
    state_df['UF_05']
    .map(reg_dict)
    .map(reg_shortname_dict)
)

In [20]:
# Roll the state-level table up to macro regions by summing within each
# (year, region) pair.
# NOTE(review): every remaining column is summed, including a per-state
# `median` if present — that total is presumably not meaningful; confirm
# before using it.
reg_df = (
    state_df
    .drop(columns=['UF_05'])
    .groupby(['year','region'])
    .sum()
)
# Wide layout: one row per year, columns keyed by (statistic, region).
reg_df_columns = reg_df.unstack(level=1)
# Sort the region columns by their 1984 count, smallest first.
reg_columns_sorted = (
    reg_df_columns.loc[1984, 'count']
    .sort_values()
    .index
)
reg_df_columns = reg_df_columns.reindex(columns=reg_columns_sorted, level=1)

In [None]:
# Stacked-area charts per macro region: count (left) and summed area (right).
fig, axs = plt.subplots(1,2, figsize=(18,7))
panel_specs = [
    ('count', 'Count', 'Reservoir Count'),
    ('sum', 'Area (ha)', 'Reservoir Area'),
]
for ax, (stat, ylabel, title) in zip(axs, panel_specs):
    reg_df_columns[stat].plot(ax=ax, kind='area', legend=False)
    ax.set_ylabel(ylabel)
    ax.set_title(title)

# One shared legend beside the right panel, reversed so its order matches
# the stacking order from top to bottom.
handles, labels = axs[0].get_legend_handles_labels()
axs[1].legend(handles[::-1], labels[::-1], loc='center left',bbox_to_anchor=(1, 0.5),
              ncol=1, title='Region')
fig.tight_layout()

# Region and Basin

In [None]:
# 2x2 figure: macro regions on the top row, hydrographic regions on the bottom
# row; counts (in thousands) on the left, summed area (ha / 100 = km^2) on the right.
fig, axs = plt.subplots(2,2, figsize=(18,12))

# Region
(reg_df_columns['count']/1000).plot(ax=axs[0,0], kind='area', legend=False)
(reg_df_columns['sum']/100).plot(ax=axs[0,1], kind='area', legend=False)
handles, labels = axs[0,0].get_legend_handles_labels()
axs[0,1].legend(handles[::-1], labels[::-1], loc='center left',bbox_to_anchor=(1, 0.5),
              ncol=1, title='Macro Region')
axs[0,0].set_title('Reservoir Count')
axs[0,1].set_title('Reservoir Area')

# Hydro
(basin_df_columns['count']/1000).plot(ax=axs[1,0], kind='area', legend=False, colormap='tab20')
(basin_df_columns['sum']/100).plot(ax=axs[1,1], kind='area', legend=False, colormap='tab20')
handles, labels = axs[1,0].get_legend_handles_labels()
axs[1,1].legend(handles[::-1], labels[::-1], loc='center left',bbox_to_anchor=(1, 0.5),
              ncol=1, title='Hydrographic Region')

for ax in axs[:,0]:
    ax.set_ylabel('Count (thousands)')
for ax in axs[:,1]:
    ax.set_ylabel('Area ($km^2$)')
for ax in axs.flatten():
    ax.set_xlabel('Year')
    # Every panel is limited to 1984-2019 on the x axis.
    ax.set_xlim(1984, 2019)
fig.tight_layout()