In [None]:
import pandas as pd
import glob
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import geopandas as gpd
import shapely

In [None]:
# Sorted list of input tables; the pattern has no wildcard, so it matches at
# most the single merged 2021 Sentinel file.
all_csvs = sorted(glob.glob('./out/sentinel_2021_v6_wgs84_merged.csv'))

In [None]:
def read_process_csv(csv):
    """Load one CSV and tag every row with values sliced from its file name.

    The file name (directory stripped) is cut at fixed character positions:
    characters 0-7 become the ``satellite`` column and characters 9-12,
    converted to ``int``, become the ``year`` column. For a name such as
    ``sentinel_2021_...`` this yields ``'sentinel'`` and ``2021``.

    Parameters
    ----------
    csv : str
        Path of the CSV file to read.

    Returns
    -------
    pandas.DataFrame
        The file's contents plus the ``satellite`` and ``year`` columns.

    Raises
    ------
    ValueError
        If characters 9-12 of the file name are not an integer.
    """
    file_name = os.path.basename(csv)
    return pd.read_csv(csv).assign(
        satellite=file_name[:8],
        year=int(file_name[9:13]),
    )

In [None]:

# Stack the per-file tables returned by read_process_csv into one frame.
full_df = pd.concat([
    read_process_csv(csv) for csv in all_csvs
])

# Keep rows whose hydropoly_max is below 100. The explicit .copy() makes the
# result an independent frame, so the column assignments below do not write
# into a slice of the concatenated frame (avoids SettingWithCopyWarning).
full_df = full_df.loc[full_df['hydropoly_max']<100].copy()
# NOTE(review): the *100 factor presumably converts a pixel count into m^2
# (10 m x 10 m pixels) -- confirm against whatever produces the 'area' column.
full_df['area_ha'] = full_df['area']*100/10000 # HA
# The area-distribution cells further down read full_df['area_km'], so this
# column has to exist; it mirrors the computation used for full_wgs84_df.
full_df['area_km'] = full_df['area']*100/(1000*1000) # km2
full_df = full_df.loc[full_df['area_ha']<100] # Keep only rows below 100 ha

In [None]:
# Summarized land-use/land-cover table; its 'class' column is joined onto the
# exported table in the next cell.
lulc_csv_path = '../lulc/out/lulc_stats_res_sentinel_2021_summarized.csv'
lulc_df = pd.read_csv(lulc_csv_path)

In [None]:
# Write the cleaned table: coordinates, area in hectares and the LULC class.
export_columns = ['center_lat','center_lon','latitude','longitude', 'area_ha']
column_renames = {'class':'lulc_class','center_lat':'y_aea','center_lon':'x_aea'}

# NOTE(review): join() matches rows on the index, so this assumes lulc_df rows
# line up with the original row numbers of the merged CSV -- confirm.
cleaned_df = full_df[export_columns].join(lulc_df['class'])
cleaned_df = cleaned_df.rename(columns=column_renames)
cleaned_df.to_csv('./data/sentinel_2021_v6_cleaned.csv', index=False)

In [None]:
# Read the exported table back from disk and display it as a quick check.
cleaned_csv_path = './data/sentinel_2021_v6_cleaned.csv'
pd.read_csv(cleaned_csv_path)

In [None]:

# Per (year, satellite) summary of area in hectares. The dict form of agg()
# keeps the two-level ('area_ha', <statistic>) column labels.
area_statistics = ['mean','median','sum','count']
grouped_by_year_sat = full_df.groupby(['year', 'satellite'])
groupby_year_sat = grouped_by_year_sat.agg({'area_ha': area_statistics})

In [None]:
# Print the per-(year, satellite) area summary as plain text.
print(groupby_year_sat)

# Basic area distribution stats

In [None]:
# Counts for 2021: rows above 5 ha, and the shares of rows above 5 ha and
# below 1 ha. Reads full_df['area_km'], so that column must exist on full_df
# before this cell runs.
is_2021 = full_df.year==2021
n_rows_2021 = full_df.loc[is_2021].shape[0]
n_over_5ha = full_df.loc[is_2021 & (full_df.area_ha > 5), 'area_km'].count()
n_under_1ha = full_df.loc[is_2021 & (full_df.area_ha < 1), 'area_km'].count()

print(n_over_5ha)
print(n_over_5ha/n_rows_2021)
print(n_under_1ha/n_rows_2021)

In [None]:
# Summed area (km2) for 2021: overall, for rows above 5 ha and for rows below
# 1 ha, each followed by its share of the overall total. Reads
# full_df['area_km'], so that column must exist on full_df before this runs.
rows_2021 = full_df.loc[full_df.year==2021]
total_area = rows_2021['area_km'].sum()
area_over_5ha = rows_2021.loc[rows_2021.area_ha > 5, 'area_km'].sum()
area_under_1ha = rows_2021.loc[rows_2021.area_ha < 1, 'area_km'].sum()

print(total_area)
print(area_over_5ha)
print(area_over_5ha/total_area)

print(area_under_1ha)
print(area_under_1ha/total_area)

In [None]:
# Histogram of 2021 areas in hectares, using 0.5 ha wide bins from 0 to 5 ha.
area_ha_2021 = full_df.loc[full_df['year'] == 2021, 'area_ha']
half_ha_bins = np.arange(0, 5.01, 0.5)
plt.hist(area_ha_2021, bins=half_ha_bins)

# Map

In [None]:
# Temporary: Convert to lat/lon
# For every input table whose output file does not exist yet, reproject the
# centre points from ESRI:102033 to EPSG:4326 and write a copy carrying extra
# 'longitude' / 'latitude' columns. The output path is the input path with
# 'aea' replaced by 'wgs84'.
source_crs = 'ESRI:102033'
target_crs = 'EPSG:4326'
kept_columns = ['center_lat', 'center_lon', 'hydropoly_max', 'area']

for csv in all_csvs:
    print(csv, ', Starting')
    out_path = csv.replace('aea', 'wgs84')
    needs_conversion = not os.path.isfile(out_path)
    if needs_conversion:
        points_df = pd.read_csv(csv)[kept_columns]
        # center_lon / center_lat are used as x / y in the source CRS.
        projected_points = gpd.points_from_xy(points_df.center_lon, points_df.center_lat)
        projected_gdf = gpd.GeoDataFrame(
            points_df, geometry=projected_points, crs=source_crs
        )
        wgs84_gdf = projected_gdf.to_crs(target_crs)
        wgs84_gdf['longitude'] = wgs84_gdf.geometry.x
        wgs84_gdf['latitude'] = wgs84_gdf.geometry.y
        # The geometry column is dropped so the output stays a plain table.
        wgs84_gdf.drop(columns=['geometry']).to_csv(out_path, index=False)
    print(out_path, ', Done')

In [None]:

# Re-glob with wildcards so every matching Sentinel v6 WGS84 table is loaded.
# Note that this rebinds all_csvs, replacing the list built at the top of the
# notebook.
all_csvs = glob.glob('./out/sentinel*v6*wgs84_merged.csv')
all_csvs.sort()

# Stack the per-file tables returned by read_process_csv into one frame.
full_wgs84_df = pd.concat([
    read_process_csv(csv) for csv in all_csvs
])

# Keep rows whose hydropoly_max is below 100. The explicit .copy() makes the
# result an independent frame, so the column assignments below do not write
# into a slice of the concatenated frame (avoids SettingWithCopyWarning).
full_wgs84_df = full_wgs84_df.loc[full_wgs84_df['hydropoly_max']<100].copy()
# NOTE(review): the *100 factor presumably converts a pixel count into m^2
# (10 m x 10 m pixels) -- confirm against whatever produces the 'area' column.
full_wgs84_df['area_ha'] = full_wgs84_df['area']*100/10000 # HA
full_wgs84_df['area_km'] = full_wgs84_df['area']*100/(1000*1000) # km2
full_wgs84_df = full_wgs84_df.loc[full_wgs84_df['area_ha']<100] # Keep only rows below 100 ha

In [None]:
# Brazilian state borders, reprojected to EPSG:4326 so they overlay the
# longitude/latitude point data.
# NOTE(review): absolute, user-specific path -- the notebook only runs where
# this file exists; consider a configurable data directory.
brazil_states_path = '/home/ksolvik/research/reservoirs/analysis/data/misc/general_borders/Brazilian_States_aea.shp'
brazil_gdf = gpd.read_file(brazil_states_path).to_crs('EPSG:4326')
# brazil_gdf = brazil_gdf.dissolve(by='REGIAO')

In [None]:

# Map extents derived from the state borders: the smallest minimum and largest
# maximum over all per-feature bounding boxes, then shifted by fixed offsets
# (x: -1.5 on the low side and -4 on the high side; y: -1 and +1).
feature_bounds = brazil_gdf.bounds
lower_bounds = feature_bounds.min()
upper_bounds = feature_bounds.max()

xlims = (lower_bounds['minx'] - 1.5, upper_bounds['maxx'] - 4)
ylims = (lower_bounds['miny'] - 1, upper_bounds['maxy'] + 1)

# Extent sizes, used later to scale the hexbin grid in y relative to x.
xlims_range = xlims[1] - xlims[0]
ylims_range = ylims[1] - ylims[0]

In [None]:

# Single-panel hexbin map of the 2021 points, saved to ./big_2021.png.
# Layout: a tall map axes on top and a thin axes below that holds the colorbar.
axes_height_ratios=[1, 0.05]
fig, axs = plt.subplots(2, 1, figsize = (9, 10),
                       gridspec_kw={"height_ratios":axes_height_ratios})

# Rectangle covering the whole map extent (xlims x ylims); subtracting the
# state polygons from it leaves the area outside Brazil, used below as a mask.
outline_gdf = gpd.GeoDataFrame(
    geometry=gpd.GeoSeries(shapely.geometry.Polygon(
        [[xlims[0], ylims[0]],
         [xlims[0], ylims[1]],
         [xlims[1], ylims[1]],
         [xlims[1], ylims[0]],
         [xlims[0], ylims[0]]])),
    crs='EPSG:4326')
nonbrazil_poly = outline_gdf.overlay(brazil_gdf, how='difference')
brazil_gdf.boundary.plot(ax=axs[0], color='white', alpha=0.3)
year_2021_df = full_wgs84_df.loc[full_wgs84_df['year']==2021]
# 80 hexagons across in x; the y count is scaled by the extent's y/x ratio.
# extent is the 4-tuple (xmin, xmax, ymin, ymax) built by concatenating the
# xlims and ylims tuples. vmin/vmax fix the color scale to 0-2500.
im_h1 = axs[0].hexbin(year_2021_df['longitude'], year_2021_df['latitude'],
              gridsize=(80, int(80*ylims_range/xlims_range)),
             vmin=0, vmax=2500,
             extent=xlims + ylims)

# fig.colorbar(im)

axs[0].set_xlabel('Longitude (deg)')
axs[0].set_ylabel('Latitude (deg)')
axs[0].set_xlim(xlims)
axs[0].set_ylim(ylims)

# Remove outside of Brazil
# (drawn after the hexbin, in black, so it covers hexagons outside the borders)
nonbrazil_poly.plot(ax=axs[0],color='black')


# Set up colorbar
# NOTE(review): gs is assigned here but not used again in this cell.
gs = axs[0].get_gridspec()
# axs[-1].remove()
axs[-1].set_title('Reservoir Count per Hexagon')
# The bottom axes is passed as cax, so the colorbar is drawn inside it.
fig.colorbar(im_h1, cax=axs[-1], orientation='horizontal')
fig.tight_layout()
plt.savefig('./big_2021.png',dpi=300)

In [None]:

# Three-panel hexbin figure: 2017 counts, 2021 counts and their difference,
# with a shared count colorbar under the first two panels and a separate
# colorbar under the change panel.
%matplotlib inline

# Top row holds the three maps; the thin bottom row is reserved for colorbars.
axes_height_ratios=[1, 0.05]
fig, axs = plt.subplots(2, 3, figsize = (18, 10),
                       gridspec_kw={"height_ratios":axes_height_ratios})

# Rectangle covering the whole map extent (xlims x ylims); subtracting the
# state polygons from it leaves the area outside Brazil, used below as a mask.
outline_gdf = gpd.GeoDataFrame(
    geometry=gpd.GeoSeries(shapely.geometry.Polygon(
        [[xlims[0], ylims[0]],
         [xlims[0], ylims[1]],
         [xlims[1], ylims[1]],
         [xlims[1], ylims[0]],
         [xlims[0], ylims[0]]])),
    crs='EPSG:4326')
nonbrazil_poly = outline_gdf.overlay(brazil_gdf, how='difference')

# Plot 2017 first
brazil_gdf.boundary.plot(ax=axs[0, 0], color='white', alpha=0.3)
year_2017_df = full_wgs84_df.loc[full_wgs84_df['year']==2017]
# 60 hexagons across in x; the y count is scaled by the extent's y/x ratio.
# extent is (xmin, xmax, ymin, ymax) from concatenating xlims and ylims.
im_h1 = axs[0,0].hexbin(year_2017_df['longitude'], year_2017_df['latitude'],
              gridsize=(60, int(60*ylims_range/xlims_range)),
             vmin=0, vmax=5000,
             extent=xlims + ylims)

# Plot 2021
# Same gridsize, extent and color limits as the 2017 panel.
brazil_gdf.boundary.plot(ax=axs[0, 1], color='white', alpha=0.3)
year_2021_df = full_wgs84_df.loc[full_wgs84_df['year']==2021]
im_h2 = axs[0, 1].hexbin(year_2021_df['longitude'], year_2021_df['latitude'],
              gridsize=(60, int(60*ylims_range/xlims_range)),
             vmin=0, vmax=5000,
             extent=xlims + ylims)


# Plot difference
# A third hexbin is drawn from the 2021 points with the same gridsize and
# extent, then its values are overwritten with (2021 values - 2017 values).
# NOTE(review): assumes the three hexbin calls return value arrays of equal
# length and ordering because gridsize/extent match -- confirm.
brazil_gdf.boundary.plot(ax=axs[0, 2], color='white', alpha=0.3)
im_h3 = axs[0, 2].hexbin(year_2021_df['longitude'], year_2021_df['latitude'],
              gridsize=(60, int(60*ylims_range/xlims_range)),
             vmin=-2000, vmax=2000,
             extent=xlims + ylims,
             cmap='coolwarm')
im_h3.set_array(im_h2.get_array()-im_h1.get_array())

# fig.colorbar(im)

# Same axis labels and limits on all three map panels.
for cur_ax in axs[0]:
    cur_ax.set_xlabel('Longitude (deg)')
    cur_ax.set_ylabel('Latitude (deg)')
    cur_ax.set_xlim(xlims)
    cur_ax.set_ylim(ylims)
axs[0,0].set_title('2017', size=20)
axs[0,1].set_title('2021', size=20)
axs[0,2].set_title('Change', size=20)

# Remove outside of Brazil
# (drawn after the hexbins, in white, so it covers hexagons outside the borders)
nonbrazil_poly.plot(ax=axs[0,0],color='white')
nonbrazil_poly.plot(ax=axs[0,1],color='white')
nonbrazil_poly.plot(ax=axs[0,2],color='white')


# Set up colorbar
# The three bottom-row axes are removed and replaced by two new ones on the
# same gridspec: one spanning the first two columns (count colorbar, taken
# from the 2021 panel, which uses the same vmin/vmax as the 2017 panel) and
# one in the third column (change colorbar).
gs = axs[0, 0].get_gridspec()
for ax in axs[-1]:
    ax.remove()
axbig = fig.add_subplot(gs[-1, :2])
axbig.set_title('Reservoir Count')
fig.colorbar(im_h2, cax=axbig, orientation='horizontal')
axlr = fig.add_subplot(gs[-1, 2])
axlr.set_title('Change')
fig.colorbar(im_h3, cax=axlr, orientation='horizontal')
fig.tight_layout()