In [None]:
import pandas as pd
import glob
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Collect every merged Landsat CSV under ./out, in lexicographic path order.
all_csvs = sorted(glob.glob('./out/ls*merged.csv'))

In [None]:
def read_process_csv(csv):
    """Load one CSV and tag every row with metadata parsed from its file name.

    The base name of ``csv`` supplies two extra columns:

    * ``satellite`` -- the first three characters of the file name.
    * ``year`` -- characters 4 through 7 of the file name, converted to int.

    Parameters
    ----------
    csv : str
        Path to the CSV file to read.

    Returns
    -------
    pandas.DataFrame
        The file's contents plus the ``satellite`` and ``year`` columns.

    Raises
    ------
    ValueError
        If characters 4-7 of the file name are not an integer.
    """
    filename = os.path.basename(csv)
    return pd.read_csv(csv).assign(
        satellite=filename[:3],
        year=int(filename[4:8]),
    )

In [None]:
# Read each CSV (tagged with satellite and year) and stack them row-wise.
# Row indices are kept as read from each file, so they repeat across files.
full_df = pd.concat(map(read_process_csv, all_csvs))

In [None]:
# Fraction of 2022 rows whose area is below 0.09 ha.
# Hectares are derived from `area` here (area * 100 / 10000, the same
# conversion the cleaning cell further down uses) because the 'area_ha'
# column has not been created yet at this point on a fresh kernel run.
# NOTE(review): this is computed on the unfiltered frame -- confirm that is
# the intended population.
(full_df.loc[full_df.year == 2022, 'area'] * 100 / 10000 < 0.09).mean()

In [None]:
# Fraction of 2022 rows whose area is below 0.045 ha.
# Hectares are derived from `area` here (area * 100 / 10000, the same
# conversion the cleaning cell further down uses) because the 'area_ha'
# column has not been created yet at this point on a fresh kernel run.
# NOTE(review): this is computed on the unfiltered frame -- confirm that is
# the intended population.
(full_df.loc[full_df.year == 2022, 'area'] * 100 / 10000 < 0.045).mean()

In [None]:
# Clean the combined table and add area columns in convenient units.
# Built as a single chain so the new columns are assigned on a fresh frame
# rather than on a slice (which triggers pandas' SettingWithCopyWarning).
full_df = (
    full_df
    # Keep rows with hydropoly_max below 100.
    # NOTE(review): the meaning/units of `hydropoly_max` are not visible here.
    .loc[full_df['hydropoly_max'] < 100]
    .assign(
        # Hectares: area * 100 / 10,000.
        # Assumes one unit of `area` is 100 m^2 -- TODO confirm.
        area_ha=lambda d: d['area'] * 100 / 10000,
        # Square kilometres: area * 100 / 1,000,000 (same assumption).
        area_km=lambda d: d['area'] * 100 / (1000 * 1000),
    )
    # Keep only rows smaller than 100 ha (rows of 100 ha or more are dropped).
    .loc[lambda d: d['area_ha'] < 100]
)



In [None]:
# Summary statistics of the three area columns for every (year, satellite).
# The columns are selected *before* aggregating so that only they are reduced;
# aggregating the whole frame first wastes work and fails with a TypeError on
# non-numeric columns in recent pandas versions.
# Result: index = (year, satellite), columns = MultiIndex (column, statistic).
groupby_year_sat = (
    full_df
    .groupby(['year', 'satellite'])[['area', 'area_ha', 'area_km']]
    .agg(['mean', 'median', 'sum', 'count'])
)

In [None]:
# Display the per-(year, satellite) summary table.
groupby_year_sat

In [None]:
# Lookup table indexed by year whose single column holds the satellite
# short name taken from the summary table's index.
year_satellite = (
    groupby_year_sat
    .reset_index()
    .set_index('year')
    [['satellite']]
)

# Line color per satellite short name. All palettes are from ColorBrewer;
# the active one is defined below, the alternatives are kept for reference.
#
# Alternative 1:
# color_dict = {'ls5': '#1b9e77',
#               'ls7': '#d95f02',
#               'ls8': '#7570b3'}
#
# Alternative 2:
# color_dict = {'ls5': '#a6cee3',
#               'ls7': '#1f78b4',
#               'ls8': '#b2df8a'}

color_dict = dict(
    ls5='#66c2a5',
    ls7='#fc8d62',
    ls8='#8da0cb',
)

In [None]:
def get_ls_color(y):
    """Return the ``color_dict`` entry for the satellite listed under year ``y``.

    The satellite short name is the first value found at ``year_satellite.loc[y]``.
    """
    satellite = year_satellite.loc[y].values[0]
    return color_dict[satellite]

def color_by_ls(df, column, ax):
    """Plot ``df[column]`` against year as line segments colored by satellite.

    One segment is drawn between each pair of consecutive rows of ``df``. A
    segment takes the color of the satellite listed for its *starting* year.
    The first segment drawn for a satellite on ``ax`` carries the legend label
    ``'Landsat <n>'`` (``n`` is the last character of the satellite short
    name); later segments of the same satellite are left unlabelled so the
    legend has a single entry per satellite.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose index has a level named ``'year'``.
    column : hashable
        Column key in ``df`` holding the y-values (a tuple for MultiIndex
        columns).
    ax : matplotlib.axes.Axes
        Axes to draw on.

    Notes
    -----
    Relies on the module-level ``year_satellite`` table and ``get_ls_color``
    for the year -> satellite -> color lookup.
    """
    # Use the frame that was passed in (previously the global summary table
    # was read regardless of the ``df`` argument).
    year_values = np.stack(
        [df.index.get_level_values('year'), df[column]],
        axis=1,
    )
    for start, stop in zip(year_values[:-1], year_values[1:]):
        # Transpose the two (year, value) points into x- and y-coordinates.
        x, y = zip(start, stop)
        ls_shortname = year_satellite.loc[x[0]].values[0]
        ls_name = 'Landsat {}'.format(ls_shortname[-1])
        plot_kwargs = dict(color=get_ls_color(x[0]), lw=2.5,
                           solid_capstyle='round')
        # Label only the first segment of each satellite.
        if ls_name not in ax.get_legend_handles_labels()[1]:
            plot_kwargs['label'] = ls_name
        ax.plot(x, y, **plot_kwargs)

In [None]:
# Row count per (year, satellite) expressed in thousands, for plotting.
groupby_year_sat['count_k'] = groupby_year_sat[('area', 'count')].div(1000)

In [None]:
# Two side-by-side panels: yearly count (left) and yearly summed area (right).
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(12, 5))

# Left panel: count in thousands.
color_by_ls(groupby_year_sat, 'count_k', axs[0])
# Right panel: summed area in km^2.
color_by_ls(groupby_year_sat, ('area_km', 'sum'), axs[1])

axs[0].set(ylabel='Total Reservoir Count (thousands)', xlabel='Year')
axs[1].set(ylabel='Total Reservoir Surface Area ($km^2$)', xlabel='Year')

# The legend is drawn on the left panel only.
axs[0].legend()
fig.tight_layout()

In [None]:
# Copy the median and mean of area_ha into flat column names for plotting.
for stat_name in ('median', 'mean'):
    groupby_year_sat[stat_name + '_ha'] = groupby_year_sat[('area_ha', stat_name)]

In [None]:
# Two side-by-side panels: yearly median size (left) and mean size (right).
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(12, 5))

# Left panel: median area in hectares.
color_by_ls(groupby_year_sat, 'median_ha', axs[0])
# Right panel: mean area in hectares.
color_by_ls(groupby_year_sat, 'mean_ha', axs[1])

axs[0].set(ylabel='Median Reservoir Size ($ha$)', xlabel='Year')
axs[1].set(ylabel='Mean Reservoir Size ($ha$)', xlabel='Year')

# The legend is drawn on the left panel only.
axs[0].legend()
fig.tight_layout()