In [None]:
import pandas as pd
import glob
import os
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Preview of the Landsat 5 year list: 1984-2000 followed by 2003-2011
np.concatenate((np.arange(1984, 2001), np.arange(2003, 2012)))

In [None]:
# Which satellite supplies each year of the final product
landsat_year_dict = {
    # Landsat 5: 1984-2011 with 2001 and 2002 left out
    'landsat5': np.setdiff1d(np.arange(1984, 2012), [2001, 2002]),
    # Landsat 7: 2001 and 2002 (bad LS5 data), plus 2012-2013 to bridge the
    # gap before the Landsat 8 years below
    'landsat7': np.array([2001, 2002, 2012, 2013]),
    # Landsat 8: 2014 through 2022 inclusive
    'landsat8': np.arange(2014, 2022 + 1),
}

In [None]:
# Region code (the `reg` column) -> municipality name; codes follow list order
region_dict = dict(enumerate([
    'Agua Boa',
    'Barra do Garcas',
    'Canarana',
    'Nova Xavantina',
    'Querencia',
]))
 

In [None]:
# Every CSV in ./out, in lexicographic path order
all_csvs = sorted(glob.glob('./out/*.csv'))

In [None]:
def read_process_csv(csv):
    """Load one CSV and tag every row with the satellite and year it covers.

    Neither tag is read from the file contents: both are sliced out of the
    file name at fixed positions (characters 0-7 for the satellite, 9-12 for
    the year), so a name shaped like ``landsat5_1984...`` is expected.

    Parameters
    ----------
    csv : str
        Path to the CSV file.

    Returns
    -------
    pandas.DataFrame
        The file's rows with a ``satellite`` (str) and a ``year`` (int)
        column added.

    Raises
    ------
    ValueError
        If characters 9-12 of the file name do not parse as an integer.
    """
    file_name = os.path.basename(csv)
    return pd.read_csv(csv).assign(
        satellite=file_name[:8],
        year=int(file_name[9:13]),
    )

In [None]:
# Stack the tagged per-file frames into one long table
# (original row labels are kept, so the index is not unique across files)
full_df = pd.concat(map(read_process_csv, all_csvs))

In [None]:
# Keep a row only when all of the following hold:
#   - it is not in Agua Boa (region code 0)
#   - its year is before 2024 (2024 is not a full set of data yet)
#   - it is not the last year (2011) of Landsat 5
full_df = full_df.loc[
    (full_df['reg'] != 0)
    & (full_df['year'] < 2024)
    & ((full_df['satellite'] != 'landsat5') | (full_df['year'] != 2011))
]

In [None]:
# Rescale `area` by 100 / 10000. The plots below label the result in hectares,
# so this presumably converts a count of 100 m^2 units into ha (10,000 m^2
# each) -- TODO confirm the unit of the raw `area` column.
# `assign` returns a new frame rather than writing a column into the result of
# the boolean filter above, which avoids pandas' SettingWithCopyWarning.
# NOTE: re-running this cell on its own rescales the values again.
full_df = full_df.assign(area=full_df['area'] * 100 / 10000)

In [None]:
# Area statistics for every (satellite, year, region) combination.
# `area` is selected before aggregating so that only this column is reduced:
# aggregating every column first does wasted work and fails on pandas >= 2.0
# if the CSVs carry any non-numeric column.
groupby_reg = full_df.groupby(['satellite', 'year', 'reg'])['area'].agg([
    'min', 'max', 'mean', 'median', 'sum', 'count'
])

In [None]:
# Area statistics for every (satellite, year), pooling all remaining regions.
# Selecting `area` before aggregating makes dropping `reg` unnecessary and
# keeps the reduction from touching (and possibly failing on) other columns.
groupby_noreg = full_df.groupby(['satellite', 'year'])['area'].agg([
    'max', 'mean', 'median', 'sum', 'count'
])

In [None]:
# Reservoir count per year, one line per satellite on a shared axis
ax = None  # the first plot call creates the axes; later calls draw onto it
for sat_key, sat_label in (
    ('landsat5', "Landsat 5"),
    ('landsat7', "Landsat 7"),
    ('landsat8', "Landsat 8"),
):
    ax = groupby_noreg.loc[sat_key].plot(ax=ax, y='count', use_index=True, label=sat_label)
ax.set_ylabel('Reservoir Count')
ax.set_xlabel('Year')

In [None]:
# Total reservoir area per year, one line per satellite on a shared axis
ax = None  # the first plot call creates the axes; later calls draw onto it
for sat_key, sat_label in (
    ('landsat5', "Landsat 5"),
    ('landsat7', "Landsat 7"),
    ('landsat8', "Landsat 8"),
):
    ax = groupby_noreg.loc[sat_key].plot(ax=ax, y='sum', use_index=True, label=sat_label)
ax.set_ylabel('Total Reservoir Area (ha)')
ax.set_xlabel('Year')
ax.set_title('Reservoir Area in Interview Study Area \n (Barra do Garcas, Canarana, Querencia, & Nova Xavantina)')

In [None]:
# Median reservoir area per year, one line per satellite on a shared axis
ax = None  # the first plot call creates the axes; later calls draw onto it
for sat_key, sat_label in (
    ('landsat5', "Landsat 5"),
    ('landsat7', "Landsat 7"),
    ('landsat8', "Landsat 8"),
):
    ax = groupby_noreg.loc[sat_key].plot(ax=ax, y='median', use_index=True, label=sat_label)
# Label the axes so the figure can be read on its own, like the count and
# total-area figures; the unit matches the total-area plot's label.
ax.set_ylabel('Median Reservoir Area (ha)')
ax.set_xlabel('Year')

# Regional plots, using harmonized record of satellites

In [None]:
# For each satellite keep only the years assigned to it in landsat_year_dict.
# Rows are picked with a label-membership mask rather than a list indexer:
# `.loc` with a list of labels on a MultiIndex raises KeyError on
# pandas >= 2.0 when a listed year does not occur in the index at all, while
# the mask simply selects nothing for such a year.
# NOTE(review): landsat_year_dict assigns 2011 to landsat5, but the landsat5
# rows for 2011 are filtered out of full_df earlier, so 2011 is absent from
# the combined record -- confirm this gap is intended.
sat_level = groupby_reg.index.get_level_values('satellite')
year_level = groupby_reg.index.get_level_values('year')
df_list = [
    groupby_reg.loc[(sat_level == sat_name) & year_level.isin(year_list)]
    for sat_name, year_list in landsat_year_dict.items()
]

In [None]:
# Stack the per-satellite selections row-wise into a single record
groupby_reg_continuous = pd.concat(df_list, axis=0)

In [None]:
# Drop the satellite level (level 0) so the record is indexed by (year, reg)
# only, then sort it. The frame is rebuilt from df_list here instead of
# stripping a level from the existing index in place, so re-running this cell
# cannot remove a second level (the year) by accident.
groupby_reg_continuous = pd.concat(df_list).droplevel(0).sort_index()

In [None]:
# For each statistic, collect one value array per region code 1-4
# (list position 0 holds region 1, and so on)
all_reg_sum_vals, all_reg_count_vals, all_reg_median_vals = (
    [
        groupby_reg_continuous.loc[pd.IndexSlice[:, reg_id], :][stat].values
        for reg_id in range(1, 5)
    ]
    for stat in ('sum', 'count', 'median')
)

In [None]:
# Wide table of the median area: one row per year, one column per region.
# Pivoting the (year, reg) index aligns every value on its year label; filling
# the columns positionally would only be correct if every region had a row
# for every year. A missing year/region combination shows up as NaN.
groupby_reg_median = (
    groupby_reg_continuous['median']
    .unstack(level=1)                         # region codes become columns
    .rename(columns=region_dict)              # codes -> municipality names
    .rename_axis(index='year', columns=None)  # no legend title from the columns name
    .reset_index()
)

In [None]:
# Median reservoir area per region over time, with labelled axes
ax = groupby_reg_median.plot(x='year')
ax.set_xlabel('Year')
ax.set_ylabel('Median Reservoir Area (ha)')

In [None]:
# Wide table of the total area: one row per year, one column per region.
# Pivoting the (year, reg) index aligns every value on its year label; filling
# the columns positionally would only be correct if every region had a row
# for every year. A missing year/region combination shows up as NaN.
groupby_reg_sum = (
    groupby_reg_continuous['sum']
    .unstack(level=1)                         # region codes become columns
    .rename(columns=region_dict)              # codes -> municipality names
    .rename_axis(index='year', columns=None)  # no legend title from the columns name
    .reset_index()
)

In [None]:
# Total reservoir area per region over time, with labelled axes
ax = groupby_reg_sum.plot(x='year')
ax.set_xlabel('Year')
ax.set_ylabel('Total Reservoir Area (ha)')

In [None]:
# Same totals as a stacked area chart, with labelled axes
ax = groupby_reg_sum.plot.area(x='year')
ax.set_xlabel('Year')
ax.set_ylabel('Total Reservoir Area (ha)')

In [None]:
# Wide table of the reservoir count: one row per year, one column per region.
# Pivoting the (year, reg) index aligns every value on its year label; filling
# the columns positionally would only be correct if every region had a row
# for every year. A missing year/region combination shows up as NaN.
groupby_reg_count = (
    groupby_reg_continuous['count']
    .unstack(level=1)                         # region codes become columns
    .rename(columns=region_dict)              # codes -> municipality names
    .rename_axis(index='year', columns=None)  # no legend title from the columns name
    .reset_index()
)

In [None]:
# Reservoir count per region over time, with labelled axes
ax = groupby_reg_count.plot(x='year')
ax.set_xlabel('Year')
ax.set_ylabel('Reservoir Count')

In [None]:
# Same counts as a stacked area chart, with labelled axes
ax = groupby_reg_count.plot.area(x='year')
ax.set_xlabel('Year')
ax.set_ylabel('Reservoir Count')

# Plot that puts it all together

In [None]:
# Show the count table with the region columns in the order used for plotting
groupby_reg_count.loc[:, ['year', 'Querencia', 'Canarana', 'Nova Xavantina', 'Barra do Garcas']]

In [None]:
# Region column order for the combined figure. The assignment had no
# right-hand side (a SyntaxError on Run All); the value is the ordering
# already used where the count table is reordered for display and plotting.
region_order = ['Querencia', 'Canarana', 'Nova Xavantina', 'Barra do Garcas']

In [None]:
# Together: per-satellite reservoir counts (left panel) next to the
# per-region counts from the combined record (right panel)
fig, axs = plt.subplots(1, 2, figsize=(10, 4))

# Left panel: one line per satellite; Landsat 7 is drawn dashed
for sat_key, line_kwargs in (
    ('landsat5', {'label': "Landsat 5", 'color': 'darkcyan'}),
    ('landsat7', {'label': "Landsat 7", 'color': 'darkmagenta', 'style': '--'}),
    ('landsat8', {'label': "Landsat 8", 'color': 'darkgreen'}),
):
    groupby_noreg.loc[sat_key].plot(ax=axs[0], y='count', use_index=True, **line_kwargs)

# Regional, reorder first
groupby_reg_count[['year', 'Querencia', 'Canarana', 'Nova Xavantina', 'Barra do Garcas']].plot(
    x='year', ax=axs[1])

# Both panels share the same axis labels (the left y-label used to be
# misspelled 'Reservour Count')
for panel in axs:
    panel.set_xlabel('Year')
    panel.set_ylabel('Reservoir Count')
plt.show()