In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np
import glob
import matplotlib.pyplot as plt
import matplotlib as mpl

In [2]:
# Load the land-tenure property polygons for the study area.
prop_gpkg = './data/pa_br_landtenure_studyarea_only.gpkg'
area_col = 'area_ha'
# Read the layer and give its area column the generic name used downstream.
prop_gdf = gpd.read_file(prop_gpkg).rename(columns={area_col: 'prop_area'})
# 1-based feature id derived from the freshly read index
# (presumably to match the `fid` used by the stats CSVs -- confirm).
prop_gdf.loc[:, 'fid'] = prop_gdf.index + 1
# Reproject to EPSG:4326.
prop_gdf = prop_gdf.to_crs('EPSG:4326')

In [3]:

# Raster class codes grouped into the coarse LULC categories used below
# (presumably MapBiomas legend codes -- confirm against the legend in use).
mb_keys_dict = dict(
    crop=np.array([18, 19, 39, 20, 40, 62, 41, 36, 46, 47, 35, 48]),
    forest=np.array([3]),
    savanna=np.array([4]),
    grassland=np.array([12]),
    pasture=np.array([15]),
)

# Municipality code (matched against the `cd_mun` column) -> display name.
muni_names_dict = {
    5101803: 'Barra do Garças',
    5102702: 'Canarana',
    5106257: 'Nova Xavantina',
    5107065: 'Querência',
}
# Order in which municipalities are shown in tables and figure panels.
muni_order = [
    'Barra do Garças',
    'Nova Xavantina',
    'Canarana',
    'Querência',
]

In [4]:
def assign_lulc_classes(in_df, year=None):
    """Aggregate per-code columns into crop / pasture / natural totals.

    Parameters
    ----------
    in_df : pandas.DataFrame
        One row per property. Column labels must be convertible to ``int``;
        they are matched against the code arrays in ``mb_keys_dict``.
        Values are presumably 30 m pixel counts -- confirm against the
        zonal-statistics export.
    year : optional
        Label used for the outer level of the returned column MultiIndex.
        When omitted, the notebook-global loop variable ``y`` is used, which
        keeps existing ``assign_lulc_classes(year_df)`` calls working.

    Returns
    -------
    pandas.DataFrame
        Indexed like ``in_df`` with MultiIndex columns
        ``(year, 'crop')``, ``(year, 'pasture')``, ``(year, 'natural')``.
        Values are the per-class sums multiplied by ``90000 / 1e6``
        (presumably a pixel-count -> hectare conversion -- confirm).
        'natural' is the sum of forest, savanna and grassland.
    """
    if year is None:
        # Legacy behaviour: read the year from the caller's global loop variable.
        year = y
    # Cast the column labels once instead of once per class.
    col_codes = in_df.columns.astype(int)
    out_df = pd.DataFrame(
        {
            lulc_class: in_df.loc[:, np.isin(col_codes, class_codes)].sum(axis=1)
            for lulc_class, class_codes in mb_keys_dict.items()
        }
    )
    out_df = out_df*90000/(1000*1000)
    natural_parts = ['forest', 'savanna', 'grassland']
    out_df['natural'] = out_df[natural_parts].sum(axis=1)
    out_df = out_df.drop(columns=natural_parts)
    out_df.columns = pd.MultiIndex.from_product([[year], out_df.columns])
    return out_df

def assign_property_lulc(row):
    """Assign a single land-use label to one property.

    Parameters
    ----------
    row : pandas.Series
        Per-class amounts for one property; must contain the labels
        'crop' and 'pasture'.

    Returns
    -------
    str or float
        'crop' or 'pasture' when those two classes together exceed 10% of
        the row total (the larger of the two wins; a tie yields 'pasture').
        Otherwise the label with the largest share. ``np.nan`` when the row
        total is zero, i.e. there is nothing to classify.
    """
    total = row.sum()
    if total == 0:
        # Every share would be 0/0 = NaN. idxmax on an all-NaN Series is
        # deprecated (FutureWarning) and slated to raise, so return the
        # missing value explicitly instead.
        return np.nan
    per_df = row/total
    if per_df['crop'] + per_df['pasture'] > 0.1:
        return 'crop' if per_df['crop'] > per_df['pasture'] else 'pasture'
    return per_df.idxmax()





In [None]:
# Build one wide table: static property attributes live under column level 0,
# per-year LULC amounts under column level <year>.
# NOTE: this cell rebinds `prop_gdf`; re-running it without re-running the
# load cell fails because 'fid' is no longer a plain column.
prop_gdf = prop_gdf.drop_duplicates(['fid'])
prop_gdf = prop_gdf.set_index('fid')
prop_gdf.columns = pd.MultiIndex.from_product([[0], prop_gdf.columns])

# Year whose LULC composition decides each property's class.
classify_year = 2023
classify_lulc = None
# The loop variable must stay named `y`: assign_lulc_classes(year_df) labels
# its output columns with the global `y`.
for y in np.arange(1985, 2024):
    year_csv_path = './out/mb_stats/prop_mb_stats{}.csv'.format(y)
    year_df = pd.read_csv(year_csv_path, index_col=0)
    year_df.index = year_df.index.astype(int)
    lulc_df = assign_lulc_classes(year_df)
    if y == classify_year:
        # Keep this year's frame explicitly rather than relying on whatever
        # `lulc_df` happens to hold after the loop.
        classify_lulc = lulc_df[y]
    prop_gdf = prop_gdf.join(lulc_df)
    # 'other' = property area not accounted for by the three classes,
    # floored at zero (NaN where a property has no stats row).
    classified_area = lulc_df[y][['crop', 'natural', 'pasture']].sum(axis=1)
    prop_gdf[(y, 'other')] = (prop_gdf[(0, 'prop_area')] - classified_area).clip(lower=0)

if classify_lulc is None:
    raise ValueError('No LULC stats were loaded for year {}'.format(classify_year))

# Defragment after the repeated joins / column inserts.
prop_gdf = prop_gdf.copy()

# Size classes by property area; each later threshold overrides the earlier one.
prop_gdf[(0, 'new_size_class')] = 'Small'
for min_area, size_label in [(100, 'Medium'), (1000, 'Large'), (10000, 'Super')]:
    prop_gdf.loc[prop_gdf[(0, 'prop_area')] > min_area, (0, 'new_size_class')] = size_label

prop_gdf[(0, 'lulc_class')] = classify_lulc.apply(assign_property_lulc, axis=1)
# Only our 4 munis, and only rows whose nm_class is 'PL'
in_study_munis = prop_gdf[(0, 'cd_mun')].isin(muni_names_dict.keys())
is_pl_class = prop_gdf[(0, 'nm_class')] == 'PL'
prop_gdf = prop_gdf.loc[in_study_munis & is_pl_class]

In [6]:

# Number of properties for each (municipality, LULC class) pair.
type_counts = (
    prop_gdf[0][['cd_mun', 'lulc_class']]
    .groupby(['cd_mun', 'lulc_class'])
    .value_counts()
)
# Restrict to the study municipalities and pivot them into named columns,
# leaving one row per LULC class.
type_counts_unstacked = (
    type_counts
    .loc[muni_names_dict.keys()]
    .unstack(level=0)
    .rename(columns=muni_names_dict)
)
# Capitalised class labels for display.
type_counts_unstacked.index = list(map(str.capitalize, type_counts_unstacked.index))


In [7]:

# Slice colours for the LULC pies, applied positionally to the slices.
pie_color_list = [
    '#7A9E43',
    '#2F4F2F',
    '#D4A76A',
]

In [None]:
# Property counts per LULC class, municipalities in plotting order.
print(type_counts_unstacked.loc[:, muni_order])

In [None]:
# One pie per municipality on a 2x2 grid: share of properties per LULC class.
fig, axs = plt.subplots(nrows=2, ncols=2, figsize=(6.35, 5.35))
for ax, muni in zip(axs.flat, muni_order):
    type_counts_unstacked.plot.pie(
        y=muni,
        ax=ax,
        colors=pie_color_list,
        autopct='%1.0f%%',
        legend=False,
    )
fig.tight_layout()
# NOTE(review): absolute, user-specific output path -- consider a configurable figure dir.
plt.savefig(
    '/home/ksolvik/research/reservoirs/figs/ch3/lulc_dist.jpg',
    dpi=150,
    bbox_inches='tight',
    pil_kwargs={'quality': 80},
)

In [10]:
# Get reservoir info: join per-year reservoir stats onto the property table,
# with columns keyed as (year, <stat name>).
prop_gdf_res = prop_gdf.copy()
for y in np.arange(1984, 2024):
    # One CSV per satellite for this year; sorted so the read order is deterministic.
    res_csv_paths = sorted(glob.glob('./out/res_stats_buffer/prop_res_stats_*{}.csv'.format(y)))
    if not res_csv_paths:
        # pd.concat([]) would otherwise fail with an opaque "No objects to concatenate".
        raise FileNotFoundError(
            'No reservoir stats CSV found for year {} in ./out/res_stats_buffer/'.format(y))
    df_list = [pd.read_csv(ls_csv).set_index('fid').fillna(0) for ls_csv in res_csv_paths]
    # Calc mean if multiple satellites: average the same-named columns.
    # DataFrame.groupby(axis=1) is deprecated, so group the transposed frame instead.
    year_res_df = pd.concat(df_list, axis=1).T.groupby(level=0).mean().T
    # from_product works for any number of stat columns (was hard-coded to 3).
    year_res_df.columns = pd.MultiIndex.from_product([[y], year_res_df.columns])
    prop_gdf_res = prop_gdf_res.join(year_res_df, how='left')

In [11]:
# Capitalise the LULC labels (e.g. 'crop' -> 'Crop') for display; the
# capitalised form is also what the Portuguese rename dict is keyed on.
prop_gdf_res[(0,'lulc_class')] = prop_gdf_res[(0, 'lulc_class')].str.capitalize()

In [None]:
size_colors = ['#748A9E', '#e88f43', '#BCCDA5', '#C75D4D']
# Single, just numbers
cmap = mpl.colormaps.get_cmap('Set2')
fig, axs = plt.subplots(1, 2, figsize=(7.35, 7.35))
count_col = (2023, 'count')
# (grouping column, slice colours, panel title) for each of the two pies.
pie_specs = [
    ((0, 'lulc_class'), pie_color_list, 'Reservoirs by LULC'),
    ((0, 'new_size_class'), size_colors, 'Reservoirs by property size'),
]
for ax, (group_col, slice_colors, panel_title) in zip(axs, pie_specs):
    # Total 2023 reservoir count per group.
    group_totals = prop_gdf_res[[group_col, count_col]].groupby([group_col]).sum()[count_col]
    group_totals.plot.pie(ax=ax, colors=slice_colors, autopct='%1.0f%%')
    ax.set_title(panel_title)
    # Drop the column-name label pandas puts on the y axis.
    ax.set_ylabel('')
fig.tight_layout()
# NOTE(review): absolute, user-specific output path -- consider a configurable figure dir.
plt.savefig(
    '/home/ksolvik/research/reservoirs/figs/ch3/res_dist.jpg',
    dpi=150,
    bbox_inches='tight',
    pil_kwargs={'quality': 80},
)

In [None]:
# 2x2 grid of pies: rows = grouping (LULC class, size class),
# columns = 2023 statistic ('count', 'sum').
cmap = mpl.colormaps.get_cmap('Set2')
fig, axs = plt.subplots(2, 2, figsize=(7.35, 7.35))
row_specs = [
    ((0, 'lulc_class'), pie_color_list,
     ('Res Count by LULC', 'Res Area by LULC')),
    ((0, 'new_size_class'), cmap([0, 1, 2, 3]),
     ('Count by size class', 'Area by size class')),
]
for ax_row, (group_col, slice_colors, panel_titles) in zip(axs, row_specs):
    for ax, stat_name, panel_title in zip(ax_row, ('count', 'sum'), panel_titles):
        stat_col = (2023, stat_name)
        group_totals = prop_gdf_res[[group_col, stat_col]].groupby([group_col]).sum()[stat_col]
        group_totals.plot.pie(ax=ax, colors=slice_colors, autopct='%1.0f%%')
        ax.set_title(panel_title)
        # Drop the column-name label pandas puts on the y axis.
        ax.set_ylabel('')
fig.tight_layout()

In [None]:
# Number of properties in each size class.
print(prop_gdf_res[(0,'new_size_class')].value_counts())

In [15]:
# Rename portuguese: capitalised LULC label -> Portuguese display label.
# NOTE(review): 'Natural' pools forest, savanna and grassland in this notebook
# but is labelled 'Floresta' here -- confirm that wording is intended.
port_rename_dict = dict(
    Crop='Colheitas',
    Pasture='Pastagem',
    Natural='Floresta',
)

In [16]:
# Work on a copy so the English-labelled table stays intact.
prop_gdf_res_port = prop_gdf_res.copy()
# Translate the LULC labels; labels absent from port_rename_dict become NaN under .map.
prop_gdf_res_port[(0, 'lulc_class')] = prop_gdf_res_port[(0, 'lulc_class')].map(port_rename_dict)

In [17]:
# Colour option for the Portuguese pie (named colours).
pie_colors_port = [
    'lightgreen',
    'forestgreen',
    'peru',
]
# Grayscale alternative: three shades sampled from the 'gray' colormap.
grayscale_colors = plt.get_cmap('gray')([0.8, 0.4, 0.6])

In [None]:
# Portuguese: single grayscale pie of 2023 reservoir counts per LULC class.
fig, ax = plt.subplots(figsize=(3.5, 3.5))
res_count_by_lulc = (
    prop_gdf_res_port[[(0, 'lulc_class'), (2023, 'count')]]
    .groupby([(0, 'lulc_class')])
    .sum()[(2023, 'count')]
)
res_count_by_lulc.plot.pie(ax=ax, colors=grayscale_colors, autopct='%1.0f%%')
# Title the panel and clear the column-name label pandas puts on the y axis.
ax.set(title='Distribuição das Represas', ylabel='')