In [1]:
import glob
import geopandas as gpd
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import string

In [2]:
# Notebook inputs: name of the area column in the property layer (renamed to
# 'prop_area' on load) and the GeoPackage holding the property polygons.
area_col = 'area_ha'
prop_gpkg = './data/pa_br_landtenure_studyarea_only.gpkg'

In [3]:
# Read the property polygons and give the area column a fixed name.
prop_gdf = gpd.read_file(prop_gpkg).rename(columns={area_col: 'prop_area'})
# 1-based id derived from the row position
# (presumably matches the 'fid' column of the per-year stats CSVs -- TODO confirm).
prop_gdf.loc[:, 'fid'] = prop_gdf.index + 1
# Reproject to lon/lat so the polygons share a CRS with the interview points.
prop_gdf = prop_gdf.to_crs('EPSG:4326')

In [4]:
# Interview locations: one row per interview with 'lon' / 'lat' columns.
interview_csv = '/home/ksolvik/research/reservoirs/interviews/summer_2023/locs/fazendas_cleaned_ids.csv'
interview_df = pd.read_csv(interview_csv)
# 1-based interview id derived from the row position in the CSV
interview_df['int_index'] = interview_df.index + 1
# Disabled code, kept for provenance: it shuffles int_index into an 'anon_id'
# column and writes the CSV back (presumably run once -- TODO confirm).
# old_ids = interview_df['int_index'].values.copy()
# np.random.shuffle(old_ids)
# interview_df['anon_id'] = old_ids
# interview_df.to_csv('/home/ksolvik/research/reservoirs/interviews/summer_2023/locs/fazendas_cleaned_ids.csv', index=False)
interview_points = gpd.points_from_xy(interview_df.lon, interview_df.lat)
interview_gdf = gpd.GeoDataFrame(interview_df, geometry=interview_points, crs='EPSG:4326')

In [5]:
# Spatial join: attach to every interview point the attributes of the property
# polygon it falls within. how='left' keeps interviews that match no polygon
# (their property columns, including 'fid', are left missing).
int_prop_gdf = interview_gdf.sjoin(prop_gdf, how='left', predicate='within')

# Reservoir info

In [6]:
# Keep interviews that matched a property, one row per property, keyed by fid.
has_property = int_prop_gdf['fid'].notna()
prop_gdf_res = (
    int_prop_gdf.loc[has_property]
    .drop_duplicates(['fid'])
    .set_index('fid')
)
# Put the static attributes under a level-0 key of 0 so the per-year statistics
# can be joined next to them under their year.
prop_gdf_res.columns = pd.MultiIndex.from_product([[0], prop_gdf_res.columns])

In [7]:
# Get reservoir info: join one block of (year, statistic) columns per year onto
# prop_gdf_res, matching rows on fid.
for y in np.arange(1984, 2024):
    year_csvs = sorted(glob.glob('./out/res_stats_buffer/prop_res_stats_*{}.csv'.format(y)))
    if not year_csvs:
        # pd.concat([]) would raise an opaque "No objects to concatenate" error;
        # report the gap and carry on with the remaining years instead.
        print('No reservoir stats found for {}; skipping'.format(y))
        continue
    # Missing statistics are treated as 0 before averaging.
    df_list = [pd.read_csv(ls_csv).set_index('fid').fillna(0) for ls_csv in year_csvs]
    # Calc mean if multiple satellites: average same-named columns across files.
    # Transposing and grouping on the index replaces groupby(axis=1), which is
    # deprecated since pandas 2.1.
    year_res_df = pd.concat(df_list, axis=1).T.groupby(level=0).mean().T
    # Label with the year without assuming a fixed number of statistic columns.
    year_res_df.columns = pd.MultiIndex.from_product([[y], year_res_df.columns])
    prop_gdf_res = prop_gdf_res.join(year_res_df, how='left')

In [8]:
# Set index to interview id, keeping the property fid as a regular column first
# so it is not lost when the index is replaced.
res_property_ids = prop_gdf_res.index.astype(int)
prop_gdf_res[(0, 'fid')] = res_property_ids
res_interview_ids = prop_gdf_res[(0, 'int_index')]
prop_gdf_res.index = res_interview_ids

In [9]:
def plot_prop_res_history(id):
    """Print one property's static attributes and plot its reservoir 'sum' and 'count' series.

    Parameters
    ----------
    id
        Label looked up in the index of ``prop_gdf_res`` (the notebook sets that
        index to the interview id).
    """
    print('Property Info:')
    prop_row = prop_gdf_res.loc[id]
    # Level-0 key 0 holds the non-yearly attributes
    print(prop_row[0])
    # Everything left after dropping key 0 is indexed by (year, statistic)
    yearly_stats = prop_row.drop(0)
    ax = yearly_stats[:, 'sum'].plot()
    yearly_stats[:, 'count'].plot(ax=ax)

# MapBiomas Data

In [10]:

# Integer class codes grouped into the land-cover categories used by this
# notebook; assign_lulc_classes matches them against the column labels of the
# per-year mb_stats CSVs.
_mb_class_codes = {
    'crop': [18, 19, 39, 20, 40, 62, 41, 36, 46, 47, 35, 48],
    'forest': [3],
    'savanna': [4],
    'grassland': [12],
    'pasture': [15],
}
mb_keys_dict = {name: np.array(codes) for name, codes in _mb_class_codes.items()}

In [11]:
def assign_lulc_classes(in_df, year=None):
    """Sum per-class columns into the land-cover groups defined by ``mb_keys_dict``.

    Parameters
    ----------
    in_df : pandas.DataFrame
        One row per property. Column labels must be castable to ``int``; they
        are matched against the class codes in ``mb_keys_dict``.
    year : int, optional
        Label for the first level of the returned column MultiIndex. When
        omitted, the module-level loop variable ``y`` is used, which is what
        this function previously always did (hidden dependency on caller state).

    Returns
    -------
    pandas.DataFrame
        Indexed like ``in_df``, with columns ``(year, group)`` for every key of
        ``mb_keys_dict`` plus ``(year, 'natural')``, the sum of forest, savanna
        and grassland. Values are the summed inputs multiplied by 0.09.

    Raises
    ------
    NameError
        If ``year`` is omitted and no global ``y`` exists.
    """
    if year is None:
        # Backward-compatible fallback for callers that rely on the loop variable.
        year = y
    class_codes = in_df.columns.astype(int)
    out_df = pd.DataFrame()
    for lulc_class, codes in mb_keys_dict.items():
        # np.isin replaces np.in1d, which is deprecated in NumPy 2.0
        out_df[lulc_class] = in_df.loc[:, np.isin(class_codes, codes)].sum(axis=1)
    # out_df = out_df.div((out_df.sum(axis=1)), axis=0)*100
    # 90000 / (1000*1000) = 0.09 -- presumably hectares per 30 m pixel; TODO confirm
    out_df = out_df * 90000 / (1000 * 1000)
    out_df['natural'] = out_df[['forest', 'savanna', 'grassland']].sum(axis=1)
    out_df.columns = pd.MultiIndex.from_product([[year], out_df.columns])
    return out_df

In [12]:
# Keep interviews that matched a property, one row per property, keyed by fid.
has_property = int_prop_gdf['fid'].notna()
prop_gdf_lulc = (
    int_prop_gdf.loc[has_property]
    .drop_duplicates(['fid'])
    .set_index('fid')
)
# Static attributes go under a level-0 key of 0; yearly land-cover columns are
# joined next to them under their year.
prop_gdf_lulc.columns = pd.MultiIndex.from_product([[0], prop_gdf_lulc.columns])

In [None]:
# Join one block of (year, class) area columns per year onto prop_gdf_lulc.
# NOTE: the loop variable has to stay named `y`, because assign_lulc_classes
# reads it as a global when labelling its output columns.
for y in np.arange(1985, 2024):
    year_df = pd.read_csv('./out/mb_stats/prop_mb_stats{}.csv'.format(y), index_col=0)
    year_df.index = year_df.index.astype(int)
    lulc_df = assign_lulc_classes(year_df)
    prop_gdf_lulc = prop_gdf_lulc.join(lulc_df)
    # 'other' is whatever part of the property area the three plotted groups
    # do not account for, floored at zero.
    accounted_area = lulc_df.loc[:, pd.IndexSlice[y, ['crop', 'natural', 'pasture']]].sum(axis=1)
    other_area = prop_gdf_lulc[(0, 'prop_area')] - accounted_area
    prop_gdf_lulc[(y, 'other')] = other_area.clip(lower=0)

# Set index to interview id, keeping the property fid as a regular column first
# so it is not lost when the index is replaced.
lulc_property_ids = prop_gdf_lulc.index.astype(int)
prop_gdf_lulc[(0, 'fid')] = lulc_property_ids
lulc_interview_ids = prop_gdf_lulc[(0, 'int_index')]
prop_gdf_lulc.index = lulc_interview_ids

In [14]:

def plot_prop_lulc_history(fid, combine_natural=True):
    """Print one property's static attributes and draw its land-cover history as a stacked area plot.

    Parameters
    ----------
    fid
        Label looked up in the index of ``prop_gdf_lulc`` (the notebook sets
        that index to the interview id).
    combine_natural : bool
        If True, plot the combined 'natural' column and leave out forest,
        savanna and grassland; if False, plot those three and leave out 'natural'.
    """
    print('Property Info:')
    prop_row = prop_gdf_lulc.loc[fid]
    # Level-0 key 0 holds the non-yearly attributes
    print(prop_row[0])
    if combine_natural:
        redundant_cols = ['forest', 'savanna', 'grassland']
    else:
        redundant_cols = ['natural']
    # Years become rows and land-cover classes become columns
    lulc_by_year = prop_row.drop(0).unstack(level=1)
    lulc_by_year.drop(columns=redundant_cols).plot.area()
    

# Plot LULC and reservoir history

In [15]:
# Fill colours for the stacked land-cover area plots; matplotlib applies them in
# the column order of the plotted frame.
ag_class_color_list = [
    'pink',
    'sienna',
    'darkgreen',
    'slategrey',
]

In [16]:
def plot_prop_twoax(fid, combine_natural=True, print_info=True, include_note=False):
    """Plot one property's land-cover history with its reservoir area on a twin y-axis.

    Land-cover areas are drawn as a stacked area plot on the left axis; the
    reservoir 'sum' statistic is drawn as a dashed black line on the right axis.

    Parameters
    ----------
    fid
        Label looked up in the index of ``prop_gdf_lulc`` and ``prop_gdf_res``
        (the notebook sets both indexes to the interview id).
    combine_natural : bool
        If True, plot the combined 'natural' column and leave out forest,
        savanna and grassland; if False, plot those three and leave out 'natural'.
    print_info : bool
        Print the property's static attributes before plotting.
    include_note : bool
        Title the figure with the 'Note' and 'Transcript' attributes plus ``fid``.
    """
    _, ax = plt.subplots(1, 1, figsize=[7.35, 4])
    lulc_row = prop_gdf_lulc.loc[fid]
    if print_info:
        print('Property Info:')
        print(lulc_row[0])

    # LULC: years as rows, land-cover classes as columns
    redundant_cols = ['forest', 'savanna', 'grassland'] if combine_natural else ['natural']
    lulc_by_year = lulc_row.drop(0, level=0).unstack(level=1).drop(columns=redundant_cols)
    lulc_by_year.columns = [name.capitalize() for name in lulc_by_year.columns]
    lulc_by_year.plot.area(ax=ax, color=ag_class_color_list, legend=False)

    # Reservoirs: second Axes sharing the same x-axis
    res_ax = ax.twinx()
    res_row = prop_gdf_res.loc[fid].drop(0, level=0).drop(['median', 'count'], level=1).fillna(0)
    res_by_year = res_row.unstack(level=1).rename(columns={'sum': 'Reservoir Area'})
    res_by_year.plot(ax=res_ax, color='black', lw=1.5, style='--', legend=False)

    ax.set_xlabel('Year')
    ax.set_xlim([1985, 2023])
    ax.set_ylabel('Land Area (ha)')
    res_ax.set_ylabel('Total Res Surface Area (ha)')
    if include_note:
        title_text = (lulc_row[(0, 'Note')] + '\n'
                      + lulc_row[(0, 'Transcript')] + ' ' + str(fid))
        ax.set_title(title_text)

def plot_prop_twoax_7(fids, combine_natural=True, include_note=False, include_title=True):
    """Plot land-cover and reservoir histories for up to seven properties on a 4x2 grid.

    Each property gets one panel: stacked land-cover areas on the left y-axis
    and the reservoir 'sum' statistic as a dashed black line on a twin right
    y-axis. The last grid cell holds the shared legend; panels without a
    property are blanked.

    Parameters
    ----------
    fids : iterable
        Between one and seven labels looked up in the index of
        ``prop_gdf_lulc`` and ``prop_gdf_res`` (the notebook sets both indexes
        to the interview id).
    combine_natural : bool
        If True, plot the combined 'natural' column and leave out forest,
        savanna and grassland; if False, plot those three and leave out 'natural'.
    include_note : bool
        Title each panel with the 'Note' attribute followed by the label.
    include_title : bool
        Only used when ``include_note`` is False: title each panel
        '(<letter>) <Title attribute>'. When both flags are False the title is
        '#<anon_id attribute>'.

    Raises
    ------
    ValueError
        If ``fids`` is empty or holds more than seven labels.
    """
    fids = list(fids)
    max_panels = 7  # 4x2 grid with the last cell reserved for the legend
    if not 0 < len(fids) <= max_panels:
        raise ValueError(
            'plot_prop_twoax_7 expects between 1 and {} ids, got {}'.format(max_panels, len(fids)))

    fig, axs = plt.subplots(4, 2, figsize=(7.35, 7.35))
    flat_axs = axs.flatten()
    alphabet_list = string.ascii_uppercase
    redundant_cols = ['forest', 'savanna', 'grassland'] if combine_natural else ['natural']

    for i, fid in enumerate(fids):
        ax = flat_axs[i]
        lulc_temp_df = prop_gdf_lulc.loc[fid]
        # LULC: years as rows, land-cover classes as columns
        lulc_df_to_plot = lulc_temp_df.drop(0, level=0).unstack(level=1).drop(columns=redundant_cols)
        lulc_df_to_plot.columns = [c.capitalize() for c in lulc_df_to_plot.columns]
        lulc_df_to_plot.plot(kind='area',
                             ax=ax, color=ag_class_color_list, legend=False, aa=True, lw=0.01)
        # Reservoirs: second Axes sharing the same x-axis
        ax2 = ax.twinx()
        prop_df_to_plot = prop_gdf_res.loc[fid].drop(0, level=0).drop(['median', 'count'], level=1).fillna(0)
        res_df_to_plot = prop_df_to_plot.unstack(level=1).rename(columns={'sum': 'Reservoir Area'})
        res_df_to_plot.plot(ax=ax2, color='black', lw=1.5, style='--', legend=False)

        ax.set_xlabel('Year')
        ax.set_xlim([1985, 2023])
        # Cap the land axis at the smallest yearly stacked total
        ax.set_ylim([0, np.min(lulc_df_to_plot.sum(axis=1).values)])
        ax.set_ylabel('Land Area (ha)')
        # Thousands separators on the land-area axis for every panel; this used
        # to be applied only when the reservoir axis took the `else` branch.
        ax.yaxis.set_major_formatter(mpl.ticker.StrMethodFormatter('{x:,.0f}'))
        ax2.set_ylabel('Res Area (ha)')

        # Keep at least a 0-5 range on the reservoir axis so small series stay comparable
        max_res_area = np.max(res_df_to_plot.values)
        if max_res_area < 5:
            ax2.set_ylim(-0.1, 5)
        else:
            ax2.set_ylim(-0.1, max_res_area + 5)

        if include_note:
            ax.set_title(lulc_temp_df[(0, 'Note')] + str(fid))
        elif include_title:
            ax.set_title('({}) {}'.format(alphabet_list[i], lulc_temp_df[(0, 'Title')]))
        else:
            ax.set_title('#{}'.format(lulc_temp_df[(0, 'anon_id')]))

    # Blank every grid cell that holds no property; this always includes the
    # last cell, which carries the legend.
    for unused_ax in flat_axs[len(fids):]:
        unused_ax.set_axis_off()

    # Add legend: area handles from the first panel (every panel plots the same
    # classes) plus the reservoir line from the last twin axis.
    legend_ax = axs[-1, -1]
    handles, labels = flat_axs[0].get_legend_handles_labels()
    ax2_handle, ax2_label = ax2.get_legend_handles_labels()
    handles.append(ax2_handle[0])
    labels.append(ax2_label[0])

    legend_ax.legend(handles, labels, loc=10,
                     fontsize=12)
    fig.tight_layout()

In [None]:
# Interview ids of the seven properties shown in the multi-panel figure,
# listed in panel order.
props_to_plots = [5, 35, 2, 28, 1, 9, 45]
plot_prop_twoax_7(
    props_to_plots,
    include_note=False,
    include_title=True,
)

In [None]:
# Plot them all: one two-axis figure per interviewed property.
# The loop variable is not called `id`, so the builtin id() is not rebound at
# notebook scope for any cell run afterwards.
for int_id in prop_gdf_res.index:
    plot_prop_twoax(int_id, print_info=False, include_note=True)