In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Input configuration for the property layer.
# Path to the GeoPackage of land-tenure properties (loaded with geopandas in the next cell).
prop_gpkg = './data/pa_br_landtenure_studyarea_only.gpkg'
# Name of the area column in that file; it is renamed to 'prop_area' right after loading.
# NOTE(review): presumably hectares, judging by the column name -- confirm.
area_col = 'area_ha'

In [None]:
# Load the property layer and give the area column a fixed name.
prop_gdf = gpd.read_file(prop_gpkg).rename(columns={area_col: 'prop_area'})
# 1-based property id derived from the row index of the loaded layer.
prop_gdf['fid'] = prop_gdf.index + 1
# Reproject to EPSG:4326 so the layer matches the lon/lat interview points.
prop_gdf = prop_gdf.to_crs('EPSG:4326')

In [None]:
# NOTE(review): absolute, machine-specific path -- the notebook will not run elsewhere
# until this points at a location that exists on the current machine.
interview_csv = '/home/ksolvik/research/reservoirs/interviews/summer_2023/locs/fazendas_cleaned.csv'
interview_df = pd.read_csv(interview_csv)
# 1-based interview id derived from the row order of the CSV.
interview_df['int_index'] = interview_df.index + 1
# Turn the lon/lat columns into point geometries in EPSG:4326.
interview_points = gpd.points_from_xy(interview_df['lon'], interview_df['lat'])
interview_gdf = gpd.GeoDataFrame(interview_df, geometry=interview_points, crs='EPSG:4326')

In [None]:
# Spatial join: attach the attributes of the property each interview point falls within.
# how='left' keeps every interview row; points outside all properties get NaN property columns.
int_prop_gdf = gpd.sjoin(
    left_df=interview_gdf,
    right_df=prop_gdf,
    how='left',
    predicate='within',
)

# Reservoir info

In [None]:
# Base table for the reservoir history: one row per matched property, keyed by 'fid'.
prop_gdf_res = (
    int_prop_gdf.loc[int_prop_gdf['fid'].notna()]  # keep interviews that landed inside a property
    .drop_duplicates(['fid'])                      # first interview wins when several share a property
    .set_index('fid')
)
# Nest the static attributes under a 2024 level so yearly stats can be joined as (year, stat).
prop_gdf_res.columns = pd.MultiIndex.from_product([[2024], prop_gdf_res.columns])

In [None]:
# Get reservoir info: join each year's per-property reservoir statistics onto the
# interviewed properties. Every yearly CSV is keyed by 'fid'; its columns are nested
# under the year so they sit next to the (2024, <attribute>) property columns.
for y in np.arange(1984, 2023):
    res_df = pd.read_csv('./out/res_stats/prop_res_stats_{}.csv'.format(y)).set_index('fid')
    # from_product adapts to however many stat columns the CSV holds; the previous
    # from_arrays([[y]*3, ...]) raised as soon as a file did not have exactly 3 columns.
    res_df.columns = pd.MultiIndex.from_product([[y], res_df.columns])
    prop_gdf_res = prop_gdf_res.join(res_df, how='left')

In [None]:
# Set index to interview id
# Keep the property id as a regular (2024, 'fid') column before the index is replaced;
# the index was float-capable up to here, so it is cast to int on the way.
prop_gdf_res[(2024, 'fid')] = prop_gdf_res.index.astype(int)
# From here on rows are looked up by interview index, not by fid.
# NOTE(review): this cell is not idempotent -- a second run would treat the interview
# index as the fid. Re-run from the cell that builds prop_gdf_res instead.
prop_gdf_res.index = prop_gdf_res[(2024, 'int_index')]

In [None]:
def plot_prop_res_history(id):
    """Print a property's attributes and plot its yearly reservoir 'sum' and 'count'.

    Parameters
    ----------
    id : label
        Row label of ``prop_gdf_res``. The notebook re-indexes that table by
        ``(2024, 'int_index')`` before calling this, so the label is an interview
        index. (The name shadows the builtin ``id``; it is kept so existing
        keyword callers keep working.)

    Returns
    -------
    None
        The figure is drawn on a new pandas-created axes as a side effect.
    """
    print('Property Info:')
    temp_df = prop_gdf_res.loc[id]
    print(temp_df[2024])
    # Everything outside the 2024 attribute block is a (year, statistic) series.
    yearly = temp_df.drop(2024)
    # Label both lines so they can be told apart; previously they were drawn unlabelled.
    ax = yearly[:, 'sum'].plot(label='sum')
    yearly[:, 'count'].plot(ax=ax, label='count')
    ax.set_xlabel('Year')
    ax.set_title('Reservoir history')
    ax.legend()

In [None]:
# Per-property total of every yearly reservoir statistic (the 2024 attribute block is excluded).
res_stat_totals = prop_gdf_res.drop(columns=2024).sum(axis=1)
print(res_stat_totals)

In [None]:
# Example: reservoir history for the row labelled 45
# (prop_gdf_res is indexed by (2024, 'int_index') at this point, i.e. interview index 45).
plot_prop_res_history(45)

# MapBiomas Data

In [None]:

# Maps each aggregated land-cover class name to the raw class codes that are summed
# into it by assign_lulc_classes (codes are matched against the integer-cast column
# labels of the yearly stats tables).
# NOTE(review): presumably MapBiomas legend codes -- confirm against the legend of the
# collection that produced ./out/mb_stats.
mb_keys_dict = {
    'crop': np.array([18,19,39,20,40,62,41,36,46,47,35,48]),
    'forest': np.array([3]),
    'savanna': np.array([4]),
    'grassland':np.array([12]),
    'pasture': np.array([15])
}

In [None]:
def assign_lulc_classes(in_df, year=None, class_codes=None):
    """Aggregate per-code land-cover columns into percentage shares per class.

    Parameters
    ----------
    in_df : pandas.DataFrame
        One row per property; every column label must be castable to an int class code.
    year : optional
        Label for the outer level of the returned columns. When omitted, the
        module-level loop variable ``y`` is used, exactly as the original
        implementation did (kept so existing one-argument calls still work).
    class_codes : dict, optional
        Mapping of class name -> array-like of codes. Defaults to the module-level
        ``mb_keys_dict``. Must contain 'forest', 'savanna' and 'grassland', which
        are combined into the extra 'natural' column.

    Returns
    -------
    pandas.DataFrame
        Indexed like ``in_df`` with MultiIndex columns ``(year, class)``: one column
        per class plus 'natural'. Values are percentages of the row total across the
        mapped classes; codes that belong to no class are ignored. Rows whose mapped
        classes sum to zero come out as NaN.
    """
    if class_codes is None:
        class_codes = mb_keys_dict
    if year is None:
        # Backward compatibility: fall back to the caller's global loop variable.
        year = y

    column_codes = in_df.columns.astype(int)
    # np.isin replaces the deprecated np.in1d; build the frame in one step instead of
    # growing an empty DataFrame column by column.
    class_totals = pd.DataFrame(
        {
            name: in_df.loc[:, np.isin(column_codes, codes)].sum(axis=1)
            for name, codes in class_codes.items()
        },
        index=in_df.index,
    )
    # Normalise each row to percentages of the mapped-class total.
    out_df = class_totals.div(class_totals.sum(axis=1), axis=0) * 100
    out_df['natural'] = out_df[['forest', 'savanna', 'grassland']].sum(axis=1)
    out_df.columns = pd.MultiIndex.from_product([[year], out_df.columns])
    return out_df

In [None]:
# Base table for the land-cover history: one row per matched property, keyed by 'fid'.
prop_gdf_lulc = (
    int_prop_gdf.loc[int_prop_gdf['fid'].notna()]  # keep interviews that landed inside a property
    .drop_duplicates(['fid'])                      # first interview wins when several share a property
    .set_index('fid')
)
# Nest the static attributes under a 2024 level so yearly shares can be joined as (year, class).
prop_gdf_lulc.columns = pd.MultiIndex.from_product([[2024], prop_gdf_lulc.columns])

In [None]:
# Join one set of percentage land-cover columns per year onto the interviewed properties.
# The loop variable must stay named `y`: assign_lulc_classes reads it as a global when
# called with a single argument, and uses it to label the year level of its columns.
for y in np.arange(1985, 2023):
    mb_stats = pd.read_csv('./out/mb_stats/prop_mb_stats{}.csv'.format(y), index_col=0)
    # The first CSV column holds the property id; cast it to int before joining.
    mb_stats.index = mb_stats.index.astype(int)
    prop_gdf_lulc = prop_gdf_lulc.join(assign_lulc_classes(mb_stats))

# Set index to interview id, keeping the property id as a (2024, 'fid') column.
prop_gdf_lulc[(2024, 'fid')] = prop_gdf_lulc.index.astype(int)
prop_gdf_lulc.index = prop_gdf_lulc[(2024, 'int_index')]

In [None]:

def plot_prop_lulc_history(fid, combine_natural=True):
    """Print a property's attributes and draw its yearly land-cover shares as a stacked area plot.

    fid is the row label looked up in prop_gdf_lulc. With combine_natural=True the
    combined 'natural' class is shown and forest/savanna/grassland are hidden;
    with False it is the other way round.
    """
    print('Property Info:')
    prop_row = prop_gdf_lulc.loc[fid]
    print(prop_row[2024])
    # Pick which columns to hide so the stacked areas never double-count natural cover.
    hidden_classes = ['forest','savanna','grassland'] if combine_natural else ['natural']
    # Rows = years, columns = land-cover classes.
    yearly_shares = prop_row.drop(2024).unstack(level=1)
    yearly_shares.drop(columns=hidden_classes).plot.area()
    

In [None]:
# Example: land-cover history for the row labelled 45
# (prop_gdf_lulc is indexed by (2024, 'int_index') at this point, i.e. interview index 45).
plot_prop_lulc_history(45)

# Plot both

In [None]:

def plot_prop_all(fid, combine_natural=True, print_info=True, include_note=False):
    """Plot a property's land-cover shares (top) and reservoir stats (bottom) by year.

    Parameters
    ----------
    fid : label
        Row label looked up in both ``prop_gdf_lulc`` and ``prop_gdf_res``. Despite
        the name, the notebook re-indexes both tables by ``(2024, 'int_index')``
        before this is called, so the label is an interview index.
    combine_natural : bool
        If True, show the combined 'natural' class and hide forest/savanna/grassland;
        if False, show the three separately and hide 'natural'.
    print_info : bool
        If True, print the property's (2024, ...) attribute block first.
    include_note : bool
        If True, title the top panel with the property's (2024, 'Note') value.

    Returns
    -------
    None
        A new two-panel figure is created as a side effect.
    """
    fig, axs = plt.subplots(2, 1)
    lulc_temp_df = prop_gdf_lulc.loc[fid]
    if print_info:
        print('Property Info:')
        print(lulc_temp_df[2024])

    # LULC: hide either the components or the aggregate so the stack never double-counts.
    hidden_classes = ['forest', 'savanna', 'grassland'] if combine_natural else ['natural']
    lulc_df_to_plot = lulc_temp_df.drop(2024, level=0).unstack(level=1).drop(columns=hidden_classes)
    lulc_df_to_plot.plot.area(ax=axs[0])
    axs[0].set_ylabel('Land cover (%)')

    # Reservoirs: years without a value are drawn as 0.
    prop_df_to_plot = prop_gdf_res.loc[fid].drop(2024, level=0).fillna(0)
    prop_df_to_plot.unstack(level=1).plot(ax=axs[1])
    axs[1].set_xlabel('Year')
    axs[1].set_ylabel('Reservoir stats')

    if include_note:
        axs[0].set_title(lulc_temp_df[(2024, 'Note')])
    # Keep the stacked panels' tick labels, axis labels and title from overlapping.
    fig.tight_layout()

In [None]:
# Display the combined table: the (2024, ...) property/interview attributes plus the
# per-year reservoir statistics, indexed by interview index.
prop_gdf_res

In [None]:
# Example: land cover + reservoir panels for the row labelled 45
# (both tables are indexed by (2024, 'int_index') at this point, i.e. interview index 45).
plot_prop_all(45)

In [None]:
# View them all
# for fid in prop_gdf_res.index:
#     plot_prop_all(fid, print_info=False, include_note=True)

In [None]:

# Construction store
# NOTE(review): prop_gdf_lulc and prop_gdf_res are indexed by (2024, 'int_index') here,
# but 10255.0 looks like a property fid rather than an interview index -- confirm this
# lookup still resolves after a fresh Restart & Run All.
plot_prop_all(10255.0)

In [None]:
# Neighboring farm
# NOTE(review): 10118.0 looks like a property fid, but the plotted tables are indexed by
# (2024, 'int_index') -- confirm the key.
plot_prop_all(10118.0)

In [None]:
# ???
# NOTE(review): property not identified by the original comment. 10142.0 also looks like
# a property fid, while the plotted tables are indexed by (2024, 'int_index') -- confirm the key.
plot_prop_all(10142.0)

In [None]:
# Tree property
# NOTE(review): 6069.0 looks like a property fid, but the plotted tables are indexed by
# (2024, 'int_index') -- confirm the key.
plot_prop_all(6069.0)

In [None]:
# Nelore Vera Cruz
# NOTE(review): 14603.0 looks like a property fid, but the plotted tables are indexed by
# (2024, 'int_index') -- confirm the key.
plot_prop_all(14603.0)

In [None]:
# Sindacato Rural Officer
# NOTE(review): 6524.0 looks like a property fid, but the plotted tables are indexed by
# (2024, 'int_index') -- confirm the key.
plot_prop_all(6524.0)

In [None]:
# Big reservoir farm we visited
# NOTE(review): 7957.0 looks like a property fid, but the plotted tables are indexed by
# (2024, 'int_index') -- confirm the key.
plot_prop_all(7957.0)

In [None]:
# Fish farm
# NOTE(review): 12011.0 looks like a property fid, but the plotted tables are indexed by
# (2024, 'int_index') -- confirm the key.
plot_prop_all(12011.0)