In [None]:
import pandas as pd
import geopandas as gpd
import glob
import os
import numpy as np
import matplotlib.pyplot as plt
import scipy

# Read in reservoir info

In [None]:
# Every CSV directly under ./out, in lexicographic (file-name) order
all_csvs = sorted(glob.glob('./out/*.csv'))

In [None]:
def read_process_csv(csv):
    """Load one CSV and tag its rows with metadata sliced from the file name.

    The base name of ``csv`` is cut by fixed position: characters 0-7 become
    the ``satellite`` column and characters 9-12, converted with ``int``,
    become the ``year`` column.

    Parameters
    ----------
    csv : str
        Path of the CSV file to read.

    Returns
    -------
    pandas.DataFrame
        The file contents plus the ``satellite`` and ``year`` columns.

    Raises
    ------
    ValueError
        If characters 9-12 of the base name cannot be parsed as an integer.
    """
    return pd.read_csv(csv).assign(
        satellite=os.path.basename(csv)[:8],
        year=int(os.path.basename(csv)[9:13]),
    )

In [None]:
# Stack the per-file tables into one long frame; each file's own row index is kept
full_df = pd.concat(list(map(read_process_csv, all_csvs)))

In [None]:
# Clean the stacked reservoir table in one chained expression. Building a
# fresh frame (instead of dropping from / assigning into a filtered slice)
# avoids pandas' SettingWithCopyWarning on the 'area' assignment.
# NOTE: this cell still consumes and replaces full_df, so it is meant to run
# once after the concat cell; re-running it on the cleaned frame fails at the
# drop (the column is already gone).
keep_rows = (
    (full_df['reg'] != 0)                # Remove Agua Boa
    & (full_df['year'] < 2024)           # Remove 2024, not a full set of data yet
    & (full_df['property_mode'] != 0)    # Remove property 0, which is NA
)
full_df = (
    full_df.loc[keep_rows]
    # Remove "property_all", which contains all properties that each water object touches
    .drop(columns=['property_all'])
    # Get surface area in ha
    # NOTE(review): presumably 'area' arrives as a count of 100 m^2 pixels — confirm
    .assign(area=lambda df: df['area']*100/10000)
)

# Read in property data

In [None]:
# Load the property polygons and expose their area column as 'prop_area_ha'
prop_gdf = gpd.read_file(
    '../properties/data/pa_br_landtenure_studyarea_only_aea.shp'
).rename(columns={'area_ha': 'prop_area_ha'})

In [None]:
# Property size classes by area (ha). Each threshold in size_list is the
# exclusive upper bound of the class at the same position in size_names.
size_list = [2000, 500, 100]
size_names = ['Large', 'Medium', 'Small']
# Default label; rows with a missing area keep it, because every comparison
# below evaluates to False for NaN.
prop_gdf['prop_size_class'] = 'NA'

# Everything at or above the largest threshold is 'Mega'. The comparison is
# >= (not >) so that a property of exactly size_list[0] ha is classified
# instead of silently keeping the 'NA' default.
prop_gdf.loc[prop_gdf['prop_area_ha']>=size_list[0], 'prop_size_class'] = 'Mega'
# Walk the thresholds from largest to smallest: each pass overwrites the label
# written by the previous one for the properties that are smaller still.
for upper_bound, class_name in zip(size_list, size_names):
    prop_gdf.loc[prop_gdf['prop_area_ha']<upper_bound, 'prop_size_class'] = class_name

In [None]:
# Attach property attributes to every reservoir row. The default inner join
# keeps only reservoirs whose property_mode matches a property fid.
prop_cols = ['fid','nm_class','prop_size_class', 'prop_area_ha']
full_df_prop_details = pd.merge(full_df, prop_gdf[prop_cols],
                                left_on='property_mode', right_on='fid')

# 2023 rows only, without the per-file metadata columns
is_2023 = full_df_prop_details['year'] == 2023
full_df_prop_details_2023 = full_df_prop_details[is_2023].drop(columns=['year','satellite'])

In [None]:
# Municipality code -> name lookup for the four municipalities used below.
# The two lists are derived from it so codes and names cannot drift apart.
muni_dict = {
    5101803: 'Barra do Garças',
    5106257: 'Nova Xavantina',
    5102702: 'Canarana',
    5107065: 'Querência',
}
muni_list = list(muni_dict.keys())
muni_name_list = list(muni_dict.values())

In [None]:
# Keep only the properties whose municipality code is in muni_list
# NOTE(review): assumes cd_mun holds integer codes like those in muni_list — confirm
in_study_munis = prop_gdf['cd_mun'].isin(muni_list)
filt_prop_gdf = prop_gdf[in_study_munis]

In [None]:
# Narrow further to the properties whose nm_class is 'PL'
filt_prop_gdf = filt_prop_gdf[filt_prop_gdf['nm_class'].eq('PL')]

# Basic property plots

In [None]:
# Per-municipality median of every numeric column. numeric_only=True is needed
# because the frame also carries non-numeric columns (e.g. nm_class,
# prop_size_class): pandas >= 2.0 raises a TypeError on those instead of
# silently dropping them as older versions did.
groupby_muni = filt_prop_gdf.groupby('nm_mun').median(numeric_only=True)

In [None]:
fig, axs = plt.subplots(1,2, figsize=(13,5))
ax_hist, ax_bar = axs

# Left panel: size distribution of properties strictly between 0.5 and 2000 ha
prop_sizes = filt_prop_gdf['prop_area_ha']
prop_sizes[(prop_sizes < 2000) & (prop_sizes > 0.5)].hist(bins=20, ax=ax_hist)
ax_hist.set_xlabel('Property Size (ha)')
ax_hist.set_ylabel('Count')

# Right panel: median property size per municipality, in muni_name_list order
groupby_muni.loc[muni_name_list,'prop_area_ha'].plot.bar(ax=ax_bar)
ax_bar.set_xlabel('Municipality')
ax_bar.set_ylabel('Median Property Size (ha)')
# Re-apply the current ticks and labels, rotated 45 degrees and right-aligned
ax_bar.set_xticks(ax_bar.get_xticks(), ax_bar.get_xticklabels(), rotation=45, ha='right')

In [None]:
# Stand-alone histogram of property sizes strictly between 0.5 and 2000 ha
prop_sizes = filt_prop_gdf['prop_area_ha']
prop_sizes[(prop_sizes < 2000) & (prop_sizes > 0.5)].hist(bins=20)

# Plot property size vs reservoirs

In [None]:
# Per-property (fid) reservoir statistics for 2023, 'PL' properties only.
# The result has two-level columns: ('area', <stat>) and ('prop_area_ha', 'first').
is_pl = full_df_prop_details_2023['nm_class'] == 'PL'
pl_reservoirs = full_df_prop_details_2023.loc[is_pl, ['fid', 'area', 'prop_area_ha']]
groupby_prop_id = pl_reservoirs.groupby('fid').agg({
    'area': ['min','max','mean','median','sum','count'],
    'prop_area_ha': ['first'],
})

In [None]:
# Keep only the properties smaller than 5000 ha
prop_area = groupby_prop_id[('prop_area_ha','first')]
groupby_prop_id = groupby_prop_id[prop_area < 5000]

In [None]:
groupby_prop_id[('area', 'sum')] = 0.01*groupby_prop_id[('area','sum')]/groupby_prop_id[('prop_area_ha', 'first')]

In [None]:
plt.hist(groupby_prop_id[('area','sum')])

In [None]:
# Largest single reservoir on each property against property size
groupby_prop_id.plot(kind='scatter', x=('prop_area_ha','first'), y=('area','max'))

In [None]:
# The ('area', 'sum') column against property size
groupby_prop_id.plot(kind='scatter', x=('prop_area_ha','first'), y=('area','sum'))

In [None]:
# Number of reservoirs on each property against property size
groupby_prop_id.plot(kind='scatter', x=('prop_area_ha','first'), y=('area','count'))

In [None]:
# Side by side: the ('area', 'sum') column (left) and the reservoir count
# (right), both against property size
fig, ax = plt.subplots(1, 2, figsize=(16,6))
prop_size_col = ('prop_area_ha','first')
groupby_prop_id.plot(kind='scatter', x=prop_size_col, y=('area','sum'), ax=ax[0], alpha=0.5)
groupby_prop_id.plot(kind='scatter', x=prop_size_col, y=('area','count'), ax=ax[1], alpha=0.5)

In [None]:
# Spearman rank correlation: property size vs the ('area', 'sum') column
prop_size = groupby_prop_id[('prop_area_ha','first')]
scipy.stats.spearmanr(prop_size, groupby_prop_id[('area', 'sum')])

In [None]:
# Spearman rank correlation: property size vs number of reservoirs
prop_size = groupby_prop_id[('prop_area_ha','first')]
scipy.stats.spearmanr(prop_size, groupby_prop_id[('area', 'count')])

In [None]:
# ('area', 'sum') divided by the property's area.
# NOTE(review): this is a reservoir-ha-per-property-ha density only if
# ('area', 'sum') still holds the raw summed reservoir area at this point;
# verify that no earlier cell has rescaled that column.
groupby_prop_id['area_density_sum'] = groupby_prop_id[('area', 'sum')].div(
    groupby_prop_id[('prop_area_ha','first')]
)

In [None]:
# Reservoir count divided by the property's area (reservoirs per property ha)
groupby_prop_id['area_density_count'] = groupby_prop_id[('area', 'count')].div(
    groupby_prop_id[('prop_area_ha','first')]
)

In [None]:
# Spearman rank correlation: property size vs the area_density_sum column
prop_size = groupby_prop_id[('prop_area_ha','first')]
scipy.stats.spearmanr(prop_size, groupby_prop_id['area_density_sum'])

In [None]:
# Spearman rank correlation: property size vs the area_density_count column
prop_size = groupby_prop_id[('prop_area_ha','first')]
scipy.stats.spearmanr(prop_size, groupby_prop_id['area_density_count'])

In [None]:
# Density columns against property size: area density (left), count density (right)
fig, ax = plt.subplots(1, 2, figsize=(16,6))
prop_size_col = ('prop_area_ha','first')
groupby_prop_id.plot(kind='scatter', x=prop_size_col, y='area_density_sum', ax=ax[0], alpha=0.5)
groupby_prop_id.plot(kind='scatter', x=prop_size_col, y='area_density_count', ax=ax[1], alpha=0.5)
# Same x-window on both panels; points beyond 2000 ha fall outside the view
ax[0].set_xlim(left=-50, right=2000)
ax[1].set_xlim(left=-50, right=2000)

# Group by property size class

In [None]:
# Rebuild the per-property reservoir statistics from the 2023 'PL' rows.
# This replaces the earlier groupby_prop_id frame under the same name; only
# the 'area' column is aggregated here.
is_pl = full_df_prop_details_2023['nm_class'] == 'PL'
pl_reservoirs = full_df_prop_details_2023.loc[is_pl, ['fid', 'area', 'prop_area_ha']]
groupby_prop_id = pl_reservoirs.groupby('fid').agg({
    'area': ['mean','median','sum','count'],
})

In [None]:
# One row per property (fid): the first prop_area_ha / prop_size_class value
# found among that property's 2023 'PL' reservoir rows
is_pl = full_df_prop_details_2023['nm_class'] == 'PL'
prop_attr_cols = ['prop_area_ha', 'prop_size_class', 'fid']
prop_deets = full_df_prop_details_2023.loc[is_pl, prop_attr_cols].groupby('fid').first()

In [None]:
# Flatten the ('area', <stat>) columns to plain stat names, then add the
# property attributes by matching on the fid index (left join)
reservoir_stats = groupby_prop_id['area']
prop_deets_resstats = reservoir_stats.join(prop_deets, how='left')

In [None]:
# Normalise total reservoir area and reservoir count by property area
prop_deets_resstats = prop_deets_resstats.assign(
    area_density_sum=lambda df: df['sum'] / df['prop_area_ha'],
    area_density_count=lambda df: df['count'] / df['prop_area_ha'],
)

In [None]:
# Box plots per size class: total reservoir area (left), reservoir count (right)
fig, ax = plt.subplots(1, 2, figsize=(16,6))
# NOTE(review): 'H' is not one of the labels this notebook assigns to
# prop_size_class ('NA', 'Mega', 'Large', 'Medium', 'Small'), so this filter
# appears to keep every row — confirm what it was meant to exclude.
not_h = prop_deets_resstats[prop_deets_resstats['prop_size_class'].ne('H')]
not_h[['sum', 'prop_size_class']].boxplot(by='prop_size_class', ax=ax[0])
not_h[['count', 'prop_size_class']].boxplot(by='prop_size_class', ax=ax[1])


In [None]:
# Box plots per size class: area density (left), count density (right)
fig, ax = plt.subplots(1, 2, figsize=(16,6))
prop_deets_resstats[['area_density_sum', 'prop_size_class']].boxplot(
    by='prop_size_class', ax=ax[0])
prop_deets_resstats[['area_density_count', 'prop_size_class']].boxplot(
    by='prop_size_class', ax=ax[1])


In [None]:
# Summary statistics of every per-property column within each size class,
# with rows put in ascending size order. Selecting the four labels by name
# raises a KeyError if any class has no properties.
size_class_order = ['Small', 'Medium', 'Large','Mega']
groupby_prop_type = (
    prop_deets_resstats
    .groupby(['prop_size_class'])
    .agg(['mean','median','sum','count'])
    .loc[size_class_order]
)

In [None]:
# Mean reservoir count (left) and mean total reservoir area (right) per
# property, by size class
fig, axs = plt.subplots(1, 2, figsize=(13,5))
groupby_prop_type[('count','mean')].plot.bar(ax=axs[0])
groupby_prop_type[('sum','mean')].plot.bar(ax=axs[1])
axs[0].set_xlabel('Property Size Class')
axs[1].set_xlabel('Property Size Class')
axs[1].set_ylabel('Mean Total Reservoir Area (ha)')
# A count has no area unit, so the label no longer carries "(ha)"
axs[0].set_ylabel('Mean Reservoir Count')
# Keep the class names horizontal
axs[0].tick_params(axis='x', labelrotation=0)
axs[1].tick_params(axis='x', labelrotation=0)

In [None]:
# Mean count density (left) and mean area density (right), by size class
fig, axs = plt.subplots(1, 2, figsize=(13,5))
density_panels = [
    (('area_density_count','mean'), 'Mean Reservoir Density (count per ha)'),
    (('area_density_sum','mean'), 'Mean Reservoir Area Density\n(reservoir ha per property ha)'),
]
for panel_ax, (stat_col, y_label) in zip(axs, density_panels):
    groupby_prop_type[stat_col].plot.bar(ax=panel_ax)
    panel_ax.set_xlabel('Property Size Class')
    panel_ax.set_ylabel(y_label)
    # Keep the class names horizontal
    panel_ax.tick_params(axis='x', labelrotation=0)

In [None]:
# 2x2 summary by size class: raw means on the top row, per-hectare densities
# on the bottom row.
# plt.subplots(2, 2) returns a 2-D array of Axes, so every panel must be
# addressed as axs[row, col]; axs[0] / axs[1] are whole rows and cannot be
# passed as `ax=` or have labels set on them.
fig, axs = plt.subplots(2, 2, figsize=(13,10))
grid_panels = [
    (axs[0, 0], ('count','mean'), 'Mean Reservoir Count'),
    (axs[0, 1], ('sum','mean'), 'Mean Total Reservoir Area (ha)'),
    (axs[1, 0], ('area_density_count','mean'), 'Mean Reservoir Density (count per ha)'),
    (axs[1, 1], ('area_density_sum','mean'),
     'Mean Reservoir Area Density\n(reservoir ha per property ha)'),
]
for panel_ax, stat_col, y_label in grid_panels:
    groupby_prop_type[stat_col].plot.bar(ax=panel_ax)
    panel_ax.set_xlabel('Property Size Class')
    panel_ax.set_ylabel(y_label)
    # Keep the class names horizontal
    panel_ax.tick_params(axis='x', labelrotation=0)
# Leave room for the axis labels between the two rows
fig.tight_layout()

In [None]:
# Total number of reservoirs across the four size classes
# (sum over classes of the per-class sum of per-property counts)
groupby_prop_type[('count', 'sum')].sum()

In [None]:
# NOTE(review): hard-coded ratio. Presumably 984 and 2431 were copied from
# earlier cell outputs (e.g. one class's reservoir count over the total) —
# confirm, and compute it from groupby_prop_type so it cannot go stale.
984/2431

In [None]:
# Share of the filtered properties that are larger than 2000 ha
filt_prop_gdf['prop_area_ha'].gt(2000).mean()