In [1]:
import os
import re
import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.notebook import tqdm
from sklearn.preprocessing import StandardScaler
from matplotlib.ticker import FormatStrFormatter
from scipy.stats import ttest_1samp
import statsmodels.stats.api as sms
import pickle
%config Completer.use_jedi = False
%matplotlib widget

In [2]:
"""
Define I/O paths
"""
# input directores
inDIR_data = 'C:/Users/sean.kearney/OneDrive - USDA/Documents/Projects/GPS_v_hetgen/data/'
inDIR_mods = 'C:/Users/sean.kearney/OneDrive - USDA/Documents/Projects/GPS_v_hetgen/models/'

# output directories
outDIR_tables = 'C:/Users/sean.kearney/OneDrive - USDA/Documents/Projects/GPS_v_hetgen/models/results_tables/'

# file name of gridded path intensity data (output from gps_to_gridded_path_intensity.ipynb)
griddata_f = '2017_grazing_time_gridded_all_for_model.csv'

# file name of model results
results_f = '2017_grazing_intensity_model_results.csv'

# file name of model coefficients
coefs_f = '2017_grazing_intensity_model_coefficients.csv'

In [3]:
"""
Define models for analyzing
"""
# create list of saved models
mod_list = ['M0', 'M1', 'M2_bm', 'M2_top', 'M3_pc', 'M3_top', 'M4_top', 'M_best']

# create dictionary to map top models to original models
top_mod_dict = {
    'M2_top': 'M2g',
    'M3_top': 'M3d',
    'M4_top': 'M4h',
    'M5_top': 'M5'
}

# create dictionaries of saved model results for AIC comparison
mod_aic_dict = {
    'TPC2': [top_mod_dict['M2_top'], 'M2_tpc'],
    'TPC3': [top_mod_dict['M3_top'], 'M3_tpc'],
    'TPC4': [top_mod_dict['M4_top'], top_mod_dict['M5_top']],
    'Hw1': ['M1', 'M5'],
    'Hw2': ['M2a', 'M2b', 'M2c', 'M2d', 'M2e', 'M2f', 'M2g'],
    'Hw3': ['M2a', 'M2b', 'M2c', 'M2d', 'M2e', 'M2f', 'M2g'],
    'Hw4a': ['M2a', 'M2b'],
    'Hw4b': ['M2f', 'M2g'],
    'Hw5': ['M2a', 'M2b', 'M2c', 'M2d', 'M2e', 'M2f', 'M2g'],
    'Hw6': ['M2a', 'M2b', 'M2c', 'M2d', 'M2e', 'M2f', 'M2g'],
    'Hw7': ['M3a', 'M3b', 'M3c', 'M3d'],
    'Hw8': ['M3a', 'M3b', 'M3c', 'M3d'],
    'Hw9': ['M3a', 'M3b', 'M3c', 'M3d'],
    'Ha1': ['M0', 'M1', 'M2', 'M3'],
    'Ha2': ['M0', 'M1', 'M2', 'M3', 'M4'],
    'Ha3': ['M0', 'M1', 'M2', 'M3', 'M4'],
    'Ha4': ['M0', 'M1', 'M2', 'M3', 'M4'],
}

# create dictionaries of saved scalers
scaler_dict = {}
for seas in ['early', 'mid', 'late']:
    scaler_dict[seas] = {}
    for c in ['dFence', 'dTank', 'Biomass', 'CP', 'PC_div']:
        with open(os.path.join(inDIR_mods, 'scaler_' + seas + '_' + c + '.pkl'), 'rb') as f:
            scaler_dict[seas][c] = pickle.load(f)

In [4]:
"""
Load data and models
"""
# read gridded data into dataframe
df_wkly_grid_full = pd.read_csv(os.path.join(inDIR_data, griddata_f), engine='python')

# read model results into dataframe
df_results = pd.read_csv(os.path.join(inDIR_mods, results_f), engine='python')

# read model results into dataframe
df_coefs = pd.read_csv(os.path.join(inDIR_mods, coefs_f), engine='python')

# create dictionary of the seasonal models
seas_mod_dict = {}
for seas in df_wkly_grid_full[df_wkly_grid_full['mod_data'] == 'train']['season'].unique():
    seas_mod_dict[seas] = {}
    for mod in mod_list:
        f_matched = [f for f in os.listdir(os.path.join(inDIR_mods, 'seas')) if mod in f and seas in f]
        if len(f_matched) == 1:
            seas_mod_dict[seas][mod] = sm.load(os.path.join(
                inDIR_mods, 'seas', f_matched[0]))
        else:
            print('ERROR: None and/or multiple matching files found:')
            display(f_matched)
            
# create dictionary of the pasture-specific models
past_mod_dict = {}
for past in df_wkly_grid_full[df_wkly_grid_full['mod_data'] == 'train']['Pasture'].unique():
    past_mod_dict[past] = {}
    for seas in df_wkly_grid_full[df_wkly_grid_full['mod_data'] == 'train']['season'].unique():
        past_mod_dict[past][seas] = {}
        for mod in mod_list:
            f_matched = [f for f in os.listdir(os.path.join(inDIR_mods, 'past')) if past in f and mod in f and seas in f]
            if len(f_matched) == 1:
                past_mod_dict[past][seas][mod] = sm.load(os.path.join(
                    inDIR_mods, 'past', f_matched[0]))
            else:
                print('ERROR: None and/or multiple matching files found:')
                display(f_matched)
        
# scale data back to original values
for c in ['dFence', 'dTank', 'Biomass', 'CP', 'PC_div']:
    for seas in ['early', 'mid', 'late']:
        scaler = scaler_dict[seas][c]
        df_wkly_grid_full.loc[df_wkly_grid_full['season'] == seas, c + '_orig'] = scaler.inverse_transform(
            df_wkly_grid_full.loc[df_wkly_grid_full['season'] == seas, c].values.reshape(-1, 1)).flatten()


In [6]:
"""
Predict grazing intensity for chosen model
"""
# Change rare classes to 'Other' to match training data
df_wkly_grid_full['PC_dmt'].replace({'Bare_veg': 'Other', 'Bare': 'Other', 'UNK': 'Other', 'Shrub': 'Other'}, 
                               inplace=True)

# set the model for prediction
pred_model = 'M_best'

# predict relative probability of grazing across all data from seasonal models
for seas, df_sub in df_wkly_grid_full.groupby('season'):
    df_wkly_grid_full.loc[df_wkly_grid_full['season'] == seas, 'pred_seas'] = seas_mod_dict[seas][pred_model].predict(df_sub) * df_wkly_grid_full['grazing_wkly_sum'] / (df_wkly_grid_full['grazing_wkly_sum'] / 
                                                                             df_wkly_grid_full.groupby(
                                                                                 ['Pasture', 'Steer_ID', 'week'])['grazing_secs'].transform('count'))

In [7]:
"""
Bin seasonal predictions for categorical plotting and mapping
"""
rel_freq_bins = [0.0, 0.6, 0.8, 1.2, 1.4, 1000.0]
bin_labs = ['Low', 'Mod. Low', 'Avg.', 'Mod. Hi', 'Hi']
df_wkly_grid_full['grazing_rel_freq_grp'] = pd.cut(df_wkly_grid_full['grazing_rel_freq'], 
                                          bins=rel_freq_bins, 
                                          labels=bin_labs,
                                          include_lowest=True)
df_wkly_grid_full['pred_seas_grp'] = pd.cut(df_wkly_grid_full['pred_seas'], 
                                          bins=rel_freq_bins, 
                                          labels=bin_labs,
                                          include_lowest=True)

In [8]:
"""
Create and organize subsets of gridded data for plotting and analyses
"""
df_wkly_grid_full['season'] = pd.Categorical(df_wkly_grid_full['season'], ['early', 'mid', 'late'])
df_wkly_grid_train = df_wkly_grid_full[df_wkly_grid_full['mod_data'] == 'train']
df_wkly_grid_test = df_wkly_grid_full[df_wkly_grid_full['mod_data'] == 'test']
df_wkly_grid_train_means = df_wkly_grid_train.groupby(
    ['season', 'week', 'Pasture', 'UTM_X', 'UTM_Y']).mean()#.reset_index()
for colname in ['PC_dmt', 'TPC_c']:
    df_wkly_grid_train_means[colname] = df_wkly_grid_train.groupby(
        ['season', 'week', 'Pasture', 'UTM_X', 'UTM_Y'])[colname].first()
df_wkly_grid_train_means = df_wkly_grid_train_means.reset_index().dropna()

In [9]:
"""
Plot observed versus predicted relative grazing intensity
"""
fig, ax = plt.subplots(figsize=(10, 6))
sns.pointplot(x='pred_seas_grp', y='grazing_rel_freq', hue='mod_data', hue_order=['train', 'test'],
              data=df_wkly_grid_full, ax=ax)
ax.axhline(y=1.0, linestyle='dashed', c='gray')
ax.set(xlabel='Predicted relative grazing intensity (binned)',
      ylabel='Observed relative grazing intensity')
leg = ax.legend()
# change legend texts
new_title = 'Dataset'
leg.set_title(new_title, prop={'weight': 'bold',
        'size': 12,
        })
new_labels = ['Training', 'Testing']
for t, l in zip(leg.texts, new_labels):
    t.set_text(l)
plt.savefig('C:/Users/sean.kearney/OneDrive - USDA/Documents/Projects/GPS_v_hetgen/figures/Best_model_fit.png', 
           bbox_inches='tight', dpi=300)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [160]:
#g = sns.FacetGrid(data=df_wkly_grid_full, col='season', hue='mod_data')
#g.map_dataframe(sns.kdeplot, x='grazing_rel_freq', cumulative=True)
plt.figure()
sns.violinplot(data=df_wkly_grid_full, x='season', y='grazing_rel_freq', hue='mod_data', cut=0)
plt.ylim((0, 2))

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

(0.0, 2.0)

In [161]:
g = sns.catplot(data=df_wkly_grid_full, x='pred_seas_grp', y='grazing_rel_freq', col='season', kind='point',
                hue='mod_data', hue_order=['train', 'test'], 
                  legend=False, sharey=True, height=4, aspect=0.8).add_legend(bbox_to_anchor=(0.5, 0.98), 
                                                  loc='upper center',
                                                  borderaxespad=0,
                                                  ncol=2,
                                                 frameon=True)
g.set(xlabel='',
     ylabel='')
g.fig.supxlabel('Predicted relative grazing intensity (binned)', y=-0.05)
g.fig.supylabel('Observed relative grazing intensity', x=-0.01)
for ax in g.axes.flat:
    ax.axhline(y=1.0, linestyle='dashed')
    ax.set_ylim((0.5, 1.5))
    cur_title = ax.get_title()
    ax.set_title(re.sub('season = ', '', cur_title + '-season'))
    box = ax.get_position()
    ax.set_position([box.x0, box.y0, box.width,box.height*0.80])
leg = g.legend
# change legend texts
new_title = 'Dataset'
leg.set_title(new_title, prop={'weight': 'bold',
        'size': 12,
        })
new_labels = ['Training', 'Testing']
for t, l in zip(leg.texts, new_labels):
    t.set_text(l)

plt.savefig('C:/Users/sean.kearney/OneDrive - USDA/Documents/Projects/GPS_v_hetgen/figures/Best_model_fit_X_season.png', 
           bbox_inches='tight', dpi=300, pad_inches=0.1)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [92]:
"""
Get DataFrame of coefficients for best models for each season
"""
df_mbest_coefs_early = pd.read_html((seas_mod_dict['early']['M_best'].summary().tables[1]).as_html(), header=0, index_col=0)[0]
df_mbest_coefs_mid = pd.read_html((seas_mod_dict['mid']['M_best'].summary().tables[1]).as_html(), header=0, index_col=0)[0]
df_mbest_coefs_late = pd.read_html((seas_mod_dict['late']['M_best'].summary().tables[1]).as_html(), header=0, index_col=0)[0]

In [93]:
"""
Display best model coefficients for chosen season. Significant if red
"""

def style_sig(v, props=''):
    return np.repeat(props, len(v)) if v['P>|z|'] < 0.05 else None

print('\n---------------------- Early --------------------')
display(df_mbest_coefs_early.style.apply(style_sig, props='color:red;', axis=1))

print('\n---------------------- Mid --------------------')
display(df_mbest_coefs_mid.style.apply(style_sig, props='color:red;', axis=1))

print('\n---------------------- Late --------------------')
display(df_mbest_coefs_late.style.apply(style_sig, props='color:red;', axis=1))



---------------------- Early --------------------


Unnamed: 0,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-7.5168,0.033,-225.674,0.0,-7.582,-7.452
"C(PC_dmt, Treatment(reference=""C3_C4_mix""))[T.C3]",1.8562,0.058,32.177,0.0,1.743,1.969
"C(PC_dmt, Treatment(reference=""C3_C4_mix""))[T.C4]",-0.2979,0.038,-7.869,0.0,-0.372,-0.224
"C(PC_dmt, Treatment(reference=""C3_C4_mix""))[T.Other]",-0.362,0.167,-2.169,0.03,-0.689,-0.035
"C(PC_dmt, Treatment(reference=""C3_C4_mix""))[T.Saltgrass]",0.5381,0.059,9.094,0.0,0.422,0.654
"C(TPC_c, Treatment(reference=""Flat Plains""))[T.Highlands]",-0.0126,0.043,-0.291,0.771,-0.097,0.072
"C(TPC_c, Treatment(reference=""Flat Plains""))[T.Lowlands]",0.4472,0.038,11.633,0.0,0.372,0.523
"C(TPC_c, Treatment(reference=""Flat Plains""))[T.Open Slopes]",0.0742,0.037,1.991,0.047,0.001,0.147
"C(TPC_c, Treatment(reference=""Flat Plains""))[T.Other]",-0.0593,0.19,-0.312,0.755,-0.432,0.314
dFence,0.0091,0.014,0.667,0.505,-0.018,0.036



---------------------- Mid --------------------


Unnamed: 0,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-7.2374,0.032,-227.073,0.0,-7.3,-7.175
"C(TPC_c, Treatment(reference=""Flat Plains""))[T.Highlands]",-0.2542,0.054,-4.745,0.0,-0.359,-0.149
"C(TPC_c, Treatment(reference=""Flat Plains""))[T.Lowlands]",0.1208,0.048,2.5,0.012,0.026,0.216
"C(TPC_c, Treatment(reference=""Flat Plains""))[T.Open Slopes]",-0.0869,0.048,-1.83,0.067,-0.18,0.006
"C(TPC_c, Treatment(reference=""Flat Plains""))[T.Other]",-0.3468,0.243,-1.425,0.154,-0.824,0.13
dFence,-0.0165,0.018,-0.924,0.355,-0.051,0.018
dTank,0.0226,0.019,1.2,0.23,-0.014,0.06
Biomass,-0.5314,0.024,-22.526,0.0,-0.578,-0.485
I(Biomass ** 2),-0.1796,0.009,-20.491,0.0,-0.197,-0.162
CP,0.9661,0.023,41.364,0.0,0.92,1.012



---------------------- Late --------------------


Unnamed: 0,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-7.2715,0.05,-145.491,0.0,-7.369,-7.174
dFence,0.0654,0.022,2.951,0.003,0.022,0.109
dTank,-0.0205,0.02,-1.025,0.305,-0.06,0.019
Biomass,-0.4722,0.026,-18.328,0.0,-0.523,-0.422
I(Biomass ** 2),-0.3504,0.009,-37.763,0.0,-0.369,-0.332
CP,0.4499,0.084,5.374,0.0,0.286,0.614
PC_div,0.0214,0.022,0.993,0.321,-0.021,0.064
alpha,12.053,0.13,92.585,0.0,11.798,12.308


In [94]:
"""
Reformat and save coefficients for table
"""
df_mbest_coefs_early = df_mbest_coefs_early.rename_axis('Param').reset_index()
df_mbest_coefs_early['Param'] = df_mbest_coefs_early['Param'].apply(lambda x: re.sub('\]', 
                                                                                     '',
                                                                                     re.sub('C\(PC_dmt, Treatment\(reference\="C3_C4_mix"\)\)\[T\.', 
                                                                                            '', 
                                                                                            str(x))))
df_mbest_coefs_early['Param'] = df_mbest_coefs_early['Param'].apply(lambda x: re.sub('\]', 
                                                                                     '',
                                                                                     re.sub('C\(TPC_c, Treatment\(reference\="Flat Plains"\)\)\[T\.', 
                                                                                            '', 
                                                                                            str(x))))

df_mbest_coefs_mid = df_mbest_coefs_mid.rename_axis('Param').reset_index()
df_mbest_coefs_mid['Param'] = df_mbest_coefs_mid['Param'].apply(lambda x: re.sub('\]', 
                                                                                     '',
                                                                                     re.sub('C\(PC_dmt, Treatment\(reference\="C3_C4_mix"\)\)\[T\.', 
                                                                                            '', 
                                                                                            str(x))))
df_mbest_coefs_mid['Param'] = df_mbest_coefs_mid['Param'].apply(lambda x: re.sub('\]', 
                                                                                     '',
                                                                                     re.sub('C\(TPC_c, Treatment\(reference\="Flat Plains"\)\)\[T\.', 
                                                                                            '', 
                                                                                            str(x))))

df_mbest_coefs_late = df_mbest_coefs_late.rename_axis('Param').reset_index()
df_mbest_coefs_late['Param'] = df_mbest_coefs_late['Param'].apply(lambda x: re.sub('\]', 
                                                                                     '',
                                                                                     re.sub('C\(PC_dmt, Treatment\(reference\="C3_C4_mix"\)\)\[T\.', 
                                                                                            '', 
                                                                                            str(x))))
df_mbest_coefs_late['Param'] = df_mbest_coefs_late['Param'].apply(lambda x: re.sub('\]', 
                                                                                     '',
                                                                                     re.sub('C\(TPC_c, Treatment\(reference\="Flat Plains"\)\)\[T\.', 
                                                                                            '', 
                                                                                            str(x))))
df_mbest_coefs_early.to_csv(os.path.join(outDIR_tables, 'M_best_coefs_early.csv'))
df_mbest_coefs_mid.to_csv(os.path.join(outDIR_tables, 'M_best_coefs_mid.csv'))
df_mbest_coefs_late.to_csv(os.path.join(outDIR_tables, 'M_best_coefs_late.csv'))

In [199]:
"""
FIGURE: Varibility in crude protein (a) and biomass (b) by sub-season across individual pastures 
"""
fig, axs = plt.subplots(nrows=2, figsize=(10, 6), sharex=True, gridspec_kw={'right': 0.8})
axs[0].axhline(y=8.0, linestyle='dashed', color='gray', alpha=0.8)
p1 = sns.violinplot(x='season', y='CP_orig', hue='Pasture', bw=0.5, cut=0,
            data=df_wkly_grid_train_means, ax=axs[0])
axs[0].set_ylabel('Crude protein (%)')
axs[0].set_xlabel(None)
axs[0]
handles, labels = axs[0].get_legend_handles_labels()
p1.legend().remove()
axs[1].axhline(y=500, linestyle='dashed', color='gray', alpha=0.8)
p2 = sns.violinplot(x='season', y='Biomass_orig', hue='Pasture', bw=0.5, cut=0,
            data=df_wkly_grid_train_means, ax=axs[1])
axs[1].set_ylabel('Biomass (kg/ha)')
axs[1].set_xlabel('Sub-season')
p2.legend().remove()
fig.legend(handles, labels, loc='center right', title='Pasture')
plt.savefig('C:/Users/sean.kearney/OneDrive - USDA/Documents/Projects/GPS_v_hetgen/figures/violinplots_CP_BM_X_Season_X_Pasture.png', 
            bbox_inches='tight', dpi=300)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [197]:
"""
FIGURE: Varibility in crude protein (a) and biomass (b) by sub-season across plant communities
"""
fig, axs = plt.subplots(nrows=2, figsize=(10, 6), sharex=True, gridspec_kw={'right': 0.8})
axs[0].axhline(y=8.0, linestyle='dashed', color='gray', alpha=0.8)
p1 = sns.violinplot(x='season', y='CP_orig', hue='PC_dmt', bw=0.5, cut=0,
            data=df_wkly_grid_train_means, ax=axs[0])
axs[0].set_ylabel('Crude protein (%)')
axs[0].set_xlabel(None)
handles, labels = axs[0].get_legend_handles_labels()
p1.legend().remove()
axs[1].axhline(y=500, linestyle='dashed', color='gray', alpha=0.8)
p2 = sns.violinplot(x='season', y='Biomass_orig', hue='PC_dmt', bw=0.5, cut=0,
            data=df_wkly_grid_train_means, ax=axs[1])
axs[1].set_ylabel('Biomass (kg/ha)')
axs[1].set_xlabel('Sub-season')
axs[1]
p2.legend().remove()
fig.legend(handles, labels, loc='center right', title='Plant community')
plt.savefig('C:/Users/sean.kearney/OneDrive - USDA/Documents/Projects/GPS_v_hetgen/figures/violinplots_CP_BM_X_Season_X_PCdmt.png', 
            bbox_inches='tight', dpi=300)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [198]:
"""
FIGURE: Varibility in crude protein (a) and biomass (b) by sub-season across plant communities
"""
fig, axs = plt.subplots(nrows=2, figsize=(10, 6), sharex=True, gridspec_kw={'right': 0.8})
axs[0].axhline(y=8.0, linestyle='dashed', color='gray', alpha=0.8)
p1 = sns.violinplot(x='season', y='CP_orig', hue='TPC_c', bw=0.5, cut=0,
            data=df_wkly_grid_train_means, ax=axs[0])
axs[0].set_ylabel('Crude protein (%)')
axs[0].set_xlabel(None)
handles, labels = axs[0].get_legend_handles_labels()
p1.legend().remove()
axs[1].axhline(y=500, linestyle='dashed', color='gray', alpha=0.8)
p2 = sns.violinplot(x='season', y='Biomass_orig', hue='TPC_c', bw=0.5, cut=0,
            data=df_wkly_grid_train_means, ax=axs[1])
axs[1].set_ylabel('Biomass (kg/ha)')
axs[1].set_xlabel('Sub-season')
axs[1]
p2.legend().remove()
fig.legend(handles, labels, loc='center right', title='Plant community')
plt.savefig('C:/Users/sean.kearney/OneDrive - USDA/Documents/Projects/GPS_v_hetgen/figures/violinplots_CP_BM_X_Season_X_TPC.png', 
            bbox_inches='tight', dpi=300)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [116]:
"""
Create dataframe of plant community coefficients by model and season
"""

pd.set_option('display.max_rows', 100)
df_coefs['Season'] = pd.Categorical(df_coefs['Season'], ['early', 'mid', 'late'])
coefs_PC_seas = df_coefs[df_coefs['Param'].str.contains('C\(PC_dmt\, Treatment\(reference\="C3_C4_mix"\)\)\[.*\]$', regex=True)]
coefs_PC_seas['Param'] = coefs_PC_seas['Param'].apply(lambda x: re.sub('C\(PC_dmt, Treatment\(reference\="C3_C4_mix"\)\)\[T\.', '', x))
coefs_PC_seas['Param'] = coefs_PC_seas['Param'].apply(lambda x: re.sub('\]', '', x))
coefs_PC_seas_agg = coefs_PC_seas.groupby(['Model','week', 'Season',  'Pasture', 'Param']).mean().reset_index().dropna().groupby(['Model', 'Season', 'Param'])['coef'].agg(
    [('coef_mean', np.mean),
     ('coef_lwr95', lambda x: sms.DescrStatsW(x).zconfint_mean()[0]),
     ('coef_upr95', lambda x: sms.DescrStatsW(x).zconfint_mean()[1]),
     ('pval', lambda x: np.round(ttest_1samp(x, 0.0)[1], 4))])#.reset_index()
#coefs_PC_seas_agg[coefs_PC_seas_agg.index.isin(['M3_pc', 'M_best'], level='Model')].dropna()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._set_item(key, value)


In [144]:
"""
Figure: Plant community coefficients and 95% CI by sub-season and by pasture for the plant-community-only model (M3_pc) and best model (M5)
"""
import matplotlib.ticker as ticker
g = sns.FacetGrid(coefs_PC_seas[coefs_PC_seas['Model'].isin(['M3_pc', 'M_best'])],
                  col='Season', legend_out=True, sharey=True, gridspec_kws={"wspace":0.4})
#plt.figure()
g.map_dataframe(sns.pointplot, x='coef', y='Param', hue='Model',
                join=False, dodge=0.3).add_legend(loc='upper center',
                                                  borderaxespad=0,
                                                  ncol=2,
                                                  frameon=True)
axs = g.axes
for ax in axs.flat:
    ax.collections[0]
    ax.set_xlim((-1.5, 1.5))
    ax.axvline(x=0, linestyle='dotted', color='black')
    box = ax.get_position()
    ax.set_position([box.x0, box.y0, box.width,box.height*0.75])
    cur_title = ax.get_title()
    ax.set_title(re.sub('Season = ', '', cur_title + '-season'))
    ax.xaxis.set_major_formatter(ticker.FuncFormatter(lambda x, pos: '{:,.1f}'.format(x)))
    
leg = g.legend
# change legend texts
new_title = 'Model type'
leg.set_title(new_title, prop={'weight': 'bold',
        'size': 12,
        })
new_labels = ['Plant community only (M3a)', 'Fully paramaterized (M5)']
for t, l in zip(leg.texts, new_labels):
    t.set_text(l)
g.set_xlabels('Coefficient')
plt.savefig('C:/Users/sean.kearney/OneDrive - USDA/Documents/Projects/GPS_v_hetgen/figures/PC_dmt.png', 
            bbox_inches='tight', dpi=300)
#sns.pointplot()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

  self.fig.tight_layout(*args, **kwargs)


In [145]:
"""
Create dataframe of topographic position class coefficients by model and season
"""
coefs_TPC_seas = df_coefs[df_coefs['Param'].str.contains('C\(TPC_c\, Treatment\(reference\="Flat Plains"\)\)\[.*\]$', regex=True)]
coefs_TPC_seas['Param'] = coefs_TPC_seas['Param'].apply(lambda x: re.sub('C\(TPC_c, Treatment\(reference\="Flat Plains"\)\)\[T\.', '', x))
coefs_TPC_seas['Param'] = coefs_TPC_seas['Param'].apply(lambda x: re.sub('\]', '', x))
coefs_TPC_seas_agg = coefs_TPC_seas.groupby(['Model','week', 'Season',  'Pasture', 'Param']).mean().reset_index().dropna().groupby(['Model', 'Season', 'Param'])['coef'].agg(
    [('coef_mean', np.mean),
     ('coef_lwr95', lambda x: sms.DescrStatsW(x).zconfint_mean()[0]),
     ('coef_upr95', lambda x: sms.DescrStatsW(x).zconfint_mean()[1]),
     ('pval', lambda x: np.round(ttest_1samp(x, 0.0)[1], 4))])#.reset_index()
#coefs_TPC_seas_agg[coefs_TPC_seas_agg.index.isin(['M1', 'M_best'], level='Model')].dropna()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._set_item(key, value)


In [146]:
"""
Figure: TPC coefficients and 95% CI by season and by pasture for the topographic position-only model (M1) and the best model (M5)
"""
g = sns.FacetGrid(coefs_TPC_seas[coefs_TPC_seas['Model'].isin(['M1', 'M_best'])],
                  col='Season', legend_out=True, sharey=True)
#plt.figure()
g.map_dataframe(sns.pointplot, x='coef', y='Param', hue='Model', join=False, dodge=0.3).add_legend(loc='upper center',
                                                                                                   borderaxespad=0, 
                                                                                                   ncol=2, 
                                                                                                   frameon=True)
axs = g.axes
for ax in axs.flat:
    ax.set_xlim((-1.5, 1.5))
    ax.axvline(x=0, linestyle='dotted', color='black')
    box = ax.get_position()
    ax.set_position([box.x0, box.y0, box.width, box.height*0.75])
    cur_title = ax.get_title()
    ax.set_title(re.sub('Season = ', '', cur_title + '-season'))
    
leg = g.legend
# change legend texts
new_title = 'Model type'
leg.set_title(new_title, prop={'weight': 'bold',
        'size': 12,
        })
new_labels = ['Topographic position only (M1)', 'Fully paramaterized (M5)']
for t, l in zip(leg.texts, new_labels):
    t.set_text(l)
plt.savefig('C:/Users/sean.kearney/OneDrive - USDA/Documents/Projects/GPS_v_hetgen/figures/TCP.png', 
            bbox_inches='tight', dpi=300)
#sns.pointplot()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [127]:
"""
Compare models by season: how does selection change with biomass and relative CP (within season)
"""
cp_q_dict = {
    'Lowest': 0.10,
    'Below avg': 0.25,
    'Average': 0.5,
    'Above avg': 0.75,
    'Highest': 0.90
}
ref_TPC = 'Flat Plains'
ref_pc = 'C3_C4_mix'
df_pred = pd.DataFrame(columns=['season','dFence_orig', 'dTank_orig', 'Biomass_orig', 'CP_orig','PC_div_orig',
                                'PC_dmt', 'TPC_c', 'Model'])
for mod in tqdm(['M2_bm', 'M2_top', 'M4_top', 'M_best']):
    for seas, df_sub in df_wkly_grid_full[df_wkly_grid_full['mod_data'] == 'train'].groupby('season'):
        if len(df_sub[(df_sub['PC_dmt'] == ref_pc) & (df_sub['TPC_c'] == ref_TPC)]) < 100:
            continue
        else:
            df_tmp = df_sub[(df_sub['TPC_c'] == ref_TPC) & (df_sub['PC_dmt'] == ref_pc)]
            for cp_q in cp_q_dict:
                df_pred = df_pred.append(pd.DataFrame({
                    'season': seas,
                    'Relative CP': cp_q,
                    'dFence_orig': df_tmp['dFence_orig'].median(),
                    'dTank_orig': df_tmp['dTank_orig'].median(),
                    'Biomass_orig': np.arange(df_tmp['Biomass_orig'][np.abs(df_tmp['CP_orig'] - df_tmp['CP_orig'].quantile(cp_q_dict[cp_q])) < 0.25].min(), 
                         df_tmp['Biomass_orig'][np.abs(df_tmp['CP_orig'] - df_tmp['CP_orig'].quantile(cp_q_dict[cp_q])) < 0.25].max(),  10),
                    'CP_orig': df_tmp['CP_orig'].quantile(cp_q_dict[cp_q]),
                    'PC_div_orig': df_tmp['PC_div_orig'].median(),
                    'PC_dmt': ref_pc,
                    'TPC_c': ref_TPC,
                    'Model': mod
                }))
        for c in ['dFence', 'dTank', 'Biomass', 'CP', 'PC_div']:
            scaler = scaler_dict[seas][c]
            df_pred.loc[df_pred['season'] == seas, c] = scaler.transform(
                df_pred.loc[df_pred['season'] == seas, c + '_orig'].values.reshape(-1, 1)).flatten()
        df_pred.loc[(df_pred['season'] == seas) & (df_pred['Model'] == mod), 
                    'mod_pred'] = seas_mod_dict[seas][mod].predict(df_pred.loc[(df_pred['season'] == seas) & (df_pred['Model'] == mod)])
df_pred['season'] = pd.Categorical(df_pred['season'], ['early', 'mid', 'late'])
df_pred['pred_rel_freq'] = np.nan
for seas in df_pred.season.unique():
    df_wkly_grid_sub = df_wkly_grid_full[(df_wkly_grid_full['season'] == seas) & (df_wkly_grid_full['mod_data'] == 'train')]
    df_pred.loc[df_pred['season'] == seas, 'pred_rel_freq'] = (df_pred.loc[df_pred['season'] == seas, 'mod_pred'] * df_wkly_grid_sub['grazing_wkly_sum'].mean()) / (df_wkly_grid_sub['grazing_wkly_sum'].mean() / df_wkly_grid_sub.groupby(
        ['week', 'Pasture', 'Steer_ID'])['grazing_wkly_sum'].count().mean())

  0%|          | 0/4 [00:00<?, ?it/s]

In [128]:
"""
Plot marginal effects of biomass and relative CP by model and season
"""
x_var = 'Biomass_orig'
g = sns.FacetGrid(df_pred[df_pred['Model'] == 'M_best'], col='season', hue='Relative CP', col_order=['early', 'mid', 'late'], 
                  legend_out=True, sharey=False, sharex=True, palette='viridis')
#plt.figure()
g.map_dataframe(sns.lineplot, x=x_var, y='pred_rel_freq').add_legend()
g.set_xlabels('Biomass (kg/ha)')
g.set_ylabels('Relative probability of selection')
axes = g.fig.axes
for ax in axes:
    ax.axhline(y=1.0, linestyle='dashed', color='grey')
    ax.set_ylim((0.0, ax.get_ylim()[-1]))
    if ax.get_ylim()[-1] < 2.0:
        ax.set_ylim((0.0, 2.0))
    cur_title = ax.get_title()
    ax.set_title(re.sub('season = ', '', cur_title + '-season'))
    box = ax.get_position()
    ax.set_position([box.x0, box.y0, box.width*0.9, box.height*0.9])
#g.fig.suptitle('Marginal effects of Biomass and Relative CP')
plt.savefig('C:/Users/sean.kearney/OneDrive - USDA/Documents/Projects/GPS_v_hetgen/figures/Biomass_X_relCP.png', 
            bbox_inches='tight', dpi=300)
#plt.tight_layout()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [23]:
"""
Compare top model by plant community diversity: how does selection change with plant community diversity within dominant plant communities?
"""
ref_TPC = 'Flat Plains'
df_pred = pd.DataFrame(columns=['season','dFence_orig', 'dTank_orig', 'Biomass_orig', 'CP_orig','PC_div_orig',
                                'PC_dmt', 'TPC_c', 'Model'])
for ref_pc in df_wkly_grid_full['PC_dmt'][df_wkly_grid_full['mod_data'] == 'train'].unique():
    print(ref_pc)
    for mod in tqdm(['M_best']):
        for seas, df_sub in df_wkly_grid_full[df_wkly_grid_full['mod_data'] == 'train'].groupby('season'):
            if len(df_sub[(df_sub['PC_dmt'] == ref_pc) & (df_sub['TPC_c'] == ref_TPC)]) < 100:
                continue
            else:
                df_tmp = df_sub[(df_sub['TPC_c'] == ref_TPC) & (df_sub['PC_dmt'] == ref_pc)]
                df_pred = df_pred.append(pd.DataFrame({
                    'season': seas,
                    'dFence_orig': df_tmp['dFence_orig'].median(),
                    'dTank_orig': df_tmp['dTank_orig'].median(),
                    'Biomass_orig': df_tmp['Biomass_orig'].median(),
                    'CP_orig': df_tmp['CP_orig'].quantile(cp_q_dict[cp_q]),
                    'PC_div_orig': np.arange(df_tmp['PC_div_orig'].min(), df_tmp['PC_div_orig'].max(),  0.01),
                    'PC_dmt': ref_pc,
                    'TPC_c': ref_TPC,
                    'Model': mod
                }))
            for c in ['dFence', 'dTank', 'Biomass', 'CP', 'PC_div']:
                scaler = scaler_dict[seas][c]
                df_pred.loc[df_pred['season'] == seas, c] = scaler.transform(
                    df_pred.loc[df_pred['season'] == seas, c + '_orig'].values.reshape(-1, 1)).flatten()
            df_pred.loc[(df_pred['season'] == seas) & (df_pred['Model'] == mod), 
                        'mod_pred'] = seas_mod_dict[seas][mod].predict(df_pred.loc[(df_pred['season'] == seas) & (df_pred['Model'] == mod)])
df_pred['season'] = pd.Categorical(df_pred['season'], ['early', 'mid', 'late'])
df_pred['pred_rel_freq'] = np.nan
for seas in df_pred.season.unique():
    df_wkly_grid_sub = df_wkly_grid_full[(df_wkly_grid_full['season'] == seas) & (df_wkly_grid_full['mod_data'] == 'train')]
    df_pred.loc[df_pred['season'] == seas, 'pred_rel_freq'] = (df_pred.loc[df_pred['season'] == seas, 'mod_pred'] * df_wkly_grid_sub['grazing_wkly_sum'].mean()) / (df_wkly_grid_sub['grazing_wkly_sum'].mean() / df_wkly_grid_sub.groupby(
        ['week', 'Pasture', 'Steer_ID'])['grazing_wkly_sum'].count().mean())

C3_C4_mix


  0%|          | 0/1 [00:00<?, ?it/s]

C4


  0%|          | 0/1 [00:00<?, ?it/s]

Other


  0%|          | 0/1 [00:00<?, ?it/s]

C3


  0%|          | 0/1 [00:00<?, ?it/s]

Saltgrass


  0%|          | 0/1 [00:00<?, ?it/s]

In [24]:
"""
Plot marginal effects of plant community diversity by dominant plant community and season
"""
x_var = 'PC_div_orig'
g = sns.FacetGrid(df_pred[df_pred['Model'] == 'M_best'], col='season', hue='PC_dmt',
                  col_order=['early', 'mid', 'late'], 
                  hue_order=['C4', 'C3_C4_mix', 'C3', 'Saltgrass', 'Other'],
                  legend_out=True, sharey=False)
#plt.figure()
g.map_dataframe(sns.lineplot, x=x_var, y='pred_rel_freq').add_legend()
g.set_xlabels('Diversity (Shannon H)')
g.set_ylabels('Relative probability of selection')
axes = g.axes.flat
for ax in axes:
    ax.axhline(y=1.0, linestyle='dashed', color='grey')
    ax.set_ylim((0.0, ax.get_ylim()[-1]))
    if ax.get_ylim()[-1] < 2.0:
        ax.set_ylim((0.0, 2.0))
    cur_title = ax.get_title()
    ax.set_title(re.sub('season = ', '', cur_title + '-season'))
    box = ax.get_position()
    ax.set_position([box.x0, box.y0, box.width*0.85, box.height*0.9])
leg = g.legend
# change legend texts
new_title = 'Plant community'
leg.set_title(new_title, prop={'weight': 'bold',
        'size': 12,
        })
new_labels=['C4', 'C3/C4 mix', 'C3', 'Saltgrass', 'Other']
for t, l in zip(leg.texts, new_labels):
    t.set_text(l)
g.fig.suptitle('Marginal effects of Plant Community Diversity by Plant Community and Season')
plt.savefig('C:/Users/sean.kearney/OneDrive - USDA/Documents/Projects/GPS_v_hetgen/figures/PC_div_X_PC_dmt.png',
            bbox_inches='tight', dpi=300)


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [25]:
"""
Compare models by season: how does selection change with distance to fence and watertank
"""
ref_TPC = 'Flat Plains'
ref_pc = 'C3_C4_mix'
df_pred = pd.DataFrame(columns=['season','dFence_orig', 'dTank_orig', 'Biomass_orig', 'CP_orig','PC_div_orig',
                                'PC_dmt', 'TPC_c', 'Feature', 'Model'])
for mod in tqdm(['M1', 'M_best']):
    for seas, df_sub in df_wkly_grid_full[df_wkly_grid_full['mod_data'] == 'train'].groupby('season'):
        if len(df_sub[(df_sub['PC_dmt'] == ref_pc) & (df_sub['TPC_c'] == ref_TPC)]) < 100:
            continue
        else:
            df_tmp = df_sub[(df_sub['TPC_c'] == ref_TPC) & (df_sub['PC_dmt'] == ref_pc)]
            for feat in ['Fence', 'Tank']:
                if feat == 'Fence':
                    dFence = np.arange(df_tmp['dFence_orig'].min(), df_tmp['dFence_orig'].max(), 10)
                else:
                    dFence = df_tmp['dFence_orig'].median()
                if feat == 'Tank':
                    dTank = np.arange(df_tmp['dTank_orig'].min(), df_tmp['dTank_orig'].max(), 10)
                else:
                    dTank = df_tmp['dTank_orig'].median()
                    
                df_pred = df_pred.append(pd.DataFrame({
                    'season': seas,
                    'dFence_orig': dFence,
                    'dTank_orig': dTank,
                    'Biomass_orig': df_tmp['Biomass_orig'].median(),
                    'CP_orig': df_tmp['CP_orig'].median(),
                    'PC_div_orig': df_tmp['PC_div_orig'].median(),
                    'PC_dmt': ref_pc,
                    'TPC_c': ref_TPC,
                    'Feature': feat,
                    'Model': mod
                }))
        for c in ['dFence', 'dTank', 'Biomass', 'CP', 'PC_div']:
            scaler = scaler_dict[seas][c]
            df_pred.loc[df_pred['season'] == seas, c] = scaler.transform(
                df_pred.loc[df_pred['season'] == seas, c + '_orig'].values.reshape(-1, 1)).flatten()
        df_pred.loc[(df_pred['season'] == seas) & (df_pred['Model'] == mod), 
                    'mod_pred'] = seas_mod_dict[seas][mod].predict(df_pred.loc[(df_pred['season'] == seas) & (df_pred['Model'] == mod)])
df_pred['season'] = pd.Categorical(df_pred['season'], ['early', 'mid', 'late'])
df_pred['pred_rel_freq'] = np.nan
for seas in df_pred.season.unique():
    df_wkly_grid_sub = df_wkly_grid_full[(df_wkly_grid_full['season'] == seas) & (df_wkly_grid_full['mod_data'] == 'train')]
    df_pred.loc[df_pred['season'] == seas, 'pred_rel_freq'] = (df_pred.loc[df_pred['season'] == seas, 'mod_pred'] * df_wkly_grid_sub['grazing_wkly_sum'].mean()) / (df_wkly_grid_sub['grazing_wkly_sum'].mean() / df_wkly_grid_sub.groupby(
        ['week', 'Pasture', 'Steer_ID'])['grazing_wkly_sum'].count().mean())

  0%|          | 0/2 [00:00<?, ?it/s]

In [26]:
df_pred['dFence_orig'][df_pred['Feature'] == 'Tank'] = np.nan
df_pred['dTank_orig'][df_pred['Feature'] == 'Fence'] = np.nan
df_pred_long = pd.melt(df_pred, id_vars=['season', 'Model', 'Feature', 'pred_rel_freq'], value_vars=['dFence_orig', 'dTank_orig'], value_name='Distance')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_pred['dFence_orig'][df_pred['Feature'] == 'Tank'] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_pred['dTank_orig'][df_pred['Feature'] == 'Fence'] = np.nan


In [28]:
"""
Plot marginal effects of distance to fence by model and season
"""
x_var = 'Distance'
g = sns.FacetGrid(df_pred_long[(df_pred_long['Model'].isin(['M1', 'M_best'])) & (df_pred_long['Feature'] == 'Fence')],
                  col='season', hue='Model',
                               col_order=['early', 'mid', 'late'], 
                  legend_out=True, sharey='col', sharex=True)
#plt.figure()
g.map_dataframe(sns.lineplot, x=x_var, y='pred_rel_freq').add_legend(bbox_to_anchor=(0.5, 0.98), 
                                                                     loc='upper center',
                                                                                                   borderaxespad=0, 
                                                                                                   ncol=2, 
                                                                                                   frameon=True)
g.set_xlabels('Distance to nearest fence (m)')
g.set_ylabels('Relative probability of selection')
axes = g.fig.axes
for ax in axes:
    ax.axhline(y=1.0, linestyle='dashed', color='grey')
    ax.set_ylim((0.0, ax.get_ylim()[-1]))
    if ax.get_ylim()[-1] < 2.0:
        ax.set_ylim((0.0, 2.0))
    cur_title = ax.get_title()
    ax.set_title(re.sub('season = ', '', cur_title + '-season'))
    box = ax.get_position()
    ax.set_position([box.x0, box.y0, box.width, box.height*0.8])
leg = g.legend
# change legend texts
new_title = 'Model type'
leg.set_title(new_title, prop={'weight': 'bold',
        'size': 12,
        })
new_labels = ['Null model (M0)', 'Fully paramaterized (M5)']
for t, l in zip(leg.texts, new_labels):
    t.set_text(l)
plt.savefig('C:/Users/sean.kearney/OneDrive - USDA/Documents/Projects/GPS_v_hetgen/figures/dfence.png', 
            bbox_inches='tight', dpi=300)


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [30]:
"""
Plot marginal effects of distance to water tank by model and season
"""
x_var = 'Distance'
g = sns.FacetGrid(df_pred_long[(df_pred_long['Model'].isin(['M1', 'M_best'])) & (df_pred_long['Feature'] == 'Tank')],
                  col='season', hue='Model',
                               col_order=['early', 'mid', 'late'], 
                  legend_out=True, sharey='col', sharex=True)
#plt.figure()
g.map_dataframe(sns.lineplot, x=x_var, y='pred_rel_freq').add_legend(bbox_to_anchor=(0.5, 0.98), 
                                                                     loc='upper center',
                                                                                                   borderaxespad=0, 
                                                                                                   ncol=2, 
                                                                                                   frameon=True)
g.set_xlabels('Distance to water tank (m)')
g.set_ylabels('Relative probability of selection')
axes = g.fig.axes
for ax in axes:
    ax.axhline(y=1.0, linestyle='dashed', color='grey')
    ax.set_ylim((0.0, ax.get_ylim()[-1]))
    if ax.get_ylim()[-1] < 2.0:
        ax.set_ylim((0.0, 2.0))
    cur_title = ax.get_title()
    ax.set_title(re.sub('season = ', '', cur_title + '-season'))
    box = ax.get_position()
    ax.set_position([box.x0, box.y0, box.width, box.height*0.8])
leg = g.legend
# change legend texts
new_title = 'Model type'
leg.set_title(new_title, prop={'weight': 'bold',
        'size': 12,
        })
new_labels = ['Null model (M0)', 'Fully paramaterized (M5)']
for t, l in zip(leg.texts, new_labels):
    t.set_text(l)
plt.savefig('C:/Users/sean.kearney/OneDrive - USDA/Documents/Projects/GPS_v_hetgen/figures/dtank.png', 
            bbox_inches='tight', dpi=300)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [32]:
plt.figure()
sns.kdeplot(x='grazing_rel_freq', hue='season', 
            data=df_wkly_grid_train, bw_adjust=2.0, clip=(0, None),
           common_norm=False, cumulative=False, common_grid=False)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<AxesSubplot:xlabel='grazing_rel_freq', ylabel='Density'>

In [49]:
df_wkly_grid_train['grazing_rel_freq_seas'] = (df_wkly_grid_train.groupby(['season', 'Pasture', 'Steer_ID', 'UTM_X', 'UTM_Y']).grazing_secs.transform('sum') /\
(df_wkly_grid_train.groupby(['season', 'Pasture', 'Steer_ID', 'UTM_X', 'UTM_Y']).grazing_wkly_sum.transform('sum') /\
 df_wkly_grid_train.groupby(['week', 'season', 'Pasture', 'Steer_ID']).grazing_secs.transform(lambda x: np.mean(len(x))))).reset_index(drop=True)

In [50]:
#df_wkly_grid_train[df_wkly_grid_train['mod_data'] == 'train']

In [51]:
#df_wkly_grid_train[(df_wkly_grid_train.Pasture == '7NW') &
#                 (df_wkly_grid_train.UTM_X == 524055.0) & 
#                 (df_wkly_grid_train.UTM_Y == 4522695.0) &
                  #(df_wkly_grid_train.Steer_ID == '2287_D1_2017') & 
#                 (df_wkly_grid_train.mod_data == 'train')].sort_values('Steer_ID')

In [52]:
df_wkly_grid_train['grazing_rel_freq_seas_grp'] = pd.cut(df_wkly_grid_train.groupby(['season', 'Pasture', 'UTM_X', 'UTM_Y'])['grazing_rel_freq_seas'].transform('mean'), 
                                          bins=rel_freq_bins, 
                                          labels=bin_labs,
                                          include_lowest=True)
df_wkly_grid_train['grazing_rel_freq_past_grp'] = pd.cut(df_wkly_grid_train.groupby(['week', 'Pasture', 'UTM_X', 'UTM_Y'])['grazing_rel_freq'].transform('mean'), 
                                          bins=rel_freq_bins, 
                                          labels=bin_labs,
                                          include_lowest=True)

In [59]:
df_wkly_grid_train.groupby('season')['grazing_secs'].apply(lambda x: np.sum(np.isnan(x)))

season
early    1
mid      0
late     0
Name: grazing_secs, dtype: int64

In [40]:
df_seas_grid = df_wkly_grid_train.groupby(['season', 'mod_data', 'Pasture',
                           'UTM_X', 'UTM_Y'])[['grazing_rel_freq_seas', 
                                                           'grazing_rel_freq_seas_grp',
                                                           'Biomass_orig',
                                                           'CP_orig', 
                                                           'PC_dmt', 
                                                           'PC_div_orig']].aggregate(func={
    'grazing_rel_freq_seas': 'last',
    'grazing_rel_freq_seas_grp': 'last',
    'Biomass_orig': np.mean,
    'CP_orig': np.mean,
    'PC_dmt': 'last',
    'PC_div_orig': 'last'
}).reset_index()
    #['mod_data', 'Pasture', 'Steer_ID', 'UTM_X', 'UTM_Y'])['grazing_rel_freq_wkly_grp'].transform(lambda x: '_'.join(x.astype('str')))

In [45]:
df_seas_grid['grazing_rel_freq_seas_grp'].dropna()

691397      Low
691398      Low
691399      Low
691400     Avg.
691401      Low
           ... 
4128583    Avg.
4128584      Hi
4128585      Hi
4128586      Hi
4128904      Hi
Name: grazing_rel_freq_seas_grp, Length: 25953, dtype: category
Categories (5, object): ['Low' < 'Mod. Low' < 'Avg.' < 'Mod. Hi' < 'Hi']

In [41]:
df_seas_grid = df_seas_grid[df_seas_grid['mod_data'] == 'train']
df_seas_grid['grazing_rel_freq_seas_traj'] = df_seas_grid.groupby(['mod_data', 'Pasture', 
                      'UTM_X', 'UTM_Y'])['grazing_rel_freq_seas_grp'].transform('_'.join)
df_seas_grid['season'] = pd.Categorical(df_seas_grid['season'], ['early', 'mid', 'late'])

TypeError: sequence item 0: expected str instance, float found

In [86]:
plt.figure()
sns.pointplot(x='season', y='Biomass', hue='grazing_rel_freq_seas_traj', 
            data=df_seas_grid[df_seas_grid['grazing_rel_freq_seas_traj'].isin(['Low_Low_Low',
                                                                               'Hi_Hi_Hi'])])

  plt.figure()


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<AxesSubplot:xlabel='season', ylabel='Biomass'>

In [89]:
plt.figure()
sns.pointplot(x='season', y='PC_div', hue='grazing_rel_freq_seas_traj', 
            data=df_seas_grid[df_seas_grid['grazing_rel_freq_seas_traj'].isin(['Low_Low_Low',
                                                                               'Hi_Hi_Hi'])])

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<AxesSubplot:xlabel='season', ylabel='PC_div'>

In [53]:
df_seas_grid['grazing_rel_freq_seas_traj'][df_seas_grid['season'] == 'late'].value_counts()

Low_Low_Low                 733
Hi_Hi_Hi                    296
Mod. Low_Low_Low            276
Low_Mod. Low_Low            245
Low_Low_Mod. Low            171
                           ... 
Mod. Hi_Mod. Hi_Mod. Hi      15
Mod. Hi_Mod. Hi_Mod. Low     13
Mod. Low_Mod. Hi_Mod. Hi     12
Mod. Hi_Mod. Low_Mod. Hi     11
Low_Mod. Hi_Mod. Hi           8
Name: grazing_rel_freq_seas_traj, Length: 125, dtype: int64

In [65]:
#plt.figure()
df_plot = df_wkly_grid_train[df_wkly_grid_train['mod_data'] == 'train'].groupby(
    ['season', 'week', 'Pasture', 'Steer_ID'])['grazing_rel_freq_seas_grp'].value_counts().reset_index().groupby(
    ['season', 'Pasture', 'level_4']).mean().reset_index().pivot(columns='level_4',
                                                               index=['season', 'Pasture'], values='grazing_rel_freq_seas_grp').reset_index()#.plot(kind='bar', stacked=True)
df_plot['pct_Hi'] = df_plot[['Hi']].sum(axis=1) / df_plot[['Low', 'Mod. Low', 'Avg.', 
                                                  'Mod. Hi', 'Hi']].sum(axis=1)
df_plot['season'] = pd.Categorical(df_plot['season'], ['early', 'mid', 'late'])
#df_plot
plt.figure()
sns.pointplot(x='season', y='pct_Hi', hue='Pasture', data=df_plot)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<AxesSubplot:xlabel='season', ylabel='pct_Hi'>

In [66]:
df_wkly_grid_train[df_wkly_grid_train['mod_data'] == 'train'].pivot(index=['Pasture', 'UTM_X', 'UTM_Y'])

level_4,season,Pasture,Low,Mod. Low,Avg.,Mod. Hi,Hi,pct_Hi
0,early,15E,524.833333,253.0,208.666667,122.166667,318.833333,0.223351
1,early,17N,393.833333,342.0,261.0,97.666667,288.333333,0.208509
2,early,20SE,422.0,294.0,315.333333,119.666667,269.0,0.189437
3,early,26E,415.333333,313.0,272.0,125.333333,249.333333,0.181333
4,early,31E,402.666667,355.333333,360.666667,179.333333,323.666667,0.199589
5,early,7NW,392.833333,318.0,270.5,140.833333,264.5,0.190745
6,late,15E,354.25,246.25,368.5,166.75,295.0,0.206186
7,late,17N,486.75,249.75,266.0,97.25,283.0,0.204665
8,late,20SE,424.75,291.25,287.25,141.25,276.25,0.19444
9,late,26E,450.5,251.5,264.0,116.5,295.0,0.214156


In [None]:
#plt.figure()
df_plot = df_wkly_grid_train[df_wkly_grid_train['mod_data'] == 'train'].groupby(
    ['season', 'week', 'Pasture', 'Steer_ID'])['grazing_rel_freq_seas_grp'].value_counts().reset_index().groupby(
    ['season', 'Pasture', 'level_4']).mean().reset_index().pivot(columns='level_4',
                                                               index=['season', 'Pasture'], values='grazing_rel_freq_seas_grp').reset_index()#.plot(kind='bar', stacked=True)
df_plot['pct_Hi'] = df_plot[['Hi']].sum(axis=1) / df_plot[['Low', 'Mod. Low', 'Avg.', 
                                                  'Mod. Hi', 'Hi']].sum(axis=1)
df_plot['season'] = pd.Categorical(df_plot['season'], ['early', 'mid', 'late'])
#df_plot
plt.figure()
sns.pointplot(x='season', y='pct_Hi', hue='Pasture', data=df_plot)

In [46]:
"""
Save data to make Sankey diagrams in R
"""
df_sankey = pd.DataFrame(columns=['class_early', 'class_mid', 'class_late', 'value'])
df_sankey['class_early'] = df_seas_grid[df_seas_grid['season'] == 'early']['grazing_rel_freq_seas_grp'].reset_index(drop=True)
df_sankey['class_mid'] = df_seas_grid[df_seas_grid['season'] == 'mid']['grazing_rel_freq_seas_grp'].reset_index(drop=True)
df_sankey['class_late'] = df_seas_grid[df_seas_grid['season'] == 'late']['grazing_rel_freq_seas_grp'].reset_index(drop=True)
df_sankey['value'] = 1
sankey_grp = df_sankey.groupby(['class_early', 'class_mid', 'class_late']).sum().reset_index(drop=False)
sankey_grp = sankey_grp.reindex(columns=sankey_grp.columns.tolist())
sankey_grp.to_csv('C:/SPK_local/zTEMP/gps_grid_sankey2.csv')

In [11]:
aicw_func = lambda x: np.exp(-0.5 * (x - x.min())) / np.sum(np.exp(-0.5 * (x - x.min())))
aicd_func = lambda x: x - x.min()

In [38]:
"""
Analyze best results for specific model comparisons defined in dictionary above
"""
for k in mod_aic_dict:
    df_results.loc[df_results['model'].isin(mod_aic_dict[k]), 
                   'AICw_' + k] = df_results.loc[df_results['model'].isin(mod_aic_dict[k])].groupby(
        ['week', 'Pasture'])['AIC'].transform(aicw_func)
    df_results.loc[df_results['model'].isin(mod_aic_dict[k]), 
                   'AICd_' + k] = df_results.loc[df_results['model'].isin(mod_aic_dict[k])].groupby(
        ['week', 'Pasture'])['AIC'].transform(aicd_func)
    df_results['top_model_' + k] = df_results.groupby(['week', 'Pasture'])['AICw_' + k].transform(
        lambda x: [int(i) for i in x == x.max()])
    df_results['candidate_model_' + k] = df_results.groupby(['week', 'Pasture'])['AICd_' + k].transform(
        lambda x: [int(i) for i in x <= 2.0])

In [39]:
"""
Display the results for the desired model comparison
"""
mod_grp = 'Hw2'
display(df_results.loc[df_results['model'].isin(mod_aic_dict[mod_grp])].groupby(['season', 'model'])[['top_model_' + mod_grp, 'candidate_model_' + mod_grp,
                                                                                'AICw_' + mod_grp, 'AICd_' + mod_grp]].aggregate(
    func={'AICd_' + mod_grp: [np.median, np.std], 'AICw_' + mod_grp: [np.mean, np.std], 
         'top_model_' + mod_grp: np.sum, 'candidate_model_' + mod_grp: np.sum}, axis=1).sort_values(['season', ('AICd_' + mod_grp, 'median')]))
display(df_results.loc[df_results['model'].isin(mod_aic_dict[mod_grp])].groupby(['model'])[['top_model_' + mod_grp, 'candidate_model_' + mod_grp, 
                                                                      'AICw_' + mod_grp, 'AICd_' + mod_grp]].aggregate(
    func={'AICd_' + mod_grp: [np.median, np.std], 'AICw_' + mod_grp: [np.mean, np.std], 
          'top_model_' + mod_grp: np.sum, 'candidate_model_' + mod_grp: np.sum}, axis=1).sort_values(('AICd_' + mod_grp, 'median')))

Unnamed: 0_level_0,Unnamed: 1_level_0,AICd_Hw2,AICd_Hw2,AICw_Hw2,AICw_Hw2,top_model_Hw2,candidate_model_Hw2
Unnamed: 0_level_1,Unnamed: 1_level_1,median,std,mean,std,sum,sum
season,model,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
early,M2g,0.275,1.081198,0.5061748,0.296941,9,16
early,M2f,3.65,15.11663,0.2215112,0.240397,4,8
early,M2e,6.54,15.016862,0.1527321,0.249046,3,5
early,M2d,19.35,53.099368,0.03105445,0.063824,0,3
early,M2b,28.545,41.595856,0.04482712,0.091801,1,2
early,M2a,46.48,59.347148,0.02105054,0.066307,0,1
early,M2c,55.24,83.766058,0.0226499,0.095805,1,1
late,M2g,0.0,1.393833,0.5669245,0.357838,7,10
late,M2f,3.97,13.607834,0.1704526,0.196187,2,5
late,M2e,17.635,17.977953,0.1075712,0.192545,1,3


Unnamed: 0_level_0,AICd_Hw2,AICd_Hw2,AICw_Hw2,AICw_Hw2,top_model_Hw2,candidate_model_Hw2
Unnamed: 0_level_1,median,std,mean,std,sum,sum
model,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
M2g,0.0,1.166435,0.576807,0.336463,28,42
M2f,4.55,25.114508,0.171345,0.20511,8,19
M2e,13.79,22.67833,0.135075,0.21963,6,13
M2d,20.395,41.549821,0.050169,0.117503,2,7
M2b,38.48,83.472705,0.048791,0.136041,3,5
M2a,71.34,89.428555,0.00916,0.041377,0,1
M2c,87.71,120.894697,0.008653,0.058657,1,1
