In [1]:
import os
import re
import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.notebook import tqdm
from sklearn.preprocessing import StandardScaler
import pickle
%config Completer.use_jedi = False
%matplotlib widget

In [2]:
"""
Define I/O paths
"""
# input directores
inDIR_data = 'C:/Users/sean.kearney/OneDrive - USDA/Documents/Projects/GPS_v_hetgen/data/'
inDIR_mods = 'C:/Users/sean.kearney/OneDrive - USDA/Documents/Projects/GPS_v_hetgen/models/'

# file name of gridded path intensity data (output from gps_to_gridded_path_intensity.ipynb)
griddata_f = '2017_grazing_time_gridded_all_for_model.csv'

# file name of model results
results_f = '2017_grazing_intensity_model_results.csv'

In [3]:
"""
Define models for analyzing
"""
# create list of saved models
mod_list = ['M0', 'M1', 'M2_bm', 'M2_top', 'M3_pc', 'M3_top', 'M4_top', 'M5_top']

# create dictionary to map top models to original models
top_mod_dict = {
    'M2_top': 'M2g',
    'M3_top': 'M3d',
    'M4_top': 'M4h',
    'M5_top': 'M5'
}

# create dictionaries of saved model results for AIC comparison
mod_aic_dict = {
    'TPC2': [top_mod_dict['M2_top'], 'M2_tpc'],
    'TPC3': [top_mod_dict['M3_top'], 'M3_tpc'],
    'TPC4': [top_mod_dict['M4_top'], top_mod_dict['M5_top']],
    'Hw1': ['M1', 'M5'],
    'Hw2': ['M2a', 'M2b', 'M2c', 'M2d', 'M2e', 'M2f', 'M2g'],
    'Hw3': ['M2a', 'M2b', 'M2c', 'M2d', 'M2e', 'M2f', 'M2g'],
    'Hw4a': ['M2a', 'M2b'],
    'Hw4b': ['M2f', 'M2g'],
    'Hw5': ['M2a', 'M2b', 'M2c', 'M2d', 'M2e', 'M2f', 'M2g'],
    'Hw6': ['M2a', 'M2b', 'M2c', 'M2d', 'M2e', 'M2f', 'M2g'],
    'Hw7': ['M3a', 'M3b', 'M3c', 'M3d'],
    'Hw8': ['M3a', 'M3b', 'M3c', 'M3d'],
    'Hw9': ['M3a', 'M3b', 'M3c', 'M3d'],
    'Ha1': ['M0', 'M1', 'M2', 'M3'],
    'Ha2': ['M0', 'M1', 'M2', 'M3', 'M4'],
    'Ha3': ['M0', 'M1', 'M2', 'M3', 'M4'],
    'Ha4': ['M0', 'M1', 'M2', 'M3', 'M4'],
}

# create dictionaries of saved scalers
scaler_dict = {}
for seas in ['early', 'mid', 'late']:
    scaler_dict[seas] = {}
    for c in ['dFence', 'dTank', 'Biomass', 'CP', 'PC_div']:
        with open(os.path.join(inDIR_mods, 'scaler_' + seas + '_' + c + '.pkl'), 'rb') as f:
            scaler_dict[seas][c] = pickle.load(f)

In [4]:
"""
Load data and models
"""
# read gridded data into dataframe
df_wkly_grid_full = pd.read_csv(os.path.join(inDIR_data, griddata_f), engine='python')

# read model results into dataframe
df_results = pd.read_csv(os.path.join(inDIR_mods, results_f), engine='python')

# create dictionary of the full models
full_mod_dict = {}
for mod in mod_list:
    f_matched = [f for f in os.listdir(os.path.join(inDIR_mods, 'full')) if mod in f]
    if len(f_matched) == 1:
        full_mod_dict[mod] = sm.load(os.path.join(
            inDIR_mods, 'full', f_matched[0]))
    else:
        print('ERROR: multiple matching files found:')
        display(f_mathched)

# create dictionary of the seasonal models
seas_mod_dict = {}
for seas in df_wkly_grid_full[df_wkly_grid_full['mod_data'] == 'train']['season'].unique():
    seas_mod_dict[seas] = {}
    for mod in mod_list:
        f_matched = [f for f in os.listdir(os.path.join(inDIR_mods, 'seas')) if mod in f and seas in f]
        if len(f_matched) == 1:
            seas_mod_dict[seas][mod] = sm.load(os.path.join(
                inDIR_mods, 'seas', f_matched[0]))
        else:
            print('ERROR: multiple matching files found:')
            display(f_mathched)

# create dictionary of the pasture-specific models
past_mod_dict = {}
for past in df_wkly_grid_full[df_wkly_grid_full['mod_data'] == 'train']['Pasture'].unique():
    past_mod_dict[past] = {}
    for mod in mod_list:
        f_matched = [f for f in os.listdir(os.path.join(inDIR_mods, 'past')) if mod in f and past in f]
        if len(f_matched) == 1:
            past_mod_dict[past][mod] = sm.load(os.path.join(
                inDIR_mods, 'past', f_matched[0]))
        else:
            print('ERROR: multiple matching files found:')
            display(f_matched)
            
# scale data back to original values
for c in ['dFence', 'dTank', 'Biomass', 'CP', 'PC_div']:
    for seas in ['early', 'mid', 'late']:
        scaler = scaler_dict[seas][c]
        df_wkly_grid_full.loc[df_wkly_grid_full['season'] == seas, c + '_orig'] = scaler.inverse_transform(
            df_wkly_grid_full.loc[df_wkly_grid_full['season'] == seas, c].values.reshape(-1, 1)).flatten()


In [6]:
"""
Convert rare classes to 'Other'
"""
df_wkly_grid_full['PC_dmt'].replace({'Bare_veg': 'Other', 'Bare': 'Other', 'UNK': 'Other', 'Shrub': 'Other'}, 
                               inplace=True)
#df_wkly_grid_full = df_wkly_grid_full[~df_wkly_grid_full['PC_dmt'].isin(['Bare', 'Bare_veg', 'UNK', 'Shrub'])]
#df_wkly_grid_full = df_wkly_grid_full[~df_wkly_grid_full['TPC_c'].isin(['Other'])]

In [7]:
"""
Predict grazing intensity for all available models
"""
pred_model = 'M5_top'
df_wkly_grid_full['pred_overall'] = full_mod_dict[pred_model].predict(df_wkly_grid_full) * df_wkly_grid_full['grazing_wkly_sum'] / (df_wkly_grid_full['grazing_wkly_sum'] / 
                                                                             df_wkly_grid_full.groupby(
                                                                                 ['Pasture', 'Steer_ID', 'week'])['grazing_secs'].transform('count'))
for seas, df_sub in df_wkly_grid_full.groupby('season'):
    df_wkly_grid_full.loc[df_wkly_grid_full['season'] == seas, 'pred_seas'] = seas_mod_dict[seas][pred_model].predict(df_sub) * df_wkly_grid_full['grazing_wkly_sum'] / (df_wkly_grid_full['grazing_wkly_sum'] / 
                                                                             df_wkly_grid_full.groupby(
                                                                                 ['Pasture', 'Steer_ID', 'week'])['grazing_secs'].transform('count'))
for past, df_sub in df_wkly_grid_full.groupby('Pasture'):
    df_wkly_grid_full.loc[df_wkly_grid_full['Pasture'] == past, 'pred_past'] = seas_mod_dict[seas][pred_model].predict(df_sub) * df_wkly_grid_full['grazing_wkly_sum'] / (df_wkly_grid_full['grazing_wkly_sum'] / 
                                                                             df_wkly_grid_full.groupby(
                                                                                 ['Pasture', 'Steer_ID', 'week'])['grazing_secs'].transform('count'))
#df_wkly_grid_full.loc[(df_wkly_grid_full['pred_M5_overall'] > 1.0), 'pred_M5_overall'] = 1.0
#df_wkly_grid_full.loc[(df_wkly_grid_full['pred_M5_seas'] > 1.0), 'pred_M5_seas'] = 1.0
#df_wkly_grid_full.loc[(df_wkly_grid_full['pred_M5_past'] > 1.0), 'pred_M5_past'] = 1.0

In [8]:
df_wkly_grid_full[df_wkly_grid_full['mod_data'] == 'train']['pred_seas'].describe()

count    1.012720e+05
mean     1.902312e+06
std      3.111755e+08
min      3.233301e-09
25%      5.557977e-01
50%      8.164296e-01
75%      1.184937e+00
max      6.428089e+10
Name: pred_seas, dtype: float64

In [9]:
plt.figure()
sns.boxplot(x='Pasture', y='Biomass', hue='week', data=df_wkly_grid_full[df_wkly_grid_full['mod_data'] == 'train'], showfliers=False)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<AxesSubplot:xlabel='Pasture', ylabel='Biomass'>

In [10]:
rel_freq_bins = [0.0, 0.4, 0.75, 1.25, 1.6, 1000.0]
bin_labs = ['Low', 'Mod. Low', 'Avg.', 'Mod. Hi', 'Hi']
df_wkly_grid_full['grazing_rel_freq_grp'] = pd.cut(df_wkly_grid_full['grazing_rel_freq'], 
                                          bins=rel_freq_bins, 
                                          labels=bin_labs,
                                          include_lowest=True)

In [11]:
#rel_freq_bins = [0.0, 0.5, 0.75, 0.9, 1.1, 1.25, 1.5, 1000.0]
#bin_labs = ['V. Low', 'Low', 'Mod. Low', 'Avg', 'Mod. Hi', 'Hi', 'V. Hi']
df_wkly_grid_full['pred_overall_grp'] = pd.cut(df_wkly_grid_full['pred_overall'], 
                                          bins=rel_freq_bins, 
                                          labels=bin_labs,
                                          include_lowest=True)
df_wkly_grid_full['pred_seas_grp'] = pd.cut(df_wkly_grid_full['pred_seas'], 
                                          bins=rel_freq_bins, 
                                          labels=bin_labs,
                                          include_lowest=True)
df_wkly_grid_full['pred_past_grp'] = pd.cut(df_wkly_grid_full['pred_past'], 
                                          bins=rel_freq_bins, 
                                          labels=bin_labs,
                                          include_lowest=True)
fig, axs = plt.subplots(nrows=1, ncols=3, sharey=True, figsize=(10, 6))
sns.pointplot(x='pred_overall_grp', y='grazing_rel_freq', data=df_wkly_grid_full, ax=axs[0], hue='mod_data')
sns.pointplot(x='pred_seas_grp', y='grazing_rel_freq', data=df_wkly_grid_full, ax=axs[1], hue='mod_data')
sns.pointplot(x='pred_past_grp', y='grazing_rel_freq', data=df_wkly_grid_full, ax=axs[2], hue='mod_data')
for ax in axs:
    ax.axhline(y=1.0, linestyle='dashed')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [16]:
#fig, axs = plt.subplots(nrows=4, ncols=3, sharey=False, figsize=(10, 6))
g = sns.FacetGrid(df_wkly_grid_full, col='Pasture', col_wrap=3,  hue='mod_data',# col_order=['early', 'mid', 'late'], 
                  legend_out=True, sharey=False)
g.map_dataframe(sns.pointplot, x='pred_seas_grp', y='grazing_rel_freq').add_legend()
for ax in g.axes:
    ax.axhline(y=1.0, linestyle='dashed')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [11]:
aicw_func = lambda x: np.exp(-0.5 * (x - x.min())) / np.sum(np.exp(-0.5 * (x - x.min())))
aicd_func = lambda x: x - x.min()

In [38]:
"""
Analyze best results for specific model comparisons defined in dictionary above
"""
for k in mod_aic_dict:
    df_results.loc[df_results['model'].isin(mod_aic_dict[k]), 
                   'AICw_' + k] = df_results.loc[df_results['model'].isin(mod_aic_dict[k])].groupby(
        ['week', 'Pasture'])['AIC'].transform(aicw_func)
    df_results.loc[df_results['model'].isin(mod_aic_dict[k]), 
                   'AICd_' + k] = df_results.loc[df_results['model'].isin(mod_aic_dict[k])].groupby(
        ['week', 'Pasture'])['AIC'].transform(aicd_func)
    df_results['top_model_' + k] = df_results.groupby(['week', 'Pasture'])['AICw_' + k].transform(
        lambda x: [int(i) for i in x == x.max()])
    df_results['candidate_model_' + k] = df_results.groupby(['week', 'Pasture'])['AICd_' + k].transform(
        lambda x: [int(i) for i in x <= 2.0])

In [39]:
"""
Display the results for the desired model comparison
"""
mod_grp = 'Hw2'
display(df_results.loc[df_results['model'].isin(mod_aic_dict[mod_grp])].groupby(['season', 'model'])[['top_model_' + mod_grp, 'candidate_model_' + mod_grp,
                                                                                'AICw_' + mod_grp, 'AICd_' + mod_grp]].aggregate(
    func={'AICd_' + mod_grp: [np.median, np.std], 'AICw_' + mod_grp: [np.mean, np.std], 
         'top_model_' + mod_grp: np.sum, 'candidate_model_' + mod_grp: np.sum}, axis=1).sort_values(['season', ('AICd_' + mod_grp, 'median')]))
display(df_results.loc[df_results['model'].isin(mod_aic_dict[mod_grp])].groupby(['model'])[['top_model_' + mod_grp, 'candidate_model_' + mod_grp, 
                                                                      'AICw_' + mod_grp, 'AICd_' + mod_grp]].aggregate(
    func={'AICd_' + mod_grp: [np.median, np.std], 'AICw_' + mod_grp: [np.mean, np.std], 
          'top_model_' + mod_grp: np.sum, 'candidate_model_' + mod_grp: np.sum}, axis=1).sort_values(('AICd_' + mod_grp, 'median')))

Unnamed: 0_level_0,Unnamed: 1_level_0,AICd_Hw2,AICd_Hw2,AICw_Hw2,AICw_Hw2,top_model_Hw2,candidate_model_Hw2
Unnamed: 0_level_1,Unnamed: 1_level_1,median,std,mean,std,sum,sum
season,model,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
early,M2g,0.275,1.081198,0.5061748,0.296941,9,16
early,M2f,3.65,15.11663,0.2215112,0.240397,4,8
early,M2e,6.54,15.016862,0.1527321,0.249046,3,5
early,M2d,19.35,53.099368,0.03105445,0.063824,0,3
early,M2b,28.545,41.595856,0.04482712,0.091801,1,2
early,M2a,46.48,59.347148,0.02105054,0.066307,0,1
early,M2c,55.24,83.766058,0.0226499,0.095805,1,1
late,M2g,0.0,1.393833,0.5669245,0.357838,7,10
late,M2f,3.97,13.607834,0.1704526,0.196187,2,5
late,M2e,17.635,17.977953,0.1075712,0.192545,1,3


Unnamed: 0_level_0,AICd_Hw2,AICd_Hw2,AICw_Hw2,AICw_Hw2,top_model_Hw2,candidate_model_Hw2
Unnamed: 0_level_1,median,std,mean,std,sum,sum
model,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
M2g,0.0,1.166435,0.576807,0.336463,28,42
M2f,4.55,25.114508,0.171345,0.20511,8,19
M2e,13.79,22.67833,0.135075,0.21963,6,13
M2d,20.395,41.549821,0.050169,0.117503,2,7
M2b,38.48,83.472705,0.048791,0.136041,3,5
M2a,71.34,89.428555,0.00916,0.041377,0,1
M2c,87.71,120.894697,0.008653,0.058657,1,1


In [35]:
"""
Compare models by season: how does selection change with biomass and relative CP (within season)
"""
cp_q_dict = {
    'Lowest': 0.10,
    'Below avg': 0.25,
    'Average': 0.5,
    'Above avg': 0.75,
    'Highest': 0.90
}
ref_TPC = 'Flat Plains'
ref_pc = 'C3_C4_mix'
df_pred = pd.DataFrame(columns=['season','dFence_orig', 'dTank_orig', 'Biomass_orig', 'CP_orig','PC_div_orig',
                                'PC_dmt', 'TPC_c', 'Model'])
for mod in tqdm(['M2_bm', 'M2_top', 'M4_top', 'M5_top']):
    for seas, df_sub in df_wkly_grid_full[df_wkly_grid_full['mod_data'] == 'train'].groupby('season'):
        if len(df_sub[(df_sub['PC_dmt'] == ref_pc) & (df_sub['TPC_c'] == ref_TPC)]) < 100:
            #print(len(df_tmp[(df_tmp['PC_dmt'] == ref_pc) & (df_tmp['TPC_c'] == ref_TPC)]))
            continue
        else:
            df_tmp = df_sub[(df_sub['TPC_c'] == ref_TPC) & (df_sub['PC_dmt'] == ref_pc)]
            #seas = seas_group[0]
            #df_tmp = seas_group[1]
            for cp_q in cp_q_dict:
                df_pred = df_pred.append(pd.DataFrame({
                    'season': seas,
                    'Relative CP': cp_q,
                    'dFence_orig': df_tmp['dFence_orig'].median(),
                    'dTank_orig': df_tmp['dTank_orig'].median(),
                    'Biomass_orig': np.arange(df_tmp['Biomass_orig'][np.abs(df_tmp['CP_orig'] - df_tmp['CP_orig'].quantile(cp_q_dict[cp_q])) < 0.25].min(), 
                         df_tmp['Biomass_orig'][np.abs(df_tmp['CP_orig'] - df_tmp['CP_orig'].quantile(cp_q_dict[cp_q])) < 0.25].max(),  10),
                    'CP_orig': df_tmp['CP_orig'].quantile(cp_q_dict[cp_q]),
                    'PC_div_orig': df_tmp['PC_div_orig'].median(),
                    'PC_dmt': ref_pc,
                    'TPC_c': ref_TPC,
                    'Model': mod
                }))
        for c in ['dFence', 'dTank', 'Biomass', 'CP', 'PC_div']:
            scaler = scaler_dict[seas][c]
            df_pred.loc[df_pred['season'] == seas, c] = scaler.transform(
                df_pred.loc[df_pred['season'] == seas, c + '_orig'].values.reshape(-1, 1)).flatten()
        df_pred.loc[(df_pred['season'] == seas) & (df_pred['Model'] == mod), 
                    'mod_pred'] = seas_mod_dict[seas][mod].predict(df_pred.loc[(df_pred['season'] == seas) & (df_pred['Model'] == mod)])
        #df_wkly_grid_sub = df_wkly_grid_full[(df_wkly_grid_full['season'] == 'seas') & (df_wkly_grid_full['mod_data'] == 'train')]
        #df_pred.loc[
        #    (df_pred['season'] == seas) & (df_pred['Model'] == mod), 'pred_rel_freq'] = (
        #    df_pred.loc[(df_pred['season'] == seas) & (df_pred['Model'] == mod), 'mod_pred'] * 
        #                                                   df_wkly_grid_sub['grazing_wkly_sum'].mean()) / (
        #    df_wkly_grid_sub['grazing_wkly_sum'].mean() / df_wkly_grid_sub.groupby(
        #    ['week', 'Pasture', 'Steer_ID'])['grazing_wkly_sum'].count().mean())
df_pred['season'] = pd.Categorical(df_pred['season'], ['early', 'mid', 'late'])
df_pred['pred_rel_freq'] = np.nan
for seas in df_pred.season.unique():
    df_wkly_grid_sub = df_wkly_grid_full[(df_wkly_grid_full['season'] == seas) & (df_wkly_grid_full['mod_data'] == 'train')]
    df_pred.loc[df_pred['season'] == seas, 'pred_rel_freq'] = (df_pred.loc[df_pred['season'] == seas, 'mod_pred'] * df_wkly_grid_sub['grazing_wkly_sum'].mean()) / (df_wkly_grid_sub['grazing_wkly_sum'].mean() / df_wkly_grid_sub.groupby(
        ['week', 'Pasture', 'Steer_ID'])['grazing_wkly_sum'].count().mean())

  0%|          | 0/4 [00:00<?, ?it/s]

In [13]:
df_pred['pred_rel_freq'].describe()

count    5572.000000
mean        0.937772
std         0.894378
min         0.010787
25%         0.498176
50%         0.782924
75%         1.095839
max         9.204812
Name: pred_rel_freq, dtype: float64

In [41]:
#df_pred

In [14]:
plt.figure()
sns.lineplot(x='Biomass', y='pred_rel_freq', data=df_pred[df_pred['Model'] == 'M2_bm'])

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<AxesSubplot:xlabel='Biomass', ylabel='pred_rel_freq'>

In [17]:
"""
Plot marginal effects of biomass and relative CP by model and season
"""
x_var = 'Biomass_orig'
g = sns.FacetGrid(df_pred[df_pred['Model'] != 'M2_bm'], col='season', row='Model', hue='Relative CP', col_order=['early', 'mid', 'late'], 
                  legend_out=True, sharey=True, palette='viridis')
#plt.figure()
g.map_dataframe(sns.lineplot, x=x_var, y='pred_rel_freq').add_legend()
axes = g.fig.axes
for idx, seas in enumerate(np.repeat(np.array([['early', 'mid', 'late']]), 3, axis=0).flatten()):
    #sns.lineplot(x=x_var, y='pred_rel_freq', data=df_pred[(df_pred['Model'] == 'M2_bm') & (df_pred['season'] == seas)], 
    #             linestyle='dotted', color='black', ax=axes[idx], legend=False)
    axes[idx].axhline(y=1.0, linestyle='dashed', color='grey')
    if axes[idx].get_ylim()[-1] < 2.0:
        axes[idx].set_ylim((0.0, 2.0))
    #axes[idx].set_yscale('log')
#g.set(yscale='log')
g.fig.subplots_adjust(top=0.9)
g.fig.suptitle('Marginal effects of Biomass and Relative CP by Model and Season')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Text(0.5, 0.98, 'Marginal effects of Biomass and Relative CP by Model and Season')

In [36]:
"""
Compare top model by plant community: how does selection change with biomass and CP across plant communities?
"""
cp_q_dict = {
    'Lowest': 0.10,
    'Below avg': 0.25,
    'Average': 0.5,
    'Above avg': 0.75,
    'Highest': 0.90
}
ref_TPC = 'Flat Plains'
df_pred = pd.DataFrame(columns=['season','dFence_orig', 'dTank_orig', 'Biomass_orig', 'CP_orig','PC_div_orig',
                                'PC_dmt', 'TPC_c', 'Model'])
for ref_pc in df_wkly_grid_full['PC_dmt'].unique():
    for mod in tqdm(['M2_bm', 'M2_top', 'M4_top', 'M5_top']):
        for seas, df_sub in df_wkly_grid_full[df_wkly_grid_full['mod_data'] == 'train'].groupby('season'):
            if len(df_sub[(df_sub['PC_dmt'] == ref_pc) & (df_sub['TPC_c'] == ref_TPC)]) < 100:
                #print(len(df_tmp[(df_tmp['PC_dmt'] == ref_pc) & (df_tmp['TPC_c'] == ref_TPC)]))
                continue
            else:
                df_tmp = df_sub[(df_sub['TPC_c'] == ref_TPC) & (df_sub['PC_dmt'] == ref_pc)]
                #seas = seas_group[0]
                #df_tmp = seas_group[1]
                for cp_q in cp_q_dict:
                    df_pred = df_pred.append(pd.DataFrame({
                        'season': seas,
                        'Relative CP': cp_q,
                        'dFence_orig': df_tmp['dFence_orig'].median(),
                        'dTank_orig': df_tmp['dTank_orig'].median(),
                        'Biomass_orig': np.arange(df_tmp['Biomass_orig'][np.abs(df_tmp['CP_orig'] - df_tmp['CP_orig'].quantile(cp_q_dict[cp_q])) < 0.25].min(), 
                             df_tmp['Biomass_orig'][np.abs(df_tmp['CP_orig'] - df_tmp['CP_orig'].quantile(cp_q_dict[cp_q])) < 0.25].max(),  10),
                        'CP_orig': df_tmp['CP_orig'].quantile(cp_q_dict[cp_q]),
                        'PC_div_orig': df_tmp['PC_div_orig'].median(),
                        'PC_dmt': ref_pc,
                        'TPC_c': ref_TPC,
                        'Model': mod
                    }))
            for c in ['dFence', 'dTank', 'Biomass', 'CP', 'PC_div']:
                scaler = scaler_dict[seas][c]
                df_pred.loc[df_pred['season'] == seas, c] = scaler.transform(
                    df_pred.loc[df_pred['season'] == seas, c + '_orig'].values.reshape(-1, 1)).flatten()
            df_pred.loc[(df_pred['season'] == seas) & (df_pred['Model'] == mod), 
                        'mod_pred'] = seas_mod_dict[seas][mod].predict(df_pred.loc[(df_pred['season'] == seas) & (df_pred['Model'] == mod)])
    #df_wkly_grid_sub = df_wkly_grid_full[(df_wkly_grid_full['season'] == 'seas') & (df_wkly_grid_full['mod_data'] == 'train')]
        #df_pred.loc[
        #    (df_pred['season'] == seas) & (df_pred['Model'] == mod), 'pred_rel_freq'] = (
        #    df_pred.loc[(df_pred['season'] == seas) & (df_pred['Model'] == mod), 'mod_pred'] * 
        #                                                   df_wkly_grid_sub['grazing_wkly_sum'].mean()) / (
        #    df_wkly_grid_sub['grazing_wkly_sum'].mean() / df_wkly_grid_sub.groupby(
        #    ['week', 'Pasture', 'Steer_ID'])['grazing_wkly_sum'].count().mean())
df_pred['season'] = pd.Categorical(df_pred['season'], ['early', 'mid', 'late'])
df_pred['pred_rel_freq'] = np.nan
for seas in df_pred.season.unique():
    df_wkly_grid_sub = df_wkly_grid_full[(df_wkly_grid_full['season'] == seas) & (df_wkly_grid_full['mod_data'] == 'train')]
    df_pred.loc[df_pred['season'] == seas, 'pred_rel_freq'] = (df_pred.loc[df_pred['season'] == seas, 'mod_pred'] * df_wkly_grid_sub['grazing_wkly_sum'].mean()) / (df_wkly_grid_sub['grazing_wkly_sum'].mean() / df_wkly_grid_sub.groupby(
        ['week', 'Pasture', 'Steer_ID'])['grazing_wkly_sum'].count().mean())

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

In [37]:
"""
Plot marginal effects of biomass and CP by plant community and season
"""
x_var = 'Biomass_orig'
g = sns.FacetGrid(df_pred, col='season', row='PC_dmt', hue='Relative CP', 
                  legend_out=True, sharey=False, palette='viridis')
g.map_dataframe(sns.lineplot, x=x_var, y='pred_rel_freq').add_legend()
g.fig.subplots_adjust(top=0.925)
g.fig.suptitle('Marginal effects of Biomass and CP by Plant Community and season (model=M5)')
axes = g.fig.axes
for idx, seas in enumerate(np.repeat(np.array([['early', 'mid', 'late']]), 3, axis=0).flatten()):
    #sns.lineplot(x=x_var, y='pred_rel_freq', data=df_pred[(df_pred['Model'] == 'M2_bm') & (df_pred['season'] == seas)], 
    #             linestyle='dotted', color='black', ax=axes[idx], legend=False)
    axes[idx].axhline(y=1.0, linestyle='dashed', color='grey')
    if axes[idx].get_ylim()[-1] < 2.0:
        axes[idx].set_ylim((0.0, 2.0))

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Text(0.5, 0.98, 'Marginal effects of Biomass and CP by Plant Community and season (model=M5)')

In [34]:
#df_wkly_grid_train = df_wkly_grid_full[df_wkly_grid_full['mod_data'] == 'train']
df_wkly_grid_full['grazing_rel_freq_seas'] = (df_wkly_grid_full.groupby(['mod_data', 'season', 'Pasture', 'Steer_ID', 'UTM_X', 'UTM_Y']).grazing_secs.transform('sum') /\
(df_wkly_grid_full.groupby(['mod_data', 'season', 'Pasture', 'Steer_ID', 'UTM_X', 'UTM_Y']).grazing_wkly_sum.transform('sum') /\
 df_wkly_grid_full.groupby(['mod_data', 'week', 'season', 'Pasture', 'Steer_ID']).grazing_secs.transform(lambda x: np.mean(len(x))))).reset_index(drop=True)

In [36]:
#df_wkly_grid_full[df_wkly_grid_full['mod_data'] == 'train']

In [37]:
#df_wkly_grid_full[(df_wkly_grid_full.Pasture == '7NW') &
#                 (df_wkly_grid_full.UTM_X == 524055.0) & 
#                 (df_wkly_grid_full.UTM_Y == 4522695.0) &
                  #(df_wkly_grid_full.Steer_ID == '2287_D1_2017') & 
#                 (df_wkly_grid_full.mod_data == 'train')].sort_values('Steer_ID')

In [67]:
df_wkly_grid_full['grazing_rel_freq_seas_grp'] = pd.cut(df_wkly_grid_full.groupby(['season', 'Pasture', 'UTM_X', 'UTM_Y'])['grazing_rel_freq_seas'].transform('mean'), 
                                          bins=rel_freq_bins, 
                                          labels=bin_labs,
                                          include_lowest=True)
df_wkly_grid_full['grazing_rel_freq_past_grp'] = pd.cut(df_wkly_grid_full.groupby(['week', 'Pasture', 'UTM_X', 'UTM_Y'])['grazing_rel_freq'].transform('mean'), 
                                          bins=rel_freq_bins, 
                                          labels=bin_labs,
                                          include_lowest=True)

In [63]:
df_wkly_grid_full.groupby('season')['grazing_rel_freq_seas_grp'].value_counts()

season          
early   Low         34402
        Mod. Low    25383
        Avg.        24516
        Hi          23705
        Mod. Hi     11259
late    Low         25236
        Avg.        18552
        Hi          17570
        Mod. Low    16755
        Mod. Hi      8521
mid     Low         23758
        Hi          15691
        Avg.        14080
        Mod. Low    13813
        Mod. Hi      6521
Name: grazing_rel_freq_seas_grp, dtype: int64

In [42]:
df_seas_grid = df_wkly_grid_full.groupby(['season', 'mod_data', 'Pasture',
                           'UTM_X', 'UTM_Y'])[['grazing_rel_freq_seas', 
                                                           'grazing_rel_freq_seas_grp',
                                                           'Biomass',
                                                           'CP', 
                                                           'PC_dmt', 
                                                           'PC_div']].aggregate(func={
    'grazing_rel_freq_seas': 'last',
    'grazing_rel_freq_seas_grp': 'last',
    'Biomass': np.mean,
    'CP': np.mean,
    'PC_dmt': 'last',
    'PC_div': 'last'
}).reset_index()
    #['mod_data', 'Pasture', 'Steer_ID', 'UTM_X', 'UTM_Y'])['grazing_rel_freq_wkly_grp'].transform(lambda x: '_'.join(x.astype('str')))

In [43]:
df_seas_grid = df_seas_grid[df_seas_grid['mod_data'] == 'train']
df_seas_grid['grazing_rel_freq_seas_traj'] = df_seas_grid.groupby(['mod_data', 'Pasture', 
                      'UTM_X', 'UTM_Y'])['grazing_rel_freq_seas_grp'].transform('_'.join)
df_seas_grid['season'] = pd.Categorical(df_seas_grid['season'], ['early', 'mid', 'late'])

In [86]:
plt.figure()
sns.pointplot(x='season', y='Biomass', hue='grazing_rel_freq_seas_traj', 
            data=df_seas_grid[df_seas_grid['grazing_rel_freq_seas_traj'].isin(['Low_Low_Low',
                                                                               'Hi_Hi_Hi'])])

  plt.figure()


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<AxesSubplot:xlabel='season', ylabel='Biomass'>

In [89]:
plt.figure()
sns.pointplot(x='season', y='PC_div', hue='grazing_rel_freq_seas_traj', 
            data=df_seas_grid[df_seas_grid['grazing_rel_freq_seas_traj'].isin(['Low_Low_Low',
                                                                               'Hi_Hi_Hi'])])

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<AxesSubplot:xlabel='season', ylabel='PC_div'>

In [53]:
df_seas_grid['grazing_rel_freq_seas_traj'][df_seas_grid['season'] == 'late'].value_counts()

Low_Low_Low                 733
Hi_Hi_Hi                    296
Mod. Low_Low_Low            276
Low_Mod. Low_Low            245
Low_Low_Mod. Low            171
                           ... 
Mod. Hi_Mod. Hi_Mod. Hi      15
Mod. Hi_Mod. Hi_Mod. Low     13
Mod. Low_Mod. Hi_Mod. Hi     12
Mod. Hi_Mod. Low_Mod. Hi     11
Low_Mod. Hi_Mod. Hi           8
Name: grazing_rel_freq_seas_traj, Length: 125, dtype: int64

In [65]:
#plt.figure()
df_plot = df_wkly_grid_full[df_wkly_grid_full['mod_data'] == 'train'].groupby(
    ['season', 'week', 'Pasture', 'Steer_ID'])['grazing_rel_freq_seas_grp'].value_counts().reset_index().groupby(
    ['season', 'Pasture', 'level_4']).mean().reset_index().pivot(columns='level_4',
                                                               index=['season', 'Pasture'], values='grazing_rel_freq_seas_grp').reset_index()#.plot(kind='bar', stacked=True)
df_plot['pct_Hi'] = df_plot[['Hi']].sum(axis=1) / df_plot[['Low', 'Mod. Low', 'Avg.', 
                                                  'Mod. Hi', 'Hi']].sum(axis=1)
df_plot['season'] = pd.Categorical(df_plot['season'], ['early', 'mid', 'late'])
#df_plot
plt.figure()
sns.pointplot(x='season', y='pct_Hi', hue='Pasture', data=df_plot)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<AxesSubplot:xlabel='season', ylabel='pct_Hi'>

In [66]:
df_wkly_grid_full[df_wkly_grid_full['mod_data'] == 'train'].pivot(index=['Pasture', 'UTM_X', 'UTM_Y'])

level_4,season,Pasture,Low,Mod. Low,Avg.,Mod. Hi,Hi,pct_Hi
0,early,15E,524.833333,253.0,208.666667,122.166667,318.833333,0.223351
1,early,17N,393.833333,342.0,261.0,97.666667,288.333333,0.208509
2,early,20SE,422.0,294.0,315.333333,119.666667,269.0,0.189437
3,early,26E,415.333333,313.0,272.0,125.333333,249.333333,0.181333
4,early,31E,402.666667,355.333333,360.666667,179.333333,323.666667,0.199589
5,early,7NW,392.833333,318.0,270.5,140.833333,264.5,0.190745
6,late,15E,354.25,246.25,368.5,166.75,295.0,0.206186
7,late,17N,486.75,249.75,266.0,97.25,283.0,0.204665
8,late,20SE,424.75,291.25,287.25,141.25,276.25,0.19444
9,late,26E,450.5,251.5,264.0,116.5,295.0,0.214156


In [None]:
#plt.figure()
df_plot = df_wkly_grid_full[df_wkly_grid_full['mod_data'] == 'train'].groupby(
    ['season', 'week', 'Pasture', 'Steer_ID'])['grazing_rel_freq_seas_grp'].value_counts().reset_index().groupby(
    ['season', 'Pasture', 'level_4']).mean().reset_index().pivot(columns='level_4',
                                                               index=['season', 'Pasture'], values='grazing_rel_freq_seas_grp').reset_index()#.plot(kind='bar', stacked=True)
df_plot['pct_Hi'] = df_plot[['Hi']].sum(axis=1) / df_plot[['Low', 'Mod. Low', 'Avg.', 
                                                  'Mod. Hi', 'Hi']].sum(axis=1)
df_plot['season'] = pd.Categorical(df_plot['season'], ['early', 'mid', 'late'])
#df_plot
plt.figure()
sns.pointplot(x='season', y='pct_Hi', hue='Pasture', data=df_plot)

In [46]:
"""
Save data to make Sankey diagrams in R
"""
df_sankey = pd.DataFrame(columns=['class_early', 'class_mid', 'class_late', 'value'])
df_sankey['class_early'] = df_seas_grid[df_seas_grid['season'] == 'early']['grazing_rel_freq_seas_grp'].reset_index(drop=True)
df_sankey['class_mid'] = df_seas_grid[df_seas_grid['season'] == 'mid']['grazing_rel_freq_seas_grp'].reset_index(drop=True)
df_sankey['class_late'] = df_seas_grid[df_seas_grid['season'] == 'late']['grazing_rel_freq_seas_grp'].reset_index(drop=True)
df_sankey['value'] = 1
sankey_grp = df_sankey.groupby(['class_early', 'class_mid', 'class_late']).sum().reset_index(drop=False)
sankey_grp = sankey_grp.reindex(columns=sankey_grp.columns.tolist())
sankey_grp.to_csv('C:/SPK_local/zTEMP/gps_grid_sankey2.csv')