In [1]:
import os
import re
import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.notebook import tqdm
from sklearn.preprocessing import StandardScaler
from matplotlib.ticker import FormatStrFormatter
from scipy.stats import ttest_1samp
import statsmodels.stats.api as sms
import pickle
%config Completer.use_jedi = False
%matplotlib widget

In [2]:
"""
Define I/O paths
"""
# input directores
inDIR_data = 'C:/Users/sean.kearney/OneDrive - USDA/Documents/Projects/GPS_v_hetgen/data/'
inDIR_mods = 'C:/Users/sean.kearney/OneDrive - USDA/Documents/Projects/GPS_v_hetgen/models/'

# output directories
outDIR_tables = 'C:/Users/sean.kearney/OneDrive - USDA/Documents/Projects/GPS_v_hetgen/models/results_tables/'

# file name of gridded path intensity data (output from gps_to_gridded_path_intensity.ipynb)
griddata_f = 'full_grazing_time_gridded_all_for_model.csv'

# file name of model results
results_f = 'full_grazing_intensity_model_results.csv'

# file name of model coefficients
coefs_f = 'full_grazing_intensity_model_coefficients.csv'

In [3]:
"""
Define models for analyzing
"""
# create list of saved models
#mod_list = ['M0', 'M1', 'M2_bm', 'M2_top', 'M3_pc', 'M3_top', 'M4_top', 'M_best']
mod_list = ['M_best']

# create dictionary to map top models to original models
top_mod_dict = {
    'M2_top': 'M2g',
    'M3_top': 'M3d',
    'M4_top': 'M4h',
    'M5_top': 'M5'
}

# create dictionaries of saved model results for AIC comparison
mod_aic_dict = {
    'TPC2': [top_mod_dict['M2_top'], 'M2_tpc'],
    'TPC3': [top_mod_dict['M3_top'], 'M3_tpc'],
    'TPC4': [top_mod_dict['M4_top'], top_mod_dict['M5_top']],
    'Hw1': ['M1', 'M5'],
    'Hw2': ['M2a', 'M2b', 'M2c', 'M2d', 'M2e', 'M2f', 'M2g'],
    'Hw3': ['M2a', 'M2b', 'M2c', 'M2d', 'M2e', 'M2f', 'M2g'],
    'Hw4a': ['M2a', 'M2b'],
    'Hw4b': ['M2f', 'M2g'],
    'Hw5': ['M2a', 'M2b', 'M2c', 'M2d', 'M2e', 'M2f', 'M2g'],
    'Hw6': ['M2a', 'M2b', 'M2c', 'M2d', 'M2e', 'M2f', 'M2g'],
    'Hw7': ['M3a', 'M3b', 'M3c', 'M3d'],
    'Hw8': ['M3a', 'M3b', 'M3c', 'M3d'],
    'Hw9': ['M3a', 'M3b', 'M3c', 'M3d'],
    'Ha1': ['M0', 'M1', 'M2', 'M3'],
    'Ha2': ['M0', 'M1', 'M2', 'M3', 'M4'],
    'Ha3': ['M0', 'M1', 'M2', 'M3', 'M4'],
    'Ha4': ['M0', 'M1', 'M2', 'M3', 'M4'],
}

# create dictionaries of saved scalers
scaler_dict = {}
for yr in [2016, 2017, 2018]:
    scaler_dict[yr] = {}
    for seas in ['early', 'mid', 'late']:
        scaler_dict[yr][seas] = {}
        for c in ['dFence', 'dTank', 'Biomass', 'CP', 'PC_div']:
            with open(os.path.join(inDIR_mods, 'scaler_' + str(yr) + '_' + seas + '_' + c + '.pkl'), 'rb') as f:
                scaler_dict[yr][seas][c] = pickle.load(f)

In [4]:
"""
Define helper functions
"""
def abline(slope, intercept, color='black', linewidth=None):
    """Plot a line from slope and intercept"""
    axes = plt.gca()
    x_vals = np.array(axes.get_xlim())
    y_vals = intercept + slope * x_vals
    plt.plot(x_vals, y_vals, '-', c=color, linewidth=linewidth)



In [5]:
"""
Load data and models
"""
# read gridded data into dataframe
df_wkly_grid_full = pd.read_csv(os.path.join(inDIR_data, griddata_f), engine='python')

# read model results into dataframe
df_results = pd.read_csv(os.path.join(inDIR_mods, results_f), engine='python')

# read model results into dataframe
df_coefs = pd.read_csv(os.path.join(inDIR_mods, coefs_f), engine='python')

# create dictionary of the yearly models
yrly_mod_dict = {}
for yr in df_wkly_grid_full[df_wkly_grid_full['mod_data'] == 'train']['Year'].unique():
    yrly_mod_dict[yr] = {}
    for seas in df_wkly_grid_full[df_wkly_grid_full['mod_data'] == 'train']['season_str'].unique():
        yrly_mod_dict[yr][seas] = {}
        for mod in mod_list:
            f_matched = [f for f in os.listdir(os.path.join(inDIR_mods, 'seas')) if mod in f and seas in f and str(yr) in f]
            if len(f_matched) == 1:
                yrly_mod_dict[yr][seas][mod] = sm.load(os.path.join(
                    inDIR_mods, 'seas', f_matched[0]))
            else:
                print('ERROR: None and/or multiple matching files found:')
                display(f_matched)

In [6]:
# scale data back to original values
for yr in tqdm(scaler_dict):
    for seas in scaler_dict[yr]:
        for c in scaler_dict[yr][seas]:
            scaler = scaler_dict[yr][seas][c]
            df_wkly_grid_full.loc[(df_wkly_grid_full['Year'] == yr) & 
                                  (df_wkly_grid_full['season_str'] == seas), c + '_orig'] = scaler.inverse_transform(
                df_wkly_grid_full.loc[(df_wkly_grid_full['Year'] == yr) &
                                      (df_wkly_grid_full['season_str'] == seas), c].values.reshape(-1, 1)).flatten()

  0%|          | 0/3 [00:00<?, ?it/s]

In [7]:
"""
Predict grazing intensity for chosen model
"""
# Change rare classes to 'Other' to match training data
df_wkly_grid_full['PC_dmt'].replace({'Bare_veg': 'Other', 'Bare': 'Other', 'UNK': 'Other', 'Shrub': 'Other'}, 
                               inplace=True)

# set the model for prediction
pred_model = 'M_best'

# predict relative probability of grazing across all data from seasonal models
for (yr, seas), df_sub in tqdm(df_wkly_grid_full.groupby(['Year', 'season_str'])):
    df_wkly_grid_full.loc[(df_wkly_grid_full['Year'] == yr) &
                          (df_wkly_grid_full['season_str'] == seas), 
                          'pred_seas_secs'] = yrly_mod_dict[yr][seas][pred_model].predict(df_sub)
df_wkly_grid_full['pred_seas'] = df_wkly_grid_full['pred_seas_secs'] * df_wkly_grid_full['grazing_wkly_sum'] / (df_wkly_grid_full['grazing_wkly_sum'] / 
                                                                        df_wkly_grid_full.groupby(
                                                                                 ['Pasture', 
                                                                                  'Year',
                                                                                  'season_str',
                                                                                  'mod_data'])['grazing_secs'].transform('count'))
                                                                        
df_wkly_grid_full['pred_seas'][df_wkly_grid_full['pred_seas'] > 10.0] = 10.0
    # * df_wkly_grid_full['grazing_wkly_sum'] / (df_wkly_grid_full['grazing_wkly_sum'] / 
                                                                             #df_wkly_grid_full.groupby(
                                                                              #  ['Pasture', 'mod_data'])['grazing_secs'].transform('count'))

  0%|          | 0/9 [00:00<?, ?it/s]

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_wkly_grid_full['pred_seas'][df_wkly_grid_full['pred_seas'] > 10.0] = 10.0


In [8]:
"""
Bin seasonal predictions for categorical plotting and mapping
"""
rel_freq_bins = [0.0, 0.6, 0.8, 1.2, 1.4, 10.0]
bin_labs = ['Low', 'Mod. Low', 'Avg.', 'Mod. Hi', 'Hi']
df_wkly_grid_full['grazing_rel_freq_grp'] = pd.cut(df_wkly_grid_full['grazing_rel_freq'], 
                                          bins=rel_freq_bins, 
                                          labels=bin_labs,
                                          include_lowest=True)
df_wkly_grid_full['pred_seas_grp'] = pd.cut(df_wkly_grid_full['pred_seas'], 
                                          bins=rel_freq_bins, 
                                          labels=bin_labs,
                                          include_lowest=True)

In [9]:
"""
Create and organize subsets of gridded data for plotting and analyses
"""
df_wkly_grid_full['season'] = pd.Categorical(df_wkly_grid_full['season_str'], ['early', 'mid', 'late'])
df_wkly_grid_train = df_wkly_grid_full[df_wkly_grid_full['mod_data'] == 'train']
df_wkly_grid_test = df_wkly_grid_full[df_wkly_grid_full['mod_data'] == 'test']


In [10]:
#df_wkly_grid_train[df_wkly_grid_train['pred_seas'] == 10]['pred_seas_grp'].value_counts()

In [11]:
"""
Plot overall observed versus predicted relative grazing intensity for best model
"""
fig, ax = plt.subplots(figsize=(10, 6))
sns.pointplot(x='pred_seas_grp', y='grazing_rel_freq',
              data=df_wkly_grid_train, ax=ax, 
              ci=95, n_boot=500, color='black', capsize=0.1, join=False)
ax.axhline(y=1.0, linestyle='dashed', c='gray')
ax.set_xlabel('Predicted relative grazing intensity (binned)',  size=12.0, labelpad=15)
ax.set_ylabel('Observed relative grazing intensity', size=12.0, labelpad=15)
ax.tick_params(axis='y', labelsize=14.0)
ax.set_xticklabels(ax.get_xticklabels(), size=14.0)
plt.savefig('C:/Users/sean.kearney/OneDrive - USDA/Documents/Projects/GPS_v_hetgen/figures/Best_model_fit.png', 
           bbox_inches='tight', dpi=300)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [12]:
"""
Plot seasonal observed versus predicted relative grazing intensity for best model
"""
g = sns.catplot(data=df_wkly_grid_train, x='pred_seas_grp', y='grazing_rel_freq', col='season', row='Year', kind='point',
                legend=False, sharey=True, height=2.5, aspect=1.2, margin_titles=True,
                ci=95, n_boot=500, color='black', capsize=0.1, join=False)
g.set(xlabel='',
     ylabel='')
g.fig.supxlabel('Predicted relative grazing intensity (binned)', y=-0.1)
g.fig.supylabel('Observed relative grazing intensity', x=-0.05)
for ax in g.axes.flat:
    ax.axhline(y=1.0, linestyle='dashed', color='black')
    ax.tick_params(axis='y', labelsize=12.0)
    ax.set_xticklabels(ax.get_xticklabels(), size=12.0, rotation=45, horizontalalignment='right')
    plt.setp(ax.texts, text="") # remove the original texts


g.set_axis_labels()
g.set_titles(row_template = '{row_name}', 
             col_template = '{col_name}-season', 
             size=14.0,
             fontweight='bold')
plt.subplots_adjust(hspace=0.2, wspace=0.2)
plt.savefig('C:/Users/sean.kearney/OneDrive - USDA/Documents/Projects/GPS_v_hetgen/figures/Best_model_fit_X_season.png', 
           bbox_inches='tight', dpi=300, pad_inches=0.25)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [13]:
"""
Get DataFrame of coefficients for best models for each season
"""
coefs_dict = {}

for yr in tqdm(yrly_mod_dict):
    coefs_dict[yr] = {}
    for seas in yrly_mod_dict[yr]:
        coefs_dict[yr][seas] = {}
        for mod in yrly_mod_dict[yr][seas]:
            coefs_dict[yr][seas][mod] = pd.read_html((yrly_mod_dict[yr][seas][mod].summary().tables[1]).as_html(), 
                                                      header=0, index_col=0)[0]

  0%|          | 0/3 [00:00<?, ?it/s]

In [14]:
"""
Reformat and save coefficients for final model for table
"""
for yr in tqdm(coefs_dict):
    for seas in coefs_dict[yr]:
        for mod in coefs_dict[yr][seas]:
            coefs_dict[yr][seas][mod] = coefs_dict[yr][seas][mod].rename_axis('Param').reset_index()
            coefs_dict[yr][seas][mod]['Param'] = coefs_dict[yr][seas][mod]['Param'].apply(
                lambda x: re.sub('\]', 
                                 '',
                                 re.sub('C\(PC_dmt, Treatment\(reference\="C3_C4_mix"\)\)\[T\.', 
                                        '', 
                                        str(x))))
            coefs_dict[yr][seas][mod]['Param'] = coefs_dict[yr][seas][mod]['Param'].apply(
                lambda x: re.sub('\]', 
                                 '',
                                 re.sub('C\(TPC_c, Treatment\(reference\="Flat Plains"\)\)\[T\.', 
                                        'TPC:', 
                                        str(x))))
            coefs_dict[yr][seas][mod]['Param'] = coefs_dict[yr][seas][mod]['Param'].apply(
                lambda x: re.sub('TPC:', '', re.sub('TPC:Other', 'Complex', x)))
        
idx = 0
for yr in coefs_dict:
    for seas in coefs_dict[yr]:
        if idx == 0:
            df_coefs_seas = coefs_dict[yr][seas]['M_best']
            df_coefs_seas['season_str'] = seas
            df_coefs_seas['Year'] = yr
        else:
            df_tmp = coefs_dict[yr][seas]['M_best']
            df_tmp['season_str'] = seas
            df_tmp['Year'] = yr
            df_coefs_seas = df_coefs_seas.append(df_tmp)
        idx += 1
#        coefs_dict[yr][seas]['M_best'].to_csv(os.path.join(outDIR_tables, 'M_best_coefs_' + str(yr) + '_' + seas + '.csv'))

  0%|          | 0/3 [00:00<?, ?it/s]

In [15]:
df_coefs_seas['Model'] = 'M_best'
df_coefs_seas['coef_ci_lwr'] = df_coefs_seas['[0.025']
df_coefs_seas['coef_ci_upr'] = df_coefs_seas['0.975]']
df_coefs_seas['Param'] = df_coefs_seas['Param'].apply(
                lambda x: re.sub('\]', 
                                 '',
                                 re.sub('C\(PC_dmt, Treatment\(reference\="C3_C4_mix"\)\)\[T\.', 
                                        '', 
                                        str(x))))

df_coefs_seas['Param'] = df_coefs_seas['Param'].apply(
                lambda x: re.sub('\]', 
                                 '',
                                 re.sub('C\(TPC_c, Treatment\(reference\="Flat Plains"\)\)\[T\.', 
                                        'TPC:', 
                                        str(x))))

df_coefs_seas['Param'] = df_coefs_seas['Param'].apply(
                lambda x: re.sub('TPC:', '', re.sub('TPC:Other', 'Complex', x)))

df_coefs_seas['significant'] = df_coefs_seas.apply(lambda v: 'Yes' if np.sign(v['coef_ci_lwr']) == np.sign(v['coef_ci_upr']) else 'No', axis=1)

df_coefs_seas['season'] = pd.Categorical(df_coefs_seas['season_str'], ['early', 'mid', 'late'])

df_coefs_seas = df_coefs_seas.sort_values(['Year', 'season'])

df_coefs_seas

Unnamed: 0,Param,coef,std err,z,P>|z|,[0.025,0.975],season_str,Year,Model,coef_ci_lwr,coef_ci_upr,significant,season
0,Intercept,-7.4917,0.028,-265.162,0.000,-7.547,-7.436,early,2016,M_best,-7.547,-7.436,Yes,early
1,C3,0.3568,0.087,4.111,0.000,0.187,0.527,early,2016,M_best,0.187,0.527,Yes,early
2,C4,-0.1739,0.029,-5.985,0.000,-0.231,-0.117,early,2016,M_best,-0.231,-0.117,Yes,early
3,Other,1.0033,0.152,6.581,0.000,0.704,1.302,early,2016,M_best,0.704,1.302,Yes,early
4,Saltgrass,-0.3233,0.052,-6.159,0.000,-0.426,-0.220,early,2016,M_best,-0.426,-0.220,Yes,early
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32,Saltgrass:PC_div,0.0740,0.048,1.546,0.122,-0.020,0.168,late,2018,M_best,-0.020,0.168,No,late
33,PC_div:Biomass,0.0526,0.017,3.116,0.002,0.020,0.086,late,2018,M_best,0.020,0.086,Yes,late
34,PC_div:I(Biomass ** 2),-0.0039,0.006,-0.688,0.491,-0.015,0.007,late,2018,M_best,-0.015,0.007,No,late
35,PC_div:CP,-0.0523,0.025,-2.133,0.033,-0.100,-0.004,late,2018,M_best,-0.100,-0.004,Yes,late


In [16]:
 
"""df_coefs_wkly = df_coefs.drop(columns='Unnamed: 0').groupby(['Year', 
                                             'Season',
                                             'Model', 
                                             'week', 
                                             'Pasture', 
                                             'Param']).mean().reset_index()
df_coefs_past = df_coefs_wkly.groupby(['Year',
                                       'Season',
                                       'Model',
                                       'Pasture',
                                       'Param']).mean().reset_index().drop(columns='week')"""
"""
df_coefs_seas = df_coefs.drop(columns='Unnamed: 0').groupby(['Year',                                             
                       'season_str',
                       'Model',
                       'Param']).mean().reset_index()#.drop(columns='week')
df_coefs_seas['coef_ci_lwr'] = df_coefs.drop(columns='Unnamed: 0').groupby(['Year',                                             
                       'season_str',
                       'Model',
                       'Param'])['coef'].apply(lambda x: np.mean(x) - 1.97*(np.std(x)/np.sqrt(len(x)))).values
df_coefs_seas['coef_ci_upr'] = df_coefs.drop(columns='Unnamed: 0').groupby(['Year',                                             
                       'season_str',
                       'Model',
                       'Param'])['coef'].apply(lambda x: np.mean(x) + 1.97*(np.std(x)/np.sqrt(len(x)))).values
df_coefs_seas['Param'] = df_coefs_seas['Param'].apply(
                lambda x: re.sub('\]', 
                                 '',
                                 re.sub('C\(PC_dmt, Treatment\(reference\="C3_C4_mix"\)\)\[T\.', 
                                        '', 
                                        str(x))))

df_coefs_seas['Param'] = df_coefs_seas['Param'].apply(
                lambda x: re.sub('\]', 
                                 '',
                                 re.sub('C\(TPC_c, Treatment\(reference\="Flat Plains"\)\)\[T\.', 
                                        'TPC:', 
                                        str(x))))

df_coefs_seas['Param'] = df_coefs_seas['Param'].apply(
                lambda x: re.sub('TPC:', '', re.sub('TPC:Other', 'Complex', x)))

df_coefs_seas['significant'] = df_coefs_seas.apply(lambda v: 'Yes' if np.sign(v['coef_ci_lwr']) == np.sign(v['coef_ci_upr']) else 'No', axis=1)

df_coefs_seas['season'] = pd.Categorical(df_coefs_seas['season_str'], ['early', 'mid', 'late'])
"""
for yr in df_coefs_seas['Year'].unique():
    for seas in df_coefs_seas['season_str'].unique():
        df_coefs_seas[(df_coefs_seas['Year'] == yr) &
                              (df_coefs_seas['season_str'] == seas) &
                              (df_coefs_seas['Model'] == 'M_best')].to_csv(os.path.join(outDIR_tables, 'M_best_coefs_' + str(yr) + '_' + seas + '.csv'))

In [17]:
"""
Display best model coefficients for chosen season. Significant if red
"""

def style_sig(v, props=''):
    return np.repeat(props, len(v)) if v['significant'] == 'Yes' else None

for yr in tqdm(df_coefs_seas['Year'].unique()):
    for seas in df_coefs_seas[df_coefs_seas['Year'] == yr]['season'].unique():
        print('\n---------------------- ' + str(yr) + ': ' + seas + ' --------------------')
        display(df_coefs_seas[(df_coefs_seas['Year'] == yr) &
                              (df_coefs_seas['season'] == seas) &
                              (df_coefs_seas['Model'] == 'M_best')].style.apply(style_sig, 
                                                                                props='color:red;',
                                                                                axis=1))


  0%|          | 0/3 [00:00<?, ?it/s]


---------------------- 2016: early --------------------


Unnamed: 0,Param,coef,std err,z,P>|z|,[0.025,0.975],season_str,Year,Model,coef_ci_lwr,coef_ci_upr,significant,season
0,Intercept,-7.4917,0.028,-265.162,0.0,-7.547,-7.436,early,2016,M_best,-7.547,-7.436,Yes,early
1,C3,0.3568,0.087,4.111,0.0,0.187,0.527,early,2016,M_best,0.187,0.527,Yes,early
2,C4,-0.1739,0.029,-5.985,0.0,-0.231,-0.117,early,2016,M_best,-0.231,-0.117,Yes,early
3,Other,1.0033,0.152,6.581,0.0,0.704,1.302,early,2016,M_best,0.704,1.302,Yes,early
4,Saltgrass,-0.3233,0.052,-6.159,0.0,-0.426,-0.22,early,2016,M_best,-0.426,-0.22,Yes,early
5,Highlands,-0.2137,0.032,-6.709,0.0,-0.276,-0.151,early,2016,M_best,-0.276,-0.151,Yes,early
6,Lowlands,0.6257,0.029,21.658,0.0,0.569,0.682,early,2016,M_best,0.569,0.682,Yes,early
7,Open Slopes,0.2123,0.028,7.692,0.0,0.158,0.266,early,2016,M_best,0.158,0.266,Yes,early
8,Complex,0.4281,0.121,3.538,0.0,0.191,0.665,early,2016,M_best,0.191,0.665,Yes,early
9,dFence,-0.1338,0.01,-13.095,0.0,-0.154,-0.114,early,2016,M_best,-0.154,-0.114,Yes,early



---------------------- 2016: mid --------------------


Unnamed: 0,Param,coef,std err,z,P>|z|,[0.025,0.975],season_str,Year,Model,coef_ci_lwr,coef_ci_upr,significant,season
0,Intercept,-7.5475,0.028,-269.879,0.0,-7.602,-7.493,mid,2016,M_best,-7.602,-7.493,Yes,mid
1,C3,0.1876,0.069,2.72,0.007,0.052,0.323,mid,2016,M_best,0.052,0.323,Yes,mid
2,C4,0.3214,0.028,11.625,0.0,0.267,0.376,mid,2016,M_best,0.267,0.376,Yes,mid
3,Other,0.5047,0.119,4.238,0.0,0.271,0.738,mid,2016,M_best,0.271,0.738,Yes,mid
4,Saltgrass,-0.2798,0.057,-4.874,0.0,-0.392,-0.167,mid,2016,M_best,-0.392,-0.167,Yes,mid
5,Highlands,-0.1304,0.032,-4.102,0.0,-0.193,-0.068,mid,2016,M_best,-0.193,-0.068,Yes,mid
6,Lowlands,0.2687,0.028,9.492,0.0,0.213,0.324,mid,2016,M_best,0.213,0.324,Yes,mid
7,Open Slopes,-0.0139,0.028,-0.5,0.617,-0.068,0.041,mid,2016,M_best,-0.068,0.041,No,mid
8,Complex,0.2008,0.121,1.659,0.097,-0.036,0.438,mid,2016,M_best,-0.036,0.438,No,mid
9,dFence,-0.1311,0.01,-12.666,0.0,-0.151,-0.111,mid,2016,M_best,-0.151,-0.111,Yes,mid



---------------------- 2016: late --------------------


Unnamed: 0,Param,coef,std err,z,P>|z|,[0.025,0.975],season_str,Year,Model,coef_ci_lwr,coef_ci_upr,significant,season
0,Intercept,-7.5833,0.029,-262.783,0.0,-7.64,-7.527,late,2016,M_best,-7.64,-7.527,Yes,late
1,C3,0.396,0.056,7.114,0.0,0.287,0.505,late,2016,M_best,0.287,0.505,Yes,late
2,C4,0.1585,0.032,4.927,0.0,0.095,0.222,late,2016,M_best,0.095,0.222,Yes,late
3,Other,0.5158,0.164,3.139,0.002,0.194,0.838,late,2016,M_best,0.194,0.838,Yes,late
4,Saltgrass,0.5108,0.077,6.617,0.0,0.359,0.662,late,2016,M_best,0.359,0.662,Yes,late
5,Highlands,-0.2005,0.032,-6.298,0.0,-0.263,-0.138,late,2016,M_best,-0.263,-0.138,Yes,late
6,Lowlands,0.2189,0.029,7.647,0.0,0.163,0.275,late,2016,M_best,0.163,0.275,Yes,late
7,Open Slopes,-0.0278,0.027,-1.024,0.306,-0.081,0.025,late,2016,M_best,-0.081,0.025,No,late
8,Complex,0.0113,0.121,0.094,0.925,-0.225,0.248,late,2016,M_best,-0.225,0.248,No,late
9,dFence,-0.0286,0.01,-2.793,0.005,-0.049,-0.009,late,2016,M_best,-0.049,-0.009,Yes,late



---------------------- 2017: early --------------------


Unnamed: 0,Param,coef,std err,z,P>|z|,[0.025,0.975],season_str,Year,Model,coef_ci_lwr,coef_ci_upr,significant,season
0,Intercept,-7.3018,0.025,-286.612,0.0,-7.352,-7.252,early,2017,M_best,-7.352,-7.252,Yes,early
1,C3,0.0762,0.057,1.338,0.181,-0.035,0.188,early,2017,M_best,-0.035,0.188,No,early
2,C4,-0.3172,0.028,-11.413,0.0,-0.372,-0.263,early,2017,M_best,-0.372,-0.263,Yes,early
3,Other,0.1993,0.143,1.391,0.164,-0.082,0.48,early,2017,M_best,-0.082,0.48,No,early
4,Saltgrass,0.4074,0.086,4.759,0.0,0.24,0.575,early,2017,M_best,0.24,0.575,Yes,early
5,Highlands,-0.3725,0.032,-11.8,0.0,-0.434,-0.311,early,2017,M_best,-0.434,-0.311,Yes,early
6,Lowlands,0.3263,0.028,11.743,0.0,0.272,0.381,early,2017,M_best,0.272,0.381,Yes,early
7,Open Slopes,-0.0801,0.027,-2.93,0.003,-0.134,-0.027,early,2017,M_best,-0.134,-0.027,Yes,early
8,Complex,-0.0474,0.12,-0.393,0.694,-0.283,0.189,early,2017,M_best,-0.283,0.189,No,early
9,dFence,-0.0666,0.01,-6.763,0.0,-0.086,-0.047,early,2017,M_best,-0.086,-0.047,Yes,early



---------------------- 2017: mid --------------------


Unnamed: 0,Param,coef,std err,z,P>|z|,[0.025,0.975],season_str,Year,Model,coef_ci_lwr,coef_ci_upr,significant,season
0,Intercept,-7.0926,0.033,-215.135,0.0,-7.157,-7.028,mid,2017,M_best,-7.157,-7.028,Yes,mid
1,C3,-0.0784,0.12,-0.656,0.512,-0.313,0.156,mid,2017,M_best,-0.313,0.156,No,mid
2,C4,-0.0489,0.035,-1.381,0.167,-0.118,0.02,mid,2017,M_best,-0.118,0.02,No,mid
3,Other,-0.5674,0.22,-2.575,0.01,-0.999,-0.136,mid,2017,M_best,-0.999,-0.136,Yes,mid
4,Saltgrass,-0.1631,0.099,-1.653,0.098,-0.356,0.03,mid,2017,M_best,-0.356,0.03,No,mid
5,Highlands,-0.7045,0.038,-18.387,0.0,-0.78,-0.629,mid,2017,M_best,-0.78,-0.629,Yes,mid
6,Lowlands,-0.0406,0.034,-1.202,0.229,-0.107,0.026,mid,2017,M_best,-0.107,0.026,No,mid
7,Open Slopes,-0.3678,0.033,-11.168,0.0,-0.432,-0.303,mid,2017,M_best,-0.432,-0.303,Yes,mid
8,Complex,-0.5526,0.147,-3.771,0.0,-0.84,-0.265,mid,2017,M_best,-0.84,-0.265,Yes,mid
9,dFence,-0.0284,0.012,-2.285,0.022,-0.053,-0.004,mid,2017,M_best,-0.053,-0.004,Yes,mid



---------------------- 2017: late --------------------


Unnamed: 0,Param,coef,std err,z,P>|z|,[0.025,0.975],season_str,Year,Model,coef_ci_lwr,coef_ci_upr,significant,season
0,Intercept,-7.1994,0.029,-247.044,0.0,-7.257,-7.142,late,2017,M_best,-7.257,-7.142,Yes,late
1,C3,-0.2334,0.058,-4.029,0.0,-0.347,-0.12,late,2017,M_best,-0.347,-0.12,Yes,late
2,C4,-0.1522,0.035,-4.402,0.0,-0.22,-0.084,late,2017,M_best,-0.22,-0.084,Yes,late
3,Other,0.3943,0.153,2.578,0.01,0.095,0.694,late,2017,M_best,0.095,0.694,Yes,late
4,Saltgrass,0.2711,0.059,4.623,0.0,0.156,0.386,late,2017,M_best,0.156,0.386,Yes,late
5,Highlands,-0.3901,0.038,-10.321,0.0,-0.464,-0.316,late,2017,M_best,-0.464,-0.316,Yes,late
6,Lowlands,0.152,0.034,4.44,0.0,0.085,0.219,late,2017,M_best,0.085,0.219,Yes,late
7,Open Slopes,-0.1833,0.032,-5.696,0.0,-0.246,-0.12,late,2017,M_best,-0.246,-0.12,Yes,late
8,Complex,-0.2184,0.144,-1.516,0.129,-0.501,0.064,late,2017,M_best,-0.501,0.064,No,late
9,dFence,-0.0246,0.012,-2.024,0.043,-0.049,-0.001,late,2017,M_best,-0.049,-0.001,Yes,late



---------------------- 2018: early --------------------


Unnamed: 0,Param,coef,std err,z,P>|z|,[0.025,0.975],season_str,Year,Model,coef_ci_lwr,coef_ci_upr,significant,season
0,Intercept,-7.4493,0.033,-226.839,0.0,-7.514,-7.385,early,2018,M_best,-7.514,-7.385,Yes,early
1,C3,-0.1077,0.132,-0.814,0.416,-0.367,0.152,early,2018,M_best,-0.367,0.152,No,early
2,C4,-0.0368,0.035,-1.045,0.296,-0.106,0.032,early,2018,M_best,-0.106,0.032,No,early
3,Other,0.7635,0.158,4.832,0.0,0.454,1.073,early,2018,M_best,0.454,1.073,Yes,early
4,Saltgrass,0.5288,0.106,5.002,0.0,0.322,0.736,early,2018,M_best,0.322,0.736,Yes,early
5,Highlands,-0.1233,0.04,-3.1,0.002,-0.201,-0.045,early,2018,M_best,-0.201,-0.045,Yes,early
6,Lowlands,0.3438,0.034,10.004,0.0,0.276,0.411,early,2018,M_best,0.276,0.411,Yes,early
7,Open Slopes,-0.0514,0.034,-1.532,0.125,-0.117,0.014,early,2018,M_best,-0.117,0.014,No,early
8,Complex,0.0093,0.145,0.064,0.949,-0.275,0.294,early,2018,M_best,-0.275,0.294,No,early
9,dFence,-0.0673,0.012,-5.468,0.0,-0.091,-0.043,early,2018,M_best,-0.091,-0.043,Yes,early



---------------------- 2018: mid --------------------


Unnamed: 0,Param,coef,std err,z,P>|z|,[0.025,0.975],season_str,Year,Model,coef_ci_lwr,coef_ci_upr,significant,season
0,Intercept,-7.457,0.029,-258.01,0.0,-7.514,-7.4,mid,2018,M_best,-7.514,-7.4,Yes,mid
1,C3,0.3301,0.056,5.877,0.0,0.22,0.44,mid,2018,M_best,0.22,0.44,Yes,mid
2,C4,0.0197,0.032,0.621,0.534,-0.042,0.082,mid,2018,M_best,-0.042,0.082,No,mid
3,Other,0.5468,0.135,4.04,0.0,0.282,0.812,mid,2018,M_best,0.282,0.812,Yes,mid
4,Saltgrass,0.5401,0.058,9.326,0.0,0.427,0.654,mid,2018,M_best,0.427,0.654,Yes,mid
5,Highlands,-0.0517,0.035,-1.456,0.145,-0.121,0.018,mid,2018,M_best,-0.121,0.018,No,mid
6,Lowlands,0.0478,0.032,1.495,0.135,-0.015,0.111,mid,2018,M_best,-0.015,0.111,No,mid
7,Open Slopes,-0.0435,0.03,-1.429,0.153,-0.103,0.016,mid,2018,M_best,-0.103,0.016,No,mid
8,Complex,-0.024,0.137,-0.176,0.861,-0.293,0.245,mid,2018,M_best,-0.293,0.245,No,mid
9,dFence,-0.0099,0.011,-0.875,0.381,-0.032,0.012,mid,2018,M_best,-0.032,0.012,No,mid



---------------------- 2018: late --------------------


Unnamed: 0,Param,coef,std err,z,P>|z|,[0.025,0.975],season_str,Year,Model,coef_ci_lwr,coef_ci_upr,significant,season
0,Intercept,-7.1647,0.031,-233.78,0.0,-7.225,-7.105,late,2018,M_best,-7.225,-7.105,Yes,late
1,C3,-0.0927,0.07,-1.319,0.187,-0.23,0.045,late,2018,M_best,-0.23,0.045,No,late
2,C4,-0.1762,0.033,-5.37,0.0,-0.241,-0.112,late,2018,M_best,-0.241,-0.112,Yes,late
3,Other,0.1587,0.174,0.912,0.362,-0.182,0.5,late,2018,M_best,-0.182,0.5,No,late
4,Saltgrass,0.1659,0.069,2.402,0.016,0.031,0.301,late,2018,M_best,0.031,0.301,Yes,late
5,Highlands,0.0001,0.036,0.003,0.997,-0.071,0.071,late,2018,M_best,-0.071,0.071,No,late
6,Lowlands,0.1244,0.032,3.913,0.0,0.062,0.187,late,2018,M_best,0.062,0.187,Yes,late
7,Open Slopes,0.0785,0.031,2.559,0.011,0.018,0.139,late,2018,M_best,0.018,0.139,Yes,late
8,Complex,-0.2045,0.136,-1.508,0.132,-0.47,0.061,late,2018,M_best,-0.47,0.061,No,late
9,dFence,-0.0154,0.012,-1.323,0.186,-0.038,0.007,late,2018,M_best,-0.038,0.007,No,late


In [18]:
"""
FIGURE: Varibility in crude protein (a) and biomass (b) by sub-season and year
"""
fig, axs = plt.subplots(nrows=2, figsize=(8, 6), sharex=True)
p1 = sns.violinplot(x='season', y='CP_orig', hue='Year', bw=0.5, cut=0, linewidth=0.75, inner=None,
            data=df_wkly_grid_train, ax=axs[0], zorder=10)
#axs[0].fill_between(range(-1, 4), 8, 13, color='#117733', alpha=0.2, edgecolor=None, zorder=0)
p1.fill_between(np.arange(-0.5, 3), 7, 8, color='#DDCC77', alpha=0.4, edgecolor=None, zorder=0)
p1.fill_between(np.arange(-0.5, 3), 0, 7, color='#CC6677', alpha=0.4, edgecolor=None, zorder=0)
p1.set_ylim((5,13))
p1.set_xmargin(0.0)
p1.text(x=-0.9, y=13.0, s='(a)', fontsize=16.0)
h, l = p1.get_legend_handles_labels()
#leg.update_from(p1)
p1_mean = sns.pointplot(x='season', y='CP_orig',  hue='Year', ci=None,
          join=False, dodge=0.535, scale=0.4, color='black',
          data=df_wkly_grid_train, ax=axs[0])
p1_mean.legend().remove()
axs[0].set_xlabel(None)
p1.set_ylabel('Crude protein (%)')
p1.legend(h, l)

p2 = sns.violinplot(x='season', y='Biomass_orig', hue='Year', bw=0.5, cut=0, linewidth=0.75, inner=None,
            data=df_wkly_grid_train, ax=axs[1])
#axs[1].fill_between(range(-1, 4), 700, 3500, color='#117733', alpha=0.2, edgecolor=None, zorder=0)
p2.fill_between(np.arange(-0.5, 3), 350, 700, color='#DDCC77', alpha=0.4, edgecolor=None, zorder=0)
p2.fill_between(np.arange(-0.5, 3), 0, 350, color='#CC6677', alpha=0.4, edgecolor=None, zorder=0)
p2.set_ylim((0, 3500))
p2.legend().remove()
p2.set_xmargin(0.0)
p2.text(x=-0.9, y=3750, s='(b)', fontsize=16.0)
p2_mean = sns.pointplot(x='season', y='Biomass_orig',  hue='Year', ci=None,
          join=False, dodge=0.535, scale=0.4, color='black',
          data=df_wkly_grid_train, ax=axs[1])
p2_mean.legend().remove()
p2.set_xticklabels([x.get_text() + '-season' for x in p2.get_xticklabels()])
axs[1].set_xlabel(None)
p2.set_ylabel('Biomass (kg/ha)')

plt.savefig('C:/Users/sean.kearney/OneDrive - USDA/Documents/Projects/GPS_v_hetgen/figures/violinplots_CP_BM_X_Year_X_Season.png', 
            bbox_inches='tight', dpi=300)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [19]:
"""
FIGURE: Covariance in crude protein and biomass by year, sub-season and pasture
"""
def get_cov_ellipse(cov, centre, nstd, **kwargs):
    from matplotlib.patches import Ellipse
    """
    Return a matplotlib Ellipse patch representing the covariance matrix
    cov centred at centre and scaled by the factor nstd.

    """

    # Find and sort eigenvalues and eigenvectors into descending order
    eigvals, eigvecs = np.linalg.eigh(cov)
    order = eigvals.argsort()[::-1]
    eigvals, eigvecs = eigvals[order], eigvecs[:, order]

    # The anti-clockwise angle to rotate our ellipse by 
    vx, vy = eigvecs[:,0][0], eigvecs[:,0][1]
    theta = np.arctan2(vy, vx)

    # Width and height of ellipse to draw
    width, height = 2 * nstd * np.sqrt(eigvals)
    return Ellipse(xy=centre, width=width, height=height,
                   angle=np.degrees(theta), **kwargs)

def draw_cov_ellipse(*args, **kwargs):
    data = kwargs.pop('data')
    c = kwargs.pop('color')
    x = kwargs.pop('x')
    y = kwargs.pop('y')
    cov = np.cov(data.dropna(axis=0)[x], data.dropna(axis=0)[y])
    e = get_cov_ellipse(cov, (data.dropna(axis=0)[x].mean(), 
                              data.dropna(axis=0)[y].mean()), 2, fc='none', ec=c, alpha=1.0)
    ax = plt.gca()
    ax.scatter(data.dropna(axis=0)[x], data.dropna(axis=0)[y], s=0.0, alpha=0.3)
    ax.add_artist(e)

g = sns.FacetGrid(data=df_wkly_grid_train,#[df_wkly_grid_train['Pasture'].isin(['17N', '20SE'])],#[df_wkly_grid_train['week'].isin([23, 29, 36])], 
                 col='season', row='Year', hue='Pasture', legend_out=True, margin_titles=True)
g.map_dataframe(sns.scatterplot, x='Biomass_orig', y='CP_orig', edgecolor=None, alpha=0.1, s=4.0).add_legend()
g.map_dataframe(draw_cov_ellipse, x='Biomass_orig', y='CP_orig')#.add_legend()
g.set_xlabels('Biomass (kg/ha)')
g.set_ylabels('Crude protein (%)')

leg = g.legend
for hand in leg.legendHandles:
    hand._sizes = [22.0]
    hand.set_alpha(1.0)

for ax in g.axes.flat:
    ax.fill_between(np.arange(0, 3600), 7, 8,
                       color='#DDCC77', alpha=0.4, edgecolor=None, zorder=0)
    ax.fill_between(np.arange(0, 3600), 5.5, 7,
                       color='#CC6677', alpha=0.4, edgecolor=None, zorder=0)
    ax.fill_between(range(350, 700), 5.5, 12.5, 
                    color='#DDCC77', alpha=0.4, edgecolor=None, zorder=0)
    ax.fill_between(range(0, 350), 5.5, 12.5, 
                    color='#CC6677', alpha=0.4, edgecolor=None, zorder=0)
    plt.setp(ax.texts, text="") # remove the original texts

g.set_titles(row_template = '{row_name}', col_template = '{col_name}-season', fontweight='bold')
plt.savefig('C:/Users/sean.kearney/OneDrive - USDA/Documents/Projects/GPS_v_hetgen/figures/covariance_CP_BM_X_Year_X_Season_X_Pasture.png', 
            bbox_inches='tight', dpi=300)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [20]:
"""
FIGURE: Varibility in crude protein (a) and biomass (b) by sub-season across plant communities
"""
fig, axs = plt.subplots(nrows=2, ncols=len(df_wkly_grid_train['Year'].unique()),
                        figsize=(12, 6), sharex=True, gridspec_kw={'right': 0.825})
for idx, yr in enumerate(df_wkly_grid_train['Year'].unique()):
    p1 = sns.violinplot(x='season', y='CP_orig', hue='PC_dmt',  bw=0.5, cut=0, inner=None, linewidth=0.75,
                data=df_wkly_grid_train[df_wkly_grid_train['Year'] == yr], ax=axs[0, idx],
                       hue_order=['C4', 'C3_C4_mix', 'C3', 'Saltgrass', 'Other'])
    
    axs[0, idx].fill_between(np.arange(-0.5, 3), 7, 8, color='#DDCC77', alpha=0.4, edgecolor=None, zorder=0)
    axs[0, idx].fill_between(np.arange(-0.5, 3), 0, 7, color='#CC6677', alpha=0.4, edgecolor=None, zorder=0)
    handles, labels = axs[0,idx].get_legend_handles_labels()
    labels = ['C4/C3 mix' if x == 'C3_C4_mix' else x for x in labels] 
    p1.legend().remove()
    p1.set_ylim((5,13))
    p1.set_xmargin(0.0)
    p1.set_title(yr, size=12.0)
    p1_mean = sns.pointplot(x='season', y='CP_orig',  hue='PC_dmt', ci=None,
                  hue_order=['C4', 'C3_C4_mix', 'C3', 'Saltgrass', 'Other'], join=False, dodge=0.65, scale=0.3, color='black',
                  data=df_wkly_grid_train[df_wkly_grid_train['Year'] == yr], ax=axs[0, idx])
    p1_mean.legend().remove()
    axs[0,idx].set_ylabel('Crude protein (%)', size=12, labelpad=5)
    axs[0,idx].set_xlabel(None)

    p2 = sns.violinplot(x='season', y='Biomass_orig', hue='PC_dmt',  bw=0.5, cut=0, inner=None, linewidth=0.75,
                data=df_wkly_grid_train[df_wkly_grid_train['Year'] == yr], ax=axs[1, idx],
                       hue_order=['C4', 'C3_C4_mix', 'C3', 'Saltgrass', 'Other'])
    axs[1,idx].fill_between(np.arange(-0.5, 3), 350, 700, color='#DDCC77', alpha=0.4, edgecolor=None, zorder=0)
    axs[1,idx].fill_between(np.arange(-0.5, 3), 0, 350, color='#CC6677', alpha=0.4, edgecolor=None, zorder=0)
    p2.legend().remove()
    p2.set_ylim((0, 3500))
    p2.set_xmargin(0.0)
    p2_mean = sns.pointplot(x='season', y='Biomass_orig',  hue='PC_dmt', ci=None,
              hue_order=['C4', 'C3_C4_mix', 'C3', 'Saltgrass', 'Other'], join=False, dodge=0.65, scale=0.3, color='black',
              data=df_wkly_grid_train[df_wkly_grid_train['Year'] == yr], ax=axs[1, idx])
    p2_mean.legend().remove()
    axs[1,idx].set_ylabel('Biomass (kg/ha)', size=12, labelpad=5)
    axs[1,idx].set_xlabel(None)

for idx, ax in enumerate(axs.flatten()):
    ax.set_xticklabels(ax.get_xticklabels(), size=12.0)
    ax.set_xticklabels(ax.get_xticklabels(), size=12.0)
    ax.tick_params(axis='y', labelsize=12.0)
    if (idx == 0) | (idx == len(axs.flatten())/axs.shape[0]):
        continue
    else:
        ax.axes.yaxis.set_ticks([])
        ax.set_ylabel('')
    if idx == 4:
        ax.set_xlabel('Sub-season', size=12.0)

fig.legend(handles, labels, loc='center right', 
           title='Plant community', fontsize=12.0, title_fontsize=12.0)
plt.savefig('C:/Users/sean.kearney/OneDrive - USDA/Documents/Projects/GPS_v_hetgen/figures/violinplots_CP_BM_X_Season_X_PCdmt.png', 
            bbox_inches='tight', dpi=300)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [21]:
"""
FIGURE: Varibility in crude protein (a) and biomass (b) by sub-season across plant communities
"""
fig, axs = plt.subplots(nrows=2, ncols=len(df_wkly_grid_train['Year'].unique()),
                        figsize=(12, 6), sharex=True, gridspec_kw={'right': 0.825})
for idx, yr in enumerate(df_wkly_grid_train['Year'].unique()):
    p1 = sns.violinplot(x='season', y='CP_orig', hue='TPC_c',  bw=0.5, cut=0, inner=None, linewidth=0.75,
                data=df_wkly_grid_train[df_wkly_grid_train['Year'] == yr], ax=axs[0, idx],
                       hue_order=['Highlands', 'Open Slopes', 'Flat Plains', 'Lowlands', 'Other'])
    
    axs[0, idx].fill_between(np.arange(-0.5, 3), 7, 8, color='#DDCC77', alpha=0.4, edgecolor=None, zorder=0)
    axs[0, idx].fill_between(np.arange(-0.5, 3), 0, 7, color='#CC6677', alpha=0.4, edgecolor=None, zorder=0)
    handles, labels = axs[0,idx].get_legend_handles_labels()
    p1.legend().remove()
    p1.set_ylim((5,13))
    p1.set_xmargin(0.0)
    p1.set_title(yr, size=12.0)
    p1_mean = sns.pointplot(x='season', y='CP_orig',  hue='TPC_c', ci=None,
                  hue_order=['Highlands', 'Open Slopes', 'Flat Plains', 'Lowlands', 'Other'], join=False, dodge=0.65, scale=0.3, color='black',
                  data=df_wkly_grid_train[df_wkly_grid_train['Year'] == yr], ax=axs[0, idx])
    p1_mean.legend().remove()
    axs[0,idx].set_ylabel('Crude protein (%)', size=12, labelpad=5)
    axs[0,idx].set_xlabel(None)

    p2 = sns.violinplot(x='season', y='Biomass_orig', hue='TPC_c',  bw=0.5, cut=0, inner=None, linewidth=0.75,
                data=df_wkly_grid_train[df_wkly_grid_train['Year'] == yr], ax=axs[1, idx],
                       hue_order=['Highlands', 'Open Slopes', 'Flat Plains', 'Lowlands', 'Other'])
    axs[1,idx].fill_between(np.arange(-0.5, 3), 350, 700, color='#DDCC77', alpha=0.4, edgecolor=None, zorder=0)
    axs[1,idx].fill_between(np.arange(-0.5, 3), 0, 350, color='#CC6677', alpha=0.4, edgecolor=None, zorder=0)
    p2.legend().remove()
    p2.set_ylim((0, 3500))
    p2.set_xmargin(0.0)
    p2_mean = sns.pointplot(x='season', y='Biomass_orig',  hue='TPC_c', ci=None,
              hue_order=['Highlands', 'Open Slopes', 'Flat Plains', 'Lowlands', 'Other'], join=False, dodge=0.65, scale=0.3, color='black',
              data=df_wkly_grid_train[df_wkly_grid_train['Year'] == yr], ax=axs[1, idx])
    p2_mean.legend().remove()
    axs[1,idx].set_ylabel('Biomass (kg/ha)', size=12, labelpad=5)
    axs[1,idx].set_xlabel(None)

for idx, ax in enumerate(axs.flatten()):
    ax.set_xticklabels(ax.get_xticklabels(), size=12.0)
    ax.set_xticklabels(ax.get_xticklabels(), size=12.0)
    ax.tick_params(axis='y', labelsize=12.0)
    if (idx == 0) | (idx == len(axs.flatten())/axs.shape[0]):
        continue
    else:
        ax.axes.yaxis.set_ticks([])
        ax.set_ylabel('')
    if idx == 4:
        ax.set_xlabel('Sub-season', size=12.0)

fig.legend(handles, labels, loc='center right', 
           title='Topographic position', fontsize=12.0, title_fontsize=12.0)
plt.savefig('C:/Users/sean.kearney/OneDrive - USDA/Documents/Projects/GPS_v_hetgen/figures/violinplots_CP_BM_X_Season_X_TPC.png', 
            bbox_inches='tight', dpi=300)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [22]:
test = df_wkly_grid_train.groupby(
                ['TPC_c', 'PC_dmt']).count()['Year'].reset_index()
test['prop'] = test['Year'] / test.groupby('TPC_c')['Year'].transform('sum')
test = test.pivot(columns='PC_dmt', index='TPC_c', values='prop')

In [23]:
test.plot(kind='bar', stacked=True)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<AxesSubplot:xlabel='TPC_c'>

In [24]:
"""
Create dataframe of plant community coefficients by model and season
"""

pd.set_option('display.max_rows', 100)
#coefs_PC_seas = df_coefs[df_coefs['Param'].str.contains('C\(PC_dmt\, Treatment\(reference\="C3_C4_mix"\)\)\[.*\]$', regex=True)].copy(deep=True)
coefs_PC_seas = df_coefs_seas[df_coefs_seas['Param'].isin(['C4', 'C3', 'Saltgrass', 'Other'])].copy(deep=True)
#coefs_PC_seas['Param'] = coefs_PC_seas['Param'].apply(lambda x: re.sub('C\(PC_dmt, Treatment\(reference\="C3_C4_mix"\)\)\[T\.', '', x))
#coefs_PC_seas['Param'] = coefs_PC_seas['Param'].apply(lambda x: re.sub('\]', '', x))
#coefs_PC_seas = coefs_PC_seas.groupby(['Year', 'Season', 'week', 'Pasture', 'Model', 'Formula', 'Param']).mean().reset_index()
#coefs_PC_seas = coefs_PC_seas.join(df_coefs_seas.set_index(['Model', 'Year', 'Season', 'Param']).drop(columns=['coef']),
#                                                           on=['Model', 'Year', 'Season', 'Param'])
coefs_PC_seas['season'] = pd.Categorical(coefs_PC_seas['season_str'], ['early', 'mid', 'late'])
coefs_PC_seas['Param'] = pd.Categorical(coefs_PC_seas['Param'], ['C4', 'C3', 'Saltgrass', 'Other'])
coefs_PC_seas_grps = []
for grp, data in coefs_PC_seas[coefs_PC_seas['Model'].isin(['M_best'])].groupby(
    ['Year', 'season', 'Param', 'significant']).mean().reset_index().groupby(['Year', 'season']):
    coefs_PC_seas_grps.append(data.sort_values('Param'))

#coefs_PC_seas_agg = coefs_PC_seas.groupby(['Model', 'Year', 'Season', 'week', 'Pasture', 'Param']).mean().reset_index().dropna().groupby(['Model', 'Season', 'Param'])['coef'].agg(
#    [('coef_mean', np.mean),
#     ('coef_lwr95', lambda x: sms.DescrStatsW(x).zconfint_mean()[0]),
#     ('coef_upr95', lambda x: sms.DescrStatsW(x).zconfint_mean()[1]),
#     ('pval', lambda x: np.round(ttest_1samp(x, 0.0)[1], 4))])#.reset_index()


In [25]:
#coefs_PC_seas_best
#df_coefs_seas[(df_coefs_seas['Model'] == 'M_best') & (df_coefs_seas['Year'] == 2016)]

In [26]:
"""
Figure: Plant community coefficients and 95% CI by sub-season and by pasture for the plant-community-only model (M3_pc) and best model (M5)
"""
coefs_PC_seas_best = coefs_PC_seas[coefs_PC_seas['Model'].isin(['M_best']) &
                                   (coefs_PC_seas['significant'] == 'Yes')].copy(deep=True)
import matplotlib.ticker as ticker
g = sns.FacetGrid(coefs_PC_seas_best, margin_titles=True, height=2.25, aspect=1.5,
                  col='season', row='Year', legend_out=True, sharey=True, gridspec_kws={"hspace":0.25, "wspace":0.1})
#plt.figure()
g.map_dataframe(sns.pointplot, x='coef', y='Param', estimator=np.mean,
                join=False, dodge=None, scale=0.75, errwidth=0.75, ci=None, color='black')

#sns.pointplot()
axs = g.axes
for idx, ax in enumerate(axs.flat):
    #ax.collections[0]
    ax.set_xlim((-1.5, 1.5))
    ax.axvline(x=0, linestyle='dotted', color='black')
    box = ax.get_position()
    #ax.set_position([box.x0, box.y0, box.width, box.height*0.75])
    cur_title = ax.get_title()
    ax.xaxis.set_major_formatter(ticker.FuncFormatter(lambda x, pos: '{:,.1f}'.format(x)))
    plt.setp(ax.texts, text="") # remove the original texts
    df_tmp = coefs_PC_seas_grps[idx].copy(deep=True)
    df_tmp['lwr'] = df_tmp['coef'] - df_tmp['coef_ci_lwr']
    df_tmp['upr'] = df_tmp['coef_ci_upr'] - df_tmp['coef'] 
    df_tmp_ns = df_tmp[df_tmp['significant'] == 'No'].copy(deep=True)
    df_tmp = df_tmp[df_tmp['significant'] == 'Yes'].copy(deep=True)
    ax.errorbar(x=df_tmp['coef'],
                y=df_tmp['Param'], 
                xerr=(df_tmp[['lwr', 'upr']].values).transpose(),
                color='black', fmt='o', zorder=0)
    ax.errorbar(x=df_tmp_ns['coef'],
            y=df_tmp_ns['Param'], 
            xerr=(df_tmp_ns[['lwr', 'upr']].values).transpose(), fmt='o', markerfacecolor='white', color='black', zorder=0)

g.set_titles(row_template = '{row_name}', col_template = '{col_name}-season', fontweight='bold')
g.set(xticks=np.arange(-1.5, 1.5, 0.5))
g.set_xlabels('Coefficient')
plt.savefig('C:/Users/sean.kearney/OneDrive - USDA/Documents/Projects/GPS_v_hetgen/figures/PC_dmt.png', 
            bbox_inches='tight', dpi=300)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

  self.fig.tight_layout(*args, **kwargs)
  self.fig.tight_layout(*args, **kwargs)
  xys = np.asarray(xys)
  xys = np.asarray(xys)
  xys = np.asarray(xys)
  xys = np.asarray(xys)


In [27]:
"""
Create dataframe of topographic position class coefficients by model and season
"""
#coefs_TPC_seas = df_coefs[df_coefs['Param'].str.contains('C\(TPC_c\, Treatment\(reference\="Flat Plains"\)\)\[.*\]$', regex=True)].copy(deep=True)
coefs_TPC_seas = df_coefs_seas[df_coefs_seas['Param'].isin(['Highlands', 'Open Slopes', 'Lowlands', 'Complex'])]
#coefs_TPC_seas['Param'] = coefs_TPC_seas['Param'].apply(lambda x: re.sub('C\(TPC_c, Treatment\(reference\="Flat Plains"\)\)\[T\.', '', x))
#coefs_TPC_seas['Param'] = coefs_TPC_seas['Param'].apply(lambda x: re.sub('\]', '', x))
#coefs_TPC_seas = coefs_TPC_seas.groupby(['Year', 'Season', 'week', 'Pasture', 'Model', 'Formula', 'Param']).mean().reset_index()
#coefs_TPC_seas = coefs_TPC_seas.join(df_coefs_seas.set_index(['Model', 'Year', 'Season', 'Param']).drop(columns=['coef']),
#                                                           on=['Model', 'Year', 'Season', 'Param'])
coefs_TPC_seas['season'] = pd.Categorical(coefs_TPC_seas['season_str'], ['early', 'mid', 'late'])
coefs_TPC_seas['Param'] = pd.Categorical(coefs_TPC_seas['Param'], ['Highlands', 'Open Slopes', 'Lowlands', 'Complex'])
coefs_TPC_seas_grps = []
for grp, data in coefs_TPC_seas[coefs_TPC_seas['Model'].isin(['M_best'])].groupby(
    ['Year', 'season', 'Param', 'significant']).mean().reset_index().groupby(['Year', 'season']):
    coefs_TPC_seas_grps.append(data.sort_values('Param'))
    
#coefs_TPC_seas_agg = coefs_TPC_seas.groupby(['Model','week', 'Season', 'Year',  'Pasture', 'Param']).mean().reset_index().dropna().groupby(['Model', 'Season', 'Param'])['coef'].agg(
#    [('coef_mean', np.mean),
#     ('coef_lwr95', lambda x: sms.DescrStatsW(x).zconfint_mean()[0]),
#     ('coef_upr95', lambda x: sms.DescrStatsW(x).zconfint_mean()[1]),
#     ('pval', lambda x: np.round(ttest_1samp(x, 0.0)[1], 4))])#.reset_index()
#coefs_TPC_seas_agg[coefs_TPC_seas_agg.index.isin(['M1', 'M_best'], level='Model')].dropna()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._set_item(key, value)


In [28]:
#coefs_TPC_seas

In [29]:
"""
Figure: Plant community coefficients and 95% CI by sub-season and by pasture for the plant-community-only model (M3_pc) and best model (M5)
"""
coefs_TPC_seas_best = coefs_TPC_seas[coefs_TPC_seas['Model'].isin(['M_best']) & (coefs_TPC_seas['significant'] == 'Yes')].copy(deep=True)
import matplotlib.ticker as ticker
g = sns.FacetGrid(coefs_TPC_seas_best, margin_titles=True, height=2.25, aspect=1.5,
                  col='season', row='Year', legend_out=True, sharey=True, gridspec_kws={"hspace":0.25, "wspace":0.1})
#plt.figure()
g.map_dataframe(sns.pointplot, x='coef', y='Param', estimator=np.mean,
                join=False, dodge=None, scale=0.75, errwidth=0.75, ci=None, color='black')

#sns.pointplot()
axs = g.axes
for idx, ax in enumerate(axs.flat):
    #ax.collections[0]
    ax.set_xlim((-1.5, 1.5))
    ax.axvline(x=0, linestyle='dotted', color='black')
    box = ax.get_position()
    #ax.set_position([box.x0, box.y0, box.width, box.height*0.75])
    cur_title = ax.get_title()
    ax.xaxis.set_major_formatter(ticker.FuncFormatter(lambda x, pos: '{:,.1f}'.format(x)))
    plt.setp(ax.texts, text="") # remove the original texts
    df_tmp = coefs_TPC_seas_grps[idx].copy(deep=True)
    df_tmp['lwr'] = df_tmp['coef'] - df_tmp['coef_ci_lwr']
    df_tmp['upr'] = df_tmp['coef_ci_upr'] - df_tmp['coef'] 
    df_tmp_ns = df_tmp[df_tmp['significant'] == 'No'].copy(deep=True)
    df_tmp = df_tmp[df_tmp['significant'] == 'Yes'].copy(deep=True)
    ax.errorbar(x=df_tmp['coef'],
                y=df_tmp['Param'], 
                xerr=(df_tmp[['lwr', 'upr']].values).transpose(),
                color='black', fmt='o', zorder=0)
    ax.errorbar(x=df_tmp_ns['coef'],
            y=df_tmp_ns['Param'], 
            xerr=(df_tmp_ns[['lwr', 'upr']].values).transpose(), fmt='o', markerfacecolor='white', color='black', zorder=0)

g.set_titles(row_template = '{row_name}', col_template = '{col_name}-season', fontweight='bold')
g.set(xticks=np.arange(-1.5, 1.5, 0.5))
g.set_xlabels('Coefficient')
plt.savefig('C:/Users/sean.kearney/OneDrive - USDA/Documents/Projects/GPS_v_hetgen/figures/TPC.png', 
            bbox_inches='tight', dpi=300)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

  self.fig.tight_layout(*args, **kwargs)
  self.fig.tight_layout(*args, **kwargs)
  xys = np.asarray(xys)
  xys = np.asarray(xys)


In [30]:
"""
Check if biomass and CP are significant in M_best models
"""
display(df_coefs_seas[(df_coefs_seas['Model'] == 'M_best') &
              (df_coefs_seas['Param'].str.contains('CP'))].sort_values(['Year', 'season']))
display(df_coefs_seas[(df_coefs_seas['Model'] == 'M_best') &
              (df_coefs_seas['Param'].str.contains('Biomass'))].sort_values(['Year', 'season']))

Unnamed: 0,Param,coef,std err,z,P>|z|,[0.025,0.975],season_str,Year,Model,coef_ci_lwr,coef_ci_upr,significant,season
21,CP,0.0535,0.045,1.196,0.232,-0.034,0.141,early,2016,M_best,-0.034,0.141,No,early
22,C3:CP,-0.1436,0.157,-0.912,0.362,-0.452,0.165,early,2016,M_best,-0.452,0.165,No,early
23,C4:CP,0.3825,0.055,6.916,0.0,0.274,0.491,early,2016,M_best,0.274,0.491,Yes,early
24,Other:CP,0.3873,0.169,2.292,0.022,0.056,0.718,early,2016,M_best,0.056,0.718,Yes,early
25,Saltgrass:CP,0.2093,0.086,2.445,0.014,0.042,0.377,early,2016,M_best,0.042,0.377,Yes,early
26,Biomass:CP,0.1096,0.024,4.641,0.0,0.063,0.156,early,2016,M_best,0.063,0.156,Yes,early
27,I(Biomass ** 2):CP,0.0317,0.013,2.464,0.014,0.006,0.057,early,2016,M_best,0.006,0.057,Yes,early
35,PC_div:CP,-0.0652,0.024,-2.685,0.007,-0.113,-0.018,early,2016,M_best,-0.113,-0.018,Yes,early
13,CP,0.2236,0.046,4.905,0.0,0.134,0.313,mid,2016,M_best,0.134,0.313,Yes,mid
14,C3:CP,-0.3413,0.094,-3.636,0.0,-0.525,-0.157,mid,2016,M_best,-0.525,-0.157,Yes,mid


Unnamed: 0,Param,coef,std err,z,P>|z|,[0.025,0.975],season_str,Year,Model,coef_ci_lwr,coef_ci_upr,significant,season
11,Biomass,-0.1782,0.024,-7.363,0.0,-0.226,-0.131,early,2016,M_best,-0.226,-0.131,Yes,early
12,C3:Biomass,-0.0861,0.07,-1.228,0.22,-0.223,0.051,early,2016,M_best,-0.223,0.051,No,early
13,C4:Biomass,0.0828,0.031,2.653,0.008,0.022,0.144,early,2016,M_best,0.022,0.144,Yes,early
14,Other:Biomass,-0.2329,0.082,-2.851,0.004,-0.393,-0.073,early,2016,M_best,-0.393,-0.073,Yes,early
15,Saltgrass:Biomass,0.2801,0.077,3.635,0.0,0.129,0.431,early,2016,M_best,0.129,0.431,Yes,early
16,I(Biomass ** 2),0.0721,0.015,4.677,0.0,0.042,0.102,early,2016,M_best,0.042,0.102,Yes,early
17,C3:I(Biomass ** 2),-0.0989,0.043,-2.324,0.02,-0.182,-0.015,early,2016,M_best,-0.182,-0.015,Yes,early
18,C4:I(Biomass ** 2),0.0478,0.022,2.21,0.027,0.005,0.09,early,2016,M_best,0.005,0.09,Yes,early
19,Other:I(Biomass ** 2),-0.0586,0.024,-2.439,0.015,-0.106,-0.012,early,2016,M_best,-0.106,-0.012,Yes,early
20,Saltgrass:I(Biomass ** 2),-0.0847,0.03,-2.85,0.004,-0.143,-0.026,early,2016,M_best,-0.143,-0.026,Yes,early


In [35]:
"""
Compare models by season: how does selection change with biomass and relative CP (within season)
"""
cp_q_dict = {
    'Lowest': 0.20,
    'Below avg': 0.40,
    'Average': 0.60,
    'Above avg': 0.80,
    'Highest': 0.95
}
ref_TPC = 'Flat Plains'
ref_pc = 'C3_C4_mix'
df_pred = pd.DataFrame(columns=['Year', 'season','dFence_orig', 'dTank_orig', 'Biomass_orig', 'CP_orig','PC_div_orig',
                                'PC_dmt', 'TPC_c', 'Model'])
for mod in tqdm(['M_best']):
    for (yr, seas), df_sub in df_wkly_grid_full[df_wkly_grid_full['mod_data'] == 'train'].groupby(['Year', 'season']):
        if ref_TPC is not None:
            df_tmp = df_sub[(df_sub['TPC_c'] == ref_TPC) & (df_sub['PC_dmt'] == ref_pc)]
        else:
            df_tmp = df_sub[(df_sub['PC_dmt'] == ref_pc)]
        if len(df_sub) < 50:
            continue
        else:
            cp_old = df_tmp['CP_orig'].quantile(0.05)
            for cp_q in cp_q_dict:
                df_pred = df_pred.append(pd.DataFrame({
                    'Year': yr,
                    'season': seas,
                    'Relative CP': cp_q,
                    'dFence_orig': df_tmp['dFence_orig'].median(),
                    'dTank_orig': df_tmp['dTank_orig'].median(),
                    'Biomass_orig': np.arange(df_tmp['Biomass_orig'][(df_tmp['CP_orig'] < df_tmp['CP_orig'].quantile(cp_q_dict[cp_q])) & 
                                                                     (df_tmp['CP_orig'] > cp_old)].quantile(0.001), 
                         df_tmp['Biomass_orig'][(df_tmp['CP_orig'] < df_tmp['CP_orig'].quantile(cp_q_dict[cp_q])) & 
                                                                     (df_tmp['CP_orig'] > cp_old)].quantile(0.999),  10),
                    'CP_orig': df_tmp['CP_orig'].quantile(cp_q_dict[cp_q]),
                    'PC_div_orig': df_tmp['PC_div_orig'].median(),
                    'PC_dmt': ref_pc,
                    'TPC_c': ref_TPC,
                    'Model': mod
                }))
                cp_old = df_tmp['CP_orig'].quantile(cp_q_dict[cp_q])
        for c in ['dFence', 'dTank', 'Biomass', 'CP', 'PC_div']:
            scaler = scaler_dict[yr][seas][c]
            df_pred.loc[(df_pred['Year'] == yr) & 
                        (df_pred['season'] == seas), c] = scaler.transform(
                df_pred.loc[(df_pred['Year'] == yr) & 
                            (df_pred['season'] == seas), c + '_orig'].values.reshape(-1, 1)).flatten()
        df_pred.loc[(df_pred['Year'] == yr) & 
                    (df_pred['season'] == seas) & 
                    (df_pred['Model'] == mod), 
                    'mod_pred'] = yrly_mod_dict[yr][seas][mod].predict(df_pred.loc[(df_pred['Year'] == yr) & 
                                                                                   (df_pred['season'] == seas) & 
                                                                                   (df_pred['Model'] == mod)])
        
df_pred['season'] = pd.Categorical(df_pred['season'], ['early', 'mid', 'late'])
df_pred['pred_rel_freq'] = np.nan
for yr in df_pred.Year.unique():
    for seas in df_pred.season.unique():
        df_wkly_grid_sub = df_wkly_grid_full[(df_wkly_grid_full['Year'] == yr) & 
                                             (df_wkly_grid_full['season'] == seas) & 
                                             (df_wkly_grid_full['mod_data'] == 'train')]
        df_pred.loc[(df_pred['Year'] == yr) & 
                    (df_pred['season'] == seas), 'pred_rel_freq'] = (df_pred.loc[(df_pred['Year'] == yr) &
                                                                                 (df_pred['season'] == seas), 
                                                                                 'mod_pred'] * df_wkly_grid_sub['grazing_wkly_sum'].mean()) / (df_wkly_grid_sub.groupby('Pasture')['grazing_wkly_sum'].mean().mean() / df_wkly_grid_sub.groupby(
            ['Pasture'])['grazing_wkly_sum'].count().mean())

  0%|          | 0/1 [00:00<?, ?it/s]

In [36]:
"""
Plot marginal effects of biomass and relative CP by model and season
"""
from matplotlib.ticker import MultipleLocator
x_var = 'Biomass_orig'
vmin, vmax = int(df_pred['CP_orig'].min()), np.ceil(df_pred['CP_orig'].max())
cmap = sns.color_palette("viridis", as_cmap=True)

g = sns.FacetGrid(df_pred[df_pred['Model'] == 'M_best'], 
                  col='season', row='Year', hue='CP_orig', 
                  col_order=['early', 'mid', 'late'], margin_titles=True,
                  legend_out=True, sharey=False, sharex=True, palette='viridis',
                  height=2.25, aspect=1.75, gridspec_kws={"hspace":0.2, "wspace":0.0})
#plt.figure()
g.map_dataframe(sns.lineplot, x=x_var, y='pred_rel_freq')#.add_legend()
g.set_xlabels('Biomass (kg/ha)')
g.set_ylabels(None)
axes = g.fig.axes
for idx, ax in enumerate(axes):
    ax.axhline(y=1.0, linestyle='dashed', color='grey')
    ax.set_ylim((0.0, ax.get_ylim()[-1]))
    if ax.get_ylim()[-1] < 3.0:
        ax.set_ylim((0.0, 3.0))
    elif ax.get_ylim()[-1] < 5.0:
        ax.set_ylim((0.0, 5.0))
    elif ax.get_ylim()[-1] < 10.0:
        ax.set_ylim((0.0, 10.0))
    ax.set_xlim((0, 2600))
    cur_title = ax.get_title()
    box = ax.get_position()
    ax.set_position([box.x0, box.y0, box.width*0.80, box.height])
    ax.yaxis.set_major_formatter(ticker.FuncFormatter(lambda x, pos: '{:,.0f}'.format(x)))
    ax.xaxis.set_minor_locator(MultipleLocator(250))
    #ax.axvline(x=500, linestyle='solid', color='red')
    #ax.fill_between(range(700, 3500), 0, 1, transform=ax.get_xaxis_transform(), color='#117733', alpha=0.2, edgecolor=None, zorder=0)
    ax.fill_between(range(350, 700), 0, 1, transform=ax.get_xaxis_transform(), color='#DDCC77', alpha=0.4, edgecolor=None, zorder=0)
    ax.fill_between(range(0, 350), 0, 1, transform=ax.get_xaxis_transform(), color='#CC6677', alpha=0.4, edgecolor=None, zorder=0)
    plt.setp(ax.texts, text="") # remove the original texts
    if idx == 3:
        ax.set_ylabel('Relative probability of selection', labelpad=10)

# Define a new Axes where the colorbar will go
cax = g.fig.add_axes([.90, .20, .02, .6])
cax.set_title('CP')
# Get a mappable object with the same colormap as the data
points = plt.scatter([], [], c=[], vmin=vmin, vmax=vmax, cmap=cmap)
# Draw the colorbar
g.fig.colorbar(points, cax=cax)
g.set_titles(row_template = '{row_name}', col_template = '{col_name}-season', fontweight='bold')
g.set(xticks=np.arange(0, 3000, 500))
#g.fig.suptitle('Marginal effects of Biomass and Relative CP')
plt.savefig('C:/Users/sean.kearney/OneDrive - USDA/Documents/Projects/GPS_v_hetgen/figures/Biomass_X_CP.png', 
            bbox_inches='tight', dpi=300)
#plt.tight_layout()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

  self.fig.tight_layout(*args, **kwargs)
  self.fig.tight_layout(*args, **kwargs)


In [33]:
"""
Compare top model by plant community diversity: how does selection change with plant community diversity within dominant plant communities?
"""
ref_TPC = 'Flat Plains'
df_pred = pd.DataFrame(columns=['Year', 'season', 'dFence_orig', 'dTank_orig', 'Biomass_orig', 'CP_orig','PC_div_orig',
                                'PC_dmt', 'TPC_c'])
for (yr, seas), df_sub in df_wkly_grid_full[df_wkly_grid_full['mod_data'] == 'train'].groupby(['Year', 'season']):
    print(yr, seas)
    if ref_TPC is not None:
        df_tmp = df_sub[(df_sub['TPC_c'] == ref_TPC)]
    else:
        df_tmp = df_sub
    if not (coefs_dict[yr][seas]['M_best']['Param'] == 'PC_div').any():
        continue
    if np.sign(coefs_dict[yr][seas]['M_best'].set_index('Param').loc['PC_div']['[0.025']) != \
    np.sign(coefs_dict[yr][seas]['M_best'].set_index('Param').loc['PC_div']['0.975]']):
        continue
    if (coefs_dict[yr][seas]['M_best']['Param'] == 'C3').any():
        if (coefs_dict[yr][seas]['M_best']['Param'] == 'C3:PC_div').any():
            for ref_pc in df_wkly_grid_full['PC_dmt'][df_wkly_grid_full['mod_data'] == 'train'].unique():
                df_tmp2 = df_tmp[df_tmp['PC_dmt'] == ref_pc]
                if len(df_tmp2) < 50:
                    continue
                if ref_pc != 'C3_C4_mix':
                    if np.sign(coefs_dict[yr][seas]['M_best'].set_index('Param').loc[ref_pc + ':PC_div']['[0.025']) != \
                    np.sign(coefs_dict[yr][seas]['M_best'].set_index('Param').loc[ref_pc + ':PC_div']['0.975]']):
                        continue
                df_pred = df_pred.append(pd.DataFrame({
                    'Year': yr,
                    'season': seas,
                    'dFence_orig': df_tmp2['dFence_orig'].median(),
                    'dTank_orig': df_tmp2['dTank_orig'].median(),
                    'Biomass_orig': df_tmp2['Biomass_orig'].median(),
                    'CP_orig': df_tmp2['CP_orig'].median(),
                    'PC_div_orig': np.linspace(df_tmp2['PC_div_orig'].quantile(0.005),
                                               df_tmp2['PC_div_orig'].quantile(0.995), 25),
                    'PC_dmt': ref_pc,
                    'TPC_c': ref_TPC,
                    'step': np.arange(25)
                }))
            
        else:
            ref_pc = 'C3_C4_mix'
            df_tmp = df_tmp[(df_tmp['PC_dmt'] == ref_pc)]
            if len(df_tmp) < 50:
                continue
            df_pred = df_pred.append(pd.DataFrame({
                'Year': yr,
                'season': seas,
                'dFence_orig': df_tmp['dFence_orig'].median(),
                'dTank_orig': df_tmp['dTank_orig'].median(),
                'Biomass_orig': df_tmp['Biomass_orig'].median(),
                'CP_orig': df_tmp['CP_orig'].median(),
                'PC_div_orig': np.linspace(df_tmp['PC_div_orig'].quantile(0.005),
                                           df_tmp['PC_div_orig'].quantile(0.995), 25),
                'PC_dmt': ref_pc,
                'TPC_c': ref_TPC,
                'step': np.arange(25)
            }))
    else:
        if len(df_tmp) < 50:
            continue
        df_pred = df_pred.append(pd.DataFrame({
            'Year': yr,
            'season': seas,
            'dFence_orig': df_tmp['dFence_orig'].median(),
            'dTank_orig': df_tmp['dTank_orig'].median(),
            'Biomass_orig': df_tmp['Biomass_orig'].median(),
            'CP_orig': df_tmp['CP_orig'].median(),
            'PC_div_orig': np.linspace(df_tmp['PC_div_orig'].quantile(0.005), 
                                       df_tmp['PC_div_orig'].quantile(0.995), 25),
            'PC_dmt': 'Overall',
            'TPC_c': ref_TPC,
            'step': np.arange(25)
        }))
    for c in ['dFence', 'dTank', 'Biomass', 'CP', 'PC_div']:
        scaler = scaler_dict[yr][seas][c]
        df_pred.loc[(df_pred['Year'] == yr) & 
                    (df_pred['season'] == seas), c] = scaler.transform(
            df_pred.loc[(df_pred['Year'] == yr) & 
                        (df_pred['season'] == seas), c + '_orig'].values.reshape(-1, 1)).flatten()
    df_pred.loc[(df_pred['Year'] == yr) & 
                (df_pred['season'] == seas), 
                'mod_pred'] = yrly_mod_dict[yr][seas]['M_best'].predict(df_pred.loc[(df_pred['Year'] == yr) &
                                                                               (df_pred['season'] == seas)])
    
df_pred['season'] = pd.Categorical(df_pred['season'], ['early', 'mid', 'late'])
df_pred['pred_rel_freq'] = np.nan
for yr in df_pred.Year.unique():
    for seas in df_pred.season.unique():
        df_wkly_grid_sub = df_wkly_grid_full[(df_wkly_grid_full['Year'] == yr) & 
                                             (df_wkly_grid_full['season'] == seas) & 
                                             (df_wkly_grid_full['mod_data'] == 'train')]
        df_pred.loc[(df_pred['Year'] == yr) & 
                    (df_pred['season'] == seas), 'pred_rel_freq'] = (df_pred.loc[(df_pred['Year'] == yr) &
                                                                                 (df_pred['season'] == seas), 'mod_pred'] * df_wkly_grid_sub['grazing_wkly_sum'].mean()) / (df_wkly_grid_sub.groupby('Pasture')['grazing_wkly_sum'].mean().mean() / df_wkly_grid_sub.groupby(
            ['Pasture'])['grazing_wkly_sum'].count().mean())
        
        #if not (mbest_coefs_dict[yr][seas]['Param'] == 'PC_dmt:C3').any():
        #    df_pred_tmp = df_pred[(df_pred['Year'] == yr) &
        #                          (df_pred['season'] == seas)].groupby(['step']).mean()
        #    df_pred_tmp['PC_dmt'] = 'Average'
        #    df_pred_tmp['season'] = seas
        #    df_pred_tmp['Year'] = yr
        #    df_pred = df_pred[~((df_pred['Year'] == yr) &
        #                        (df_pred['season'] == seas))]
        #    df_pred = df_pred.append(df_pred_tmp)

2016 early
2016 mid
2016 late
2017 early
2017 mid
2017 late
2018 early
2018 mid
2018 late


In [34]:
"""
Plot marginal effects of plant community diversity by dominant plant community and season
"""
x_var = 'PC_div_orig'
hue_order = [x for x in ['C4', 'C3_C4_mix', 'C3', 'Saltgrass', 'Other', 'Overall'] if x in df_pred['PC_dmt'].unique()]
hue_labels = ['C3/C4 mix' if x == 'C3_C4_mix' else x for x in hue_order]
g = sns.FacetGrid(df_pred, col='season', row='Year', hue='PC_dmt',
                  col_order=['early', 'mid', 'late'], 
                  hue_order=hue_order, palette=['blue', 'olive', 'darkorange', 'pink', 'grey', 'black'],
                  margin_titles=True,
                  legend_out=True, sharey=False, height=2.25, aspect=1.5, gridspec_kws={"hspace":0.2, "wspace":0.2})
#plt.figure()
g.map_dataframe(sns.lineplot, x=x_var, y='pred_rel_freq').add_legend(bbox_to_anchor=(1.075, 0.5))
g.set_xlabels(None)
g.set_ylabels(None)
axes = g.axes.flat
for idx, ax in enumerate(axes):
    ax.axhline(y=1.0, linestyle='dashed', color='grey')
    ax.set_ylim((0.0, ax.get_ylim()[-1]))
    #if ax.get_ylim()[-1] < 3.0:
    ax.set_ylim((0.0, 3.0))
    cur_title = ax.get_title()
    plt.setp(ax.texts, text="") # remove the original texts
    if idx == 3:
        ax.set_ylabel('Relative probability of selection', labelpad=10)
    if idx == 7:
        ax.set_xlabel('Plant community diversity (Shannon H)', labelpad=10)
if len(hue_order) > 1:
    for ax in axes:
        box = ax.get_position()
        ax.set_position([box.x0, box.y0, box.width*0.6, box.height*0.9])
    leg = g.legend
    # change legend texts
    new_title = 'Plant community'
    leg.set_title(new_title, prop={'weight': 'bold',
            'size': 12,
            })
    new_labels=hue_labels
    for t, l in zip(leg.texts, new_labels):
        t.set_text(l)
else:
    g.legend.remove()
g.set(xticks=np.arange(0, 2.5, 0.5))
g.set_titles(row_template = '{row_name}', col_template = '{col_name}-season', fontweight='bold')
plt.savefig('C:/Users/sean.kearney/OneDrive - USDA/Documents/Projects/GPS_v_hetgen/figures/PC_div_X_PC_dmt.png',
            bbox_inches='tight', dpi=300)


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

  self.fig.tight_layout(*args, **kwargs)
  self.fig.tight_layout(*args, **kwargs)


In [None]:
df_wkly_grid_full[(df_wkly_grid_full.Year == 2016)&
            (df_wkly_grid_full.season == 'mid') &
                  (df_wkly_grid_full.TPC_c == 'Flat Plains') &
                 (df_wkly_grid_full.PC_dmt == 'Other') &
                 (df_wkly_grid_full.mod_data == 'test')]['PC_div_orig'].describe()

In [None]:
df_wkly_grid_full.columns

In [None]:
df_wkly_grid_train['Pasture'].unique()

In [None]:
import xarray as xr
xr_dat_past = df_wkly_grid_train[df_wkly_grid_train['Pasture'] == '7NW'].set_index(['Year', 'week', 'UTM_Y', 'UTM_X']).to_xarray()

In [58]:
#df_wkly_grid_train_means[df_wkly_grid_train_means['Pasture'] == '7NW']['pred_seas_grp_val']#.value_counts()

In [59]:
pc_dmt_dict = {
    'C4': 1.0,
    'C3_C4_mix': 2.0,
    'C3': 3.0,
    'Saltgrass': 4.0,
    'Other': 5.0
}

tpc_dict = {
    'Highlands': 1.0,
    'Open Slopes': 2.0,
    'Flat Plains': 3.0,
    'Lowlands': 4.0,
    'Other': 5.0
}

pred_grps_dict = {
    'Low': 1,
    'Mod. Low': 2,
    'Avg.': 3,
    'Mod. Hi': 4,
    'Hi': 5
}


def convert_from_dict(x, set_dict):
    return set_dict[x] if x in set_dict else x
    
xr_dat_past['PC_dmt_val'] = xr.apply_ufunc(convert_from_dict, xr_dat_past['PC_dmt'],
                                           kwargs={'set_dict': pc_dmt_dict}, vectorize=True, dask='parallelized')
xr_dat_past['TPC_val'] = xr.apply_ufunc(convert_from_dict, xr_dat_past['TPC_c'],
                                           kwargs={'set_dict': tpc_dict}, vectorize=True, dask='parallelized')
xr_dat_past['pred_seas_grp_val'] = xr.apply_ufunc(convert_from_dict, xr_dat_past['pred_seas_grp'],
                                                   kwargs={'set_dict': pred_grps_dict}, vectorize=True, dask='parallelized')
#xr_dat_past['PC_div'] = xr_dat_past['PC_div'].fillna(0)

ValueError: cannot convert float NaN to integer

In [36]:
#xr_dat_early['grazing_rel_freq'].quantile(0.98)

In [37]:
from matplotlib.gridspec import GridSpec
from matplotlib import colors, cm
import matplotlib.patches as mpatches
from mpl_toolkits.axes_grid1 import make_axes_locatable

tpc_labels = ['Complex' if x == 'Other' else x for x in tpc_dict.keys()]

tpc_cmap = colors.ListedColormap(['orange', 'yellow', 'lightgreen', 'darkgreen', 'red'])
tpc_patches =[mpatches.Patch(color=c, label=list(tpc_labels)[i]) for i, c in enumerate(tpc_cmap.colors)]

pc_dmt_cmap = colors.ListedColormap(['blue', 'olive', 'darkorange', 'pink', 'grey'])
pc_patches =[mpatches.Patch(color=c, label=list(pc_dmt_dict.keys())[i]) for i, c in enumerate(pc_dmt_cmap.colors)]

rel_use_breaks = [0, 0.001, 0.5, 0.75, 0.9, 1.0, 1.25, 2.0, 3.0, 4.0, 1000]
rel_use_labels = ['None', 'V. low', 'Low', 'Mod. low', 'Avg.', 'Mod. hi', 'Hi', 'V. Hi', 'Ext. Hi']
rel_use_cmap = cm.get_cmap('Spectral_r', len(rel_use_breaks))
res_use_patches =[mpatches.Patch(color=rel_use_cmap(i), 
                                   label=rel_use_labels[i]) for i in np.arange(len(rel_use_labels))]
# Create the colormap
cmap_rel_use = colors.LinearSegmentedColormap.from_list(
            'Relative use', [rel_use_cmap(i) for i in np.arange(len(rel_use_labels))],
    N=len(rel_use_labels))
norm = colors.BoundaryNorm(rel_use_breaks, len(rel_use_breaks))


yr = 2016
xr_dat_early = xr_dat_past.sel(Year=yr, week=21)
xr_dat_mid = xr_dat_past.sel(Year=yr, week=27)
xr_dat_late = xr_dat_past.sel(Year=yr, week=33)

cax_fsize=8
cax_title_fsize=10
cax_pad=0.10

fig = plt.figure(figsize=(14, 10))

gs1 = GridSpec(1, 3, top=0.98, bottom=0.65, wspace=0.05)
ax1 = fig.add_subplot(gs1[0])
ax2 = fig.add_subplot(gs1[1])
ax3 = fig.add_subplot(gs1[2])
ax1.imshow(xr_dat_early['TPC_val'].sortby('UTM_Y', ascending=False).data, cmap=tpc_cmap)
ax1.legend(handles=tpc_patches, ncol=3, bbox_to_anchor=(0.5, 1.4), fontsize=8, 
           loc='upper center', frameon=False,
           title="$\\bf{Topographic\ Position\ Class}$", title_fontsize=10)
divider1 = make_axes_locatable(ax1)
cax1 = divider1.append_axes('top', size='10%', pad=cax_pad)
cax1.axes.set_visible(False)

ax2.imshow(xr_dat_early['PC_dmt_val'].sortby('UTM_Y', ascending=False).data, cmap=pc_dmt_cmap)
ax2.legend(handles=pc_patches, ncol=3, bbox_to_anchor=(0.5, 1.4), fontsize=8, 
           loc='upper center', frameon=False,
           title="$\\bf{Dominant\ Plant\ Community (PC)}$", title_fontsize=10)
divider2 = make_axes_locatable(ax2)
cax2 = divider2.append_axes('top', size='10%', pad=cax_pad)
cax2.axes.set_visible(False)

im3 = ax3.imshow(xr_dat_early['PC_div_orig'].sortby('UTM_Y', ascending=False).data, cmap='cividis', vmin=0.0, vmax=1.5)
divider3 = make_axes_locatable(ax3)
cax3 = divider3.append_axes('top', size='10%', pad=cax_pad)
cbar3 = fig.colorbar(im3, cax=cax3, orientation='horizontal')
cax3.xaxis.set_ticks_position('top')
cax3.set_title("$\\bf{PC\ Diversity\ (Shannon\ H)}$", fontsize=10)
cbar3.ax.tick_params(labelsize=8)

gs2 = GridSpec(3, 4, top=0.65, bottom=0.02, hspace=0.01)
ax4 = fig.add_subplot(gs2[0, 0])
ax5 = fig.add_subplot(gs2[0, 1])
ax6 = fig.add_subplot(gs2[0, 2])
ax7 = fig.add_subplot(gs2[0, 3])
im4 = ax4.imshow(xr_dat_early['Biomass_orig'].sortby('UTM_Y', ascending=False).data, cmap='magma', vmin=200, vmax=2500)
divider4 = make_axes_locatable(ax4)
cax4 = divider4.append_axes('top', size='10%', pad=cax_pad)
cbar4 = fig.colorbar(im4, cax=cax4, orientation='horizontal')
cax4.xaxis.set_ticks_position('top')
cax4.set_title("$\\bf{Biomass\ (kg/ha)}$", fontsize=10)
cbar4.ax.tick_params(labelsize=8)
#cax4left = divider4.append_axes('left', size='5%', pad=0.05)
#cax4left.yaxis.set_label("early-season")

im5 = ax5.imshow(xr_dat_early['CP_orig'].sortby('UTM_Y', ascending=False).data, cmap='viridis', vmin=6, vmax=12)
divider5 = make_axes_locatable(ax5)
cax5 = divider5.append_axes('top', size='10%', pad=cax_pad)
cbar5 = fig.colorbar(im5, cax=cax5, orientation='horizontal')
cax5.xaxis.set_ticks_position('top')
cax5.set_title("$\\bf{Crude\ Protein\ (\%)}$", fontsize=10)
cbar5.ax.tick_params(labelsize=8)

im6 = ax6.imshow(xr_dat_early['grazing_rel_freq'].sortby('UTM_Y', ascending=False).data, cmap='Spectral_r', vmin=0, vmax=4)
divider6 = make_axes_locatable(ax6)
cax6 = divider6.append_axes('top', size='10%', pad=cax_pad)
cbar6 = fig.colorbar(im6, cax=cax6, orientation='horizontal')
cax6.xaxis.set_ticks_position('top')
cax6.set_title("$\\bf{Relative\ use}$", fontsize=10)
cbar6.ax.tick_params(labelsize=8)

ax7.imshow(xr_dat_early['pred_seas_grp_val'].sortby('UTM_Y', ascending=False).data, cmap=pred_grps_cmap)
divider7 = make_axes_locatable(ax7)
cax7 = divider7.append_axes('top', size='10%', pad=cax_pad)
cax7.axes.set_visible(False)
ax7.legend(handles=pred_grps_patches, ncol=3, bbox_to_anchor=(0.5, 1.55), fontsize=8, 
           loc='upper center', frameon=False, title="$\\bf{Predicted\ use}$", title_fontsize=10)

ax8 = fig.add_subplot(gs2[1, 0])
ax9 = fig.add_subplot(gs2[1, 1])
ax10 = fig.add_subplot(gs2[1, 2])
ax11 = fig.add_subplot(gs2[1, 3])
ax8.imshow(xr_dat_mid['Biomass_orig'].sortby('UTM_Y', ascending=False).data, cmap='magma', vmin=200, vmax=2500)
ax9.imshow(xr_dat_mid['CP_orig'].sortby('UTM_Y', ascending=False).data, cmap='viridis', vmin=6, vmax=12)
ax10.imshow(xr_dat_mid['grazing_rel_freq'].sortby('UTM_Y', ascending=False).data, cmap='Spectral_r', vmin=0.0, vmax=4)
ax11.imshow(xr_dat_mid['pred_seas_grp_val'].sortby('UTM_Y', ascending=False).data, cmap=pred_grps_cmap)

ax12 = fig.add_subplot(gs2[2, 0])
ax13 = fig.add_subplot(gs2[2, 1])
ax14 = fig.add_subplot(gs2[2, 2])
ax15 = fig.add_subplot(gs2[2, 3])
ax12.imshow(xr_dat_late['Biomass_orig'].sortby('UTM_Y', ascending=False).data, cmap='magma', vmin=200, vmax=2500)
ax13.imshow(xr_dat_late['CP_orig'].sortby('UTM_Y', ascending=False).data, cmap='viridis', vmin=6, vmax=12)
ax14.imshow(xr_dat_late['grazing_rel_freq'].sortby('UTM_Y', ascending=False).data, cmap='Spectral_r', vmin=0.0, vmax=4)
ax15.imshow(xr_dat_late['pred_seas_grp_val'].sortby('UTM_Y', ascending=False).data, cmap=pred_grps_cmap)

for ax in [ax1, ax2, ax3, ax4, ax5, ax6, ax7, ax8, ax9, ax10, ax11, ax12, ax13, ax14, ax15]:
    #ax.axis('off')
    ax.set_yticklabels([])
    ax.set_xticklabels([])
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_frame_on(False)
    
ax4.set_ylabel("$\\bf{early{-}season}$" + "\n(week 21)", fontdict={'size': 12}, labelpad=15)
ax8.set_ylabel("$\\bf{mid{-}season}$" + "\n(week 27)", fontdict={'size': 12}, labelpad=15)
ax12.set_ylabel("$\\bf{late{-}season}$" + "\n(week 33)", fontdict={'size': 12}, labelpad=15)


plt.savefig('C:/Users/sean.kearney/OneDrive - USDA/Documents/Projects/GPS_v_hetgen/figures/Pasture_example_' + str(yr) + '.png',
            bbox_inches='tight', dpi=300)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

NameError: name 'pred_grps_cmap' is not defined

In [60]:
from matplotlib.gridspec import GridSpec
from matplotlib import colors, cm
import matplotlib.patches as mpatches
from mpl_toolkits.axes_grid1 import make_axes_locatable

tpc_labels = ['Complex' if x == 'Other' else x for x in tpc_dict.keys()]

tpc_cmap = colors.ListedColormap(['orange', 'yellow', 'lightgreen', 'darkgreen', 'red'])
tpc_patches =[mpatches.Patch(color=c, label=list(tpc_labels)[i]) for i, c in enumerate(tpc_cmap.colors)]

pc_dmt_cmap = colors.ListedColormap(['blue', 'olive', 'darkorange', 'pink', 'grey'])
pc_patches =[mpatches.Patch(color=c, label=list(pc_dmt_dict.keys())[i]) for i, c in enumerate(pc_dmt_cmap.colors)]

rel_use_breaks = [0, 0.001, 0.5, 0.75, 0.9, 1.0, 1.25, 2.5, 4.0, 1000]
rel_use_labels = ['None', '', 'Low', '', 'Avg.', '', 'Hi', '', 'V. Hi', '']
rel_use_cmap = cm.get_cmap('Spectral_r', len(rel_use_breaks))
res_use_patches =[mpatches.Patch(color=rel_use_cmap(i), 
                                   label=rel_use_labels[i]) for i in np.arange(len(rel_use_labels))]
# Create the colormap
cmap_rel_use = colors.LinearSegmentedColormap.from_list(
            'Relative use', [rel_use_cmap(i) for i in np.arange(len(rel_use_labels))],
    N=len(rel_use_labels))
norm_rel_use = colors.BoundaryNorm(rel_use_breaks, len(rel_use_breaks))

yr = 2017
xr_dat_early = xr_dat_past.sel(Year=yr, week=21)
xr_dat_mid = xr_dat_past.sel(Year=yr, week=27)
xr_dat_late = xr_dat_past.sel(Year=yr, week=33)

cax_fsize=8
cax_title_fsize=10
cax_pad=0.10

fig = plt.figure(figsize=(14, 10))

gs1 = GridSpec(1, 3, top=0.98, bottom=0.65, wspace=0.05)
ax1 = fig.add_subplot(gs1[0])
ax2 = fig.add_subplot(gs1[1])
ax3 = fig.add_subplot(gs1[2])
ax1.imshow(xr_dat_early['TPC_val'].sortby('UTM_Y', ascending=False).data, cmap=tpc_cmap)
ax1.legend(handles=tpc_patches, ncol=3, bbox_to_anchor=(0.5, 1.4), fontsize=8, 
           loc='upper center', frameon=False,
           title="$\\bf{Topographic\ Position\ Class}$", title_fontsize=10)
divider1 = make_axes_locatable(ax1)
cax1 = divider1.append_axes('top', size='10%', pad=cax_pad)
cax1.axes.set_visible(False)

ax2.imshow(xr_dat_early['PC_dmt_val'].sortby('UTM_Y', ascending=False).data, cmap=pc_dmt_cmap)
ax2.legend(handles=pc_patches, ncol=3, bbox_to_anchor=(0.5, 1.4), fontsize=8, 
           loc='upper center', frameon=False,
           title="$\\bf{Dominant\ Plant\ Community (PC)}$", title_fontsize=10)
divider2 = make_axes_locatable(ax2)
cax2 = divider2.append_axes('top', size='10%', pad=cax_pad)
cax2.axes.set_visible(False)

im3 = ax3.imshow(xr_dat_early['PC_div_orig'].sortby('UTM_Y', ascending=False).data, cmap='cividis', vmin=0.0, vmax=1.5)
divider3 = make_axes_locatable(ax3)
cax3 = divider3.append_axes('top', size='10%', pad=cax_pad)
cbar3 = fig.colorbar(im3, cax=cax3, orientation='horizontal')
cax3.xaxis.set_ticks_position('top')
cax3.set_title("$\\bf{PC\ Diversity\ (Shannon\ H)}$", fontsize=10)
cbar3.ax.tick_params(labelsize=8)

gs2 = GridSpec(3, 3, top=0.65, bottom=0.02, hspace=0.01)
ax4 = fig.add_subplot(gs2[0, 0])
ax5 = fig.add_subplot(gs2[0, 1])
ax6 = fig.add_subplot(gs2[0, 2])
im4 = ax4.imshow(xr_dat_early['Biomass_orig'].sortby('UTM_Y', ascending=False).data, cmap='magma', vmin=200, vmax=2500)
divider4 = make_axes_locatable(ax4)
cax4 = divider4.append_axes('top', size='10%', pad=cax_pad)
cbar4 = fig.colorbar(im4, cax=cax4, orientation='horizontal')
cax4.xaxis.set_ticks_position('top')
cax4.set_title("$\\bf{Biomass\ (kg/ha)}$", fontsize=10)
cbar4.ax.tick_params(labelsize=8)
#cax4left = divider4.append_axes('left', size='5%', pad=0.05)
#cax4left.yaxis.set_label("early-season")

im5 = ax5.imshow(xr_dat_early['CP_orig'].sortby('UTM_Y', ascending=False).data, cmap='viridis', vmin=6, vmax=12)
divider5 = make_axes_locatable(ax5)
cax5 = divider5.append_axes('top', size='10%', pad=cax_pad)
cbar5 = fig.colorbar(im5, cax=cax5, orientation='horizontal')
cax5.xaxis.set_ticks_position('top')
cax5.set_title("$\\bf{Crude\ Protein\ (\%)}$", fontsize=10)
cbar5.ax.tick_params(labelsize=8)

im6 = ax6.imshow(xr_dat_early['grazing_rel_freq'].sortby('UTM_Y', ascending=False).data, cmap=cmap_rel_use, norm=norm_rel_use)
divider6 = make_axes_locatable(ax6)
cax6 = divider6.append_axes('top', size='10%', pad=cax_pad)
cbar6 = fig.colorbar(im6, cax=cax6, orientation='horizontal')
cax6.xaxis.set_ticks_position('top')
cax6.set_title("$\\bf{Relative\ use}$", fontsize=10)
cbar6.ax.tick_params(labelsize=8)
cbar6.ax.set_xticklabels(rel_use_labels)

ax8 = fig.add_subplot(gs2[1, 0])
ax9 = fig.add_subplot(gs2[1, 1])
ax10 = fig.add_subplot(gs2[1, 2])
ax8.imshow(xr_dat_mid['Biomass_orig'].sortby('UTM_Y', ascending=False).data, cmap='magma', vmin=200, vmax=2500)
ax9.imshow(xr_dat_mid['CP_orig'].sortby('UTM_Y', ascending=False).data, cmap='viridis', vmin=6, vmax=12)
ax10.imshow(xr_dat_mid['grazing_rel_freq'].sortby('UTM_Y', ascending=False).data, cmap=cmap_rel_use, norm=norm_rel_use)

ax12 = fig.add_subplot(gs2[2, 0])
ax13 = fig.add_subplot(gs2[2, 1])
ax14 = fig.add_subplot(gs2[2, 2])
ax12.imshow(xr_dat_late['Biomass_orig'].sortby('UTM_Y', ascending=False).data, cmap='magma', vmin=200, vmax=2500)
ax13.imshow(xr_dat_late['CP_orig'].sortby('UTM_Y', ascending=False).data, cmap='viridis', vmin=6, vmax=12)
ax14.imshow(xr_dat_late['grazing_rel_freq'].sortby('UTM_Y', ascending=False).data, cmap=cmap_rel_use, norm=norm_rel_use)

for ax in [ax1, ax2, ax3, ax4, ax5, ax6, ax8, ax9, ax10, ax12, ax13, ax14]:
    #ax.axis('off')
    ax.set_yticklabels([])
    ax.set_xticklabels([])
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_frame_on(False)
    
ax1.set_ylabel("$\\bf{static}$", fontdict={'size': 12}, labelpad=15)
ax4.set_ylabel("$\\bf{early{-}season}$" + "\n(week 21)", fontdict={'size': 12}, labelpad=15)
ax8.set_ylabel("$\\bf{mid{-}season}$" + "\n(week 27)", fontdict={'size': 12}, labelpad=15)
ax12.set_ylabel("$\\bf{late{-}season}$" + "\n(week 33)", fontdict={'size': 12}, labelpad=15)


plt.savefig('C:/Users/sean.kearney/OneDrive - USDA/Documents/Projects/GPS_v_hetgen/figures/Pasture_example_' + str(yr) + '_no_pred.png',
            bbox_inches='tight', dpi=300)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [211]:
xr_dat_early['Biomass_orig'].isel(dict(UTM_X=10, UTM_Y=15))['UTM_X']

In [210]:
'te' in 'test'

True

In [156]:
"""
Compare models by season: how does selection change with distance to fence and watertank
"""
ref_TPC = 'Flat Plains'
ref_pc = 'C3_C4_mix'
df_pred = pd.DataFrame(columns=['season','dFence_orig', 'dTank_orig', 'Biomass_orig', 'CP_orig','PC_div_orig',
                                'PC_dmt', 'TPC_c', 'Feature', 'Model'])
for mod in tqdm(['M1', 'M_best']):
    for seas, df_sub in df_wkly_grid_full[df_wkly_grid_full['mod_data'] == 'train'].groupby('season'):
        if len(df_sub[(df_sub['PC_dmt'] == ref_pc) & (df_sub['TPC_c'] == ref_TPC)]) < 100:
            continue
        else:
            df_tmp = df_sub[(df_sub['TPC_c'] == ref_TPC) & (df_sub['PC_dmt'] == ref_pc)]
            for feat in ['Fence', 'Tank']:
                if feat == 'Fence':
                    dFence = np.arange(df_tmp['dFence_orig'].min(), df_tmp['dFence_orig'].max(), 10)
                else:
                    dFence = df_tmp['dFence_orig'].median()
                if feat == 'Tank':
                    dTank = np.arange(df_tmp['dTank_orig'].min(), df_tmp['dTank_orig'].max(), 10)
                else:
                    dTank = df_tmp['dTank_orig'].median()
                    
                df_pred = df_pred.append(pd.DataFrame({
                    'season': seas,
                    'dFence_orig': dFence,
                    'dTank_orig': dTank,
                    'Biomass_orig': df_tmp['Biomass_orig'].median(),
                    'CP_orig': df_tmp['CP_orig'].median(),
                    'PC_div_orig': df_tmp['PC_div_orig'].median(),
                    'PC_dmt': ref_pc,
                    'TPC_c': ref_TPC,
                    'Feature': feat,
                    'Model': mod
                }))
        for c in ['dFence', 'dTank', 'Biomass', 'CP', 'PC_div']:
            scaler = scaler_dict[seas][c]
            df_pred.loc[df_pred['season'] == seas, c] = scaler.transform(
                df_pred.loc[df_pred['season'] == seas, c + '_orig'].values.reshape(-1, 1)).flatten()
        df_pred.loc[(df_pred['season'] == seas) & (df_pred['Model'] == mod), 
                    'mod_pred'] = seas_mod_dict[seas][mod].predict(df_pred.loc[(df_pred['season'] == seas) & (df_pred['Model'] == mod)])
df_pred['season'] = pd.Categorical(df_pred['season'], ['early', 'mid', 'late'])
df_pred['pred_rel_freq'] = np.nan
for seas in df_pred.season.unique():
    df_wkly_grid_sub = df_wkly_grid_full[(df_wkly_grid_full['season'] == seas) & (df_wkly_grid_full['mod_data'] == 'train')]
    df_pred.loc[df_pred['season'] == seas, 'pred_rel_freq'] = (df_pred.loc[df_pred['season'] == seas, 'mod_pred'] * df_wkly_grid_sub['grazing_wkly_sum'].mean()) / (df_wkly_grid_sub['grazing_wkly_sum'].mean() / df_wkly_grid_sub.groupby(
        ['week', 'Pasture', 'Steer_ID'])['grazing_wkly_sum'].count().mean())

  0%|          | 0/2 [00:00<?, ?it/s]

KeyError: 'early'

In [26]:
df_pred['dFence_orig'][df_pred['Feature'] == 'Tank'] = np.nan
df_pred['dTank_orig'][df_pred['Feature'] == 'Fence'] = np.nan
df_pred_long = pd.melt(df_pred, id_vars=['season', 'Model', 'Feature', 'pred_rel_freq'], value_vars=['dFence_orig', 'dTank_orig'], value_name='Distance')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_pred['dFence_orig'][df_pred['Feature'] == 'Tank'] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_pred['dTank_orig'][df_pred['Feature'] == 'Fence'] = np.nan


In [28]:
"""
Plot marginal effects of distance to fence by model and season
"""
x_var = 'Distance'
g = sns.FacetGrid(df_pred_long[(df_pred_long['Model'].isin(['M1', 'M_best'])) & (df_pred_long['Feature'] == 'Fence')],
                  col='season', hue='Model',
                               col_order=['early', 'mid', 'late'], 
                  legend_out=True, sharey='col', sharex=True)
#plt.figure()
g.map_dataframe(sns.lineplot, x=x_var, y='pred_rel_freq').add_legend(bbox_to_anchor=(0.5, 0.98), 
                                                                     loc='upper center',
                                                                                                   borderaxespad=0, 
                                                                                                   ncol=2, 
                                                                                                   frameon=True)
g.set_xlabels('Distance to nearest fence (m)')
g.set_ylabels('Relative probability of selection')
axes = g.fig.axes
for ax in axes:
    ax.axhline(y=1.0, linestyle='dashed', color='grey')
    ax.set_ylim((0.0, ax.get_ylim()[-1]))
    if ax.get_ylim()[-1] < 2.0:
        ax.set_ylim((0.0, 2.0))
    cur_title = ax.get_title()
    ax.set_title(re.sub('season = ', '', cur_title + '-season'))
    box = ax.get_position()
    ax.set_position([box.x0, box.y0, box.width, box.height*0.8])
leg = g.legend
# change legend texts
new_title = 'Model type'
leg.set_title(new_title, prop={'weight': 'bold',
        'size': 12,
        })
new_labels = ['Null model (M0)', 'Fully paramaterized (M5)']
for t, l in zip(leg.texts, new_labels):
    t.set_text(l)
plt.savefig('C:/Users/sean.kearney/OneDrive - USDA/Documents/Projects/GPS_v_hetgen/figures/dfence.png', 
            bbox_inches='tight', dpi=300)


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [30]:
"""
Plot marginal effects of distance to water tank by model and season
"""
x_var = 'Distance'
g = sns.FacetGrid(df_pred_long[(df_pred_long['Model'].isin(['M1', 'M_best'])) & (df_pred_long['Feature'] == 'Tank')],
                  col='season', hue='Model',
                               col_order=['early', 'mid', 'late'], 
                  legend_out=True, sharey='col', sharex=True)
#plt.figure()
g.map_dataframe(sns.lineplot, x=x_var, y='pred_rel_freq').add_legend(bbox_to_anchor=(0.5, 0.98), 
                                                                     loc='upper center',
                                                                                                   borderaxespad=0, 
                                                                                                   ncol=2, 
                                                                                                   frameon=True)
g.set_xlabels('Distance to water tank (m)')
g.set_ylabels('Relative probability of selection')
axes = g.fig.axes
for ax in axes:
    ax.axhline(y=1.0, linestyle='dashed', color='grey')
    ax.set_ylim((0.0, ax.get_ylim()[-1]))
    if ax.get_ylim()[-1] < 2.0:
        ax.set_ylim((0.0, 2.0))
    cur_title = ax.get_title()
    ax.set_title(re.sub('season = ', '', cur_title + '-season'))
    box = ax.get_position()
    ax.set_position([box.x0, box.y0, box.width, box.height*0.8])
leg = g.legend
# change legend texts
new_title = 'Model type'
leg.set_title(new_title, prop={'weight': 'bold',
        'size': 12,
        })
new_labels = ['Null model (M0)', 'Fully paramaterized (M5)']
for t, l in zip(leg.texts, new_labels):
    t.set_text(l)
plt.savefig('C:/Users/sean.kearney/OneDrive - USDA/Documents/Projects/GPS_v_hetgen/figures/dtank.png', 
            bbox_inches='tight', dpi=300)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [32]:
plt.figure()
sns.kdeplot(x='grazing_rel_freq', hue='season', 
            data=df_wkly_grid_train, bw_adjust=2.0, clip=(0, None),
           common_norm=False, cumulative=False, common_grid=False)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<AxesSubplot:xlabel='grazing_rel_freq', ylabel='Density'>

In [49]:
df_wkly_grid_train['grazing_rel_freq_seas'] = (df_wkly_grid_train.groupby(['season', 'Pasture', 'Steer_ID', 'UTM_X', 'UTM_Y']).grazing_secs.transform('sum') /\
(df_wkly_grid_train.groupby(['season', 'Pasture', 'Steer_ID', 'UTM_X', 'UTM_Y']).grazing_wkly_sum.transform('sum') /\
 df_wkly_grid_train.groupby(['week', 'season', 'Pasture', 'Steer_ID']).grazing_secs.transform(lambda x: np.mean(len(x))))).reset_index(drop=True)

In [50]:
#df_wkly_grid_train[df_wkly_grid_train['mod_data'] == 'train']

In [51]:
#df_wkly_grid_train[(df_wkly_grid_train.Pasture == '7NW') &
#                 (df_wkly_grid_train.UTM_X == 524055.0) & 
#                 (df_wkly_grid_train.UTM_Y == 4522695.0) &
                  #(df_wkly_grid_train.Steer_ID == '2287_D1_2017') & 
#                 (df_wkly_grid_train.mod_data == 'train')].sort_values('Steer_ID')

In [52]:
df_wkly_grid_train['grazing_rel_freq_seas_grp'] = pd.cut(df_wkly_grid_train.groupby(['season', 'Pasture', 'UTM_X', 'UTM_Y'])['grazing_rel_freq_seas'].transform('mean'), 
                                          bins=rel_freq_bins, 
                                          labels=bin_labs,
                                          include_lowest=True)
df_wkly_grid_train['grazing_rel_freq_past_grp'] = pd.cut(df_wkly_grid_train.groupby(['week', 'Pasture', 'UTM_X', 'UTM_Y'])['grazing_rel_freq'].transform('mean'), 
                                          bins=rel_freq_bins, 
                                          labels=bin_labs,
                                          include_lowest=True)

In [59]:
df_wkly_grid_train.groupby('season')['grazing_secs'].apply(lambda x: np.sum(np.isnan(x)))

season
early    1
mid      0
late     0
Name: grazing_secs, dtype: int64

In [40]:
df_seas_grid = df_wkly_grid_train.groupby(['season', 'mod_data', 'Pasture',
                           'UTM_X', 'UTM_Y'])[['grazing_rel_freq_seas', 
                                                           'grazing_rel_freq_seas_grp',
                                                           'Biomass_orig',
                                                           'CP_orig', 
                                                           'PC_dmt', 
                                                           'PC_div_orig']].aggregate(func={
    'grazing_rel_freq_seas': 'last',
    'grazing_rel_freq_seas_grp': 'last',
    'Biomass_orig': np.mean,
    'CP_orig': np.mean,
    'PC_dmt': 'last',
    'PC_div_orig': 'last'
}).reset_index()
    #['mod_data', 'Pasture', 'Steer_ID', 'UTM_X', 'UTM_Y'])['grazing_rel_freq_wkly_grp'].transform(lambda x: '_'.join(x.astype('str')))

In [45]:
df_seas_grid['grazing_rel_freq_seas_grp'].dropna()

691397      Low
691398      Low
691399      Low
691400     Avg.
691401      Low
           ... 
4128583    Avg.
4128584      Hi
4128585      Hi
4128586      Hi
4128904      Hi
Name: grazing_rel_freq_seas_grp, Length: 25953, dtype: category
Categories (5, object): ['Low' < 'Mod. Low' < 'Avg.' < 'Mod. Hi' < 'Hi']

In [41]:
df_seas_grid = df_seas_grid[df_seas_grid['mod_data'] == 'train']
df_seas_grid['grazing_rel_freq_seas_traj'] = df_seas_grid.groupby(['mod_data', 'Pasture', 
                      'UTM_X', 'UTM_Y'])['grazing_rel_freq_seas_grp'].transform('_'.join)
df_seas_grid['season'] = pd.Categorical(df_seas_grid['season'], ['early', 'mid', 'late'])

TypeError: sequence item 0: expected str instance, float found

In [86]:
plt.figure()
sns.pointplot(x='season', y='Biomass', hue='grazing_rel_freq_seas_traj', 
            data=df_seas_grid[df_seas_grid['grazing_rel_freq_seas_traj'].isin(['Low_Low_Low',
                                                                               'Hi_Hi_Hi'])])

  plt.figure()


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<AxesSubplot:xlabel='season', ylabel='Biomass'>

In [89]:
plt.figure()
sns.pointplot(x='season', y='PC_div', hue='grazing_rel_freq_seas_traj', 
            data=df_seas_grid[df_seas_grid['grazing_rel_freq_seas_traj'].isin(['Low_Low_Low',
                                                                               'Hi_Hi_Hi'])])

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<AxesSubplot:xlabel='season', ylabel='PC_div'>

In [53]:
df_seas_grid['grazing_rel_freq_seas_traj'][df_seas_grid['season'] == 'late'].value_counts()

Low_Low_Low                 733
Hi_Hi_Hi                    296
Mod. Low_Low_Low            276
Low_Mod. Low_Low            245
Low_Low_Mod. Low            171
                           ... 
Mod. Hi_Mod. Hi_Mod. Hi      15
Mod. Hi_Mod. Hi_Mod. Low     13
Mod. Low_Mod. Hi_Mod. Hi     12
Mod. Hi_Mod. Low_Mod. Hi     11
Low_Mod. Hi_Mod. Hi           8
Name: grazing_rel_freq_seas_traj, Length: 125, dtype: int64

In [65]:
#plt.figure()
df_plot = df_wkly_grid_train[df_wkly_grid_train['mod_data'] == 'train'].groupby(
    ['season', 'week', 'Pasture', 'Steer_ID'])['grazing_rel_freq_seas_grp'].value_counts().reset_index().groupby(
    ['season', 'Pasture', 'level_4']).mean().reset_index().pivot(columns='level_4',
                                                               index=['season', 'Pasture'], values='grazing_rel_freq_seas_grp').reset_index()#.plot(kind='bar', stacked=True)
df_plot['pct_Hi'] = df_plot[['Hi']].sum(axis=1) / df_plot[['Low', 'Mod. Low', 'Avg.', 
                                                  'Mod. Hi', 'Hi']].sum(axis=1)
df_plot['season'] = pd.Categorical(df_plot['season'], ['early', 'mid', 'late'])
#df_plot
plt.figure()
sns.pointplot(x='season', y='pct_Hi', hue='Pasture', data=df_plot)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<AxesSubplot:xlabel='season', ylabel='pct_Hi'>

In [66]:
df_wkly_grid_train[df_wkly_grid_train['mod_data'] == 'train'].pivot(index=['Pasture', 'UTM_X', 'UTM_Y'])

level_4,season,Pasture,Low,Mod. Low,Avg.,Mod. Hi,Hi,pct_Hi
0,early,15E,524.833333,253.0,208.666667,122.166667,318.833333,0.223351
1,early,17N,393.833333,342.0,261.0,97.666667,288.333333,0.208509
2,early,20SE,422.0,294.0,315.333333,119.666667,269.0,0.189437
3,early,26E,415.333333,313.0,272.0,125.333333,249.333333,0.181333
4,early,31E,402.666667,355.333333,360.666667,179.333333,323.666667,0.199589
5,early,7NW,392.833333,318.0,270.5,140.833333,264.5,0.190745
6,late,15E,354.25,246.25,368.5,166.75,295.0,0.206186
7,late,17N,486.75,249.75,266.0,97.25,283.0,0.204665
8,late,20SE,424.75,291.25,287.25,141.25,276.25,0.19444
9,late,26E,450.5,251.5,264.0,116.5,295.0,0.214156


In [None]:
#plt.figure()
df_plot = df_wkly_grid_train[df_wkly_grid_train['mod_data'] == 'train'].groupby(
    ['season', 'week', 'Pasture', 'Steer_ID'])['grazing_rel_freq_seas_grp'].value_counts().reset_index().groupby(
    ['season', 'Pasture', 'level_4']).mean().reset_index().pivot(columns='level_4',
                                                               index=['season', 'Pasture'], values='grazing_rel_freq_seas_grp').reset_index()#.plot(kind='bar', stacked=True)
df_plot['pct_Hi'] = df_plot[['Hi']].sum(axis=1) / df_plot[['Low', 'Mod. Low', 'Avg.', 
                                                  'Mod. Hi', 'Hi']].sum(axis=1)
df_plot['season'] = pd.Categorical(df_plot['season'], ['early', 'mid', 'late'])
#df_plot
plt.figure()
sns.pointplot(x='season', y='pct_Hi', hue='Pasture', data=df_plot)

In [46]:
"""
Save data to make Sankey diagrams in R
"""
df_sankey = pd.DataFrame(columns=['class_early', 'class_mid', 'class_late', 'value'])
df_sankey['class_early'] = df_seas_grid[df_seas_grid['season'] == 'early']['grazing_rel_freq_seas_grp'].reset_index(drop=True)
df_sankey['class_mid'] = df_seas_grid[df_seas_grid['season'] == 'mid']['grazing_rel_freq_seas_grp'].reset_index(drop=True)
df_sankey['class_late'] = df_seas_grid[df_seas_grid['season'] == 'late']['grazing_rel_freq_seas_grp'].reset_index(drop=True)
df_sankey['value'] = 1
sankey_grp = df_sankey.groupby(['class_early', 'class_mid', 'class_late']).sum().reset_index(drop=False)
sankey_grp = sankey_grp.reindex(columns=sankey_grp.columns.tolist())
sankey_grp.to_csv('C:/SPK_local/zTEMP/gps_grid_sankey2.csv')

In [11]:
aicw_func = lambda x: np.exp(-0.5 * (x - x.min())) / np.sum(np.exp(-0.5 * (x - x.min())))
aicd_func = lambda x: x - x.min()

In [38]:
"""
Analyze best results for specific model comparisons defined in dictionary above
"""
for k in mod_aic_dict:
    df_results.loc[df_results['model'].isin(mod_aic_dict[k]), 
                   'AICw_' + k] = df_results.loc[df_results['model'].isin(mod_aic_dict[k])].groupby(
        ['week', 'Pasture'])['AIC'].transform(aicw_func)
    df_results.loc[df_results['model'].isin(mod_aic_dict[k]), 
                   'AICd_' + k] = df_results.loc[df_results['model'].isin(mod_aic_dict[k])].groupby(
        ['week', 'Pasture'])['AIC'].transform(aicd_func)
    df_results['top_model_' + k] = df_results.groupby(['week', 'Pasture'])['AICw_' + k].transform(
        lambda x: [int(i) for i in x == x.max()])
    df_results['candidate_model_' + k] = df_results.groupby(['week', 'Pasture'])['AICd_' + k].transform(
        lambda x: [int(i) for i in x <= 2.0])

In [39]:
"""
Display the results for the desired model comparison
"""
mod_grp = 'Hw2'
display(df_results.loc[df_results['model'].isin(mod_aic_dict[mod_grp])].groupby(['season', 'model'])[['top_model_' + mod_grp, 'candidate_model_' + mod_grp,
                                                                                'AICw_' + mod_grp, 'AICd_' + mod_grp]].aggregate(
    func={'AICd_' + mod_grp: [np.median, np.std], 'AICw_' + mod_grp: [np.mean, np.std], 
         'top_model_' + mod_grp: np.sum, 'candidate_model_' + mod_grp: np.sum}, axis=1).sort_values(['season', ('AICd_' + mod_grp, 'median')]))
display(df_results.loc[df_results['model'].isin(mod_aic_dict[mod_grp])].groupby(['model'])[['top_model_' + mod_grp, 'candidate_model_' + mod_grp, 
                                                                      'AICw_' + mod_grp, 'AICd_' + mod_grp]].aggregate(
    func={'AICd_' + mod_grp: [np.median, np.std], 'AICw_' + mod_grp: [np.mean, np.std], 
          'top_model_' + mod_grp: np.sum, 'candidate_model_' + mod_grp: np.sum}, axis=1).sort_values(('AICd_' + mod_grp, 'median')))

Unnamed: 0_level_0,Unnamed: 1_level_0,AICd_Hw2,AICd_Hw2,AICw_Hw2,AICw_Hw2,top_model_Hw2,candidate_model_Hw2
Unnamed: 0_level_1,Unnamed: 1_level_1,median,std,mean,std,sum,sum
season,model,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
early,M2g,0.275,1.081198,0.5061748,0.296941,9,16
early,M2f,3.65,15.11663,0.2215112,0.240397,4,8
early,M2e,6.54,15.016862,0.1527321,0.249046,3,5
early,M2d,19.35,53.099368,0.03105445,0.063824,0,3
early,M2b,28.545,41.595856,0.04482712,0.091801,1,2
early,M2a,46.48,59.347148,0.02105054,0.066307,0,1
early,M2c,55.24,83.766058,0.0226499,0.095805,1,1
late,M2g,0.0,1.393833,0.5669245,0.357838,7,10
late,M2f,3.97,13.607834,0.1704526,0.196187,2,5
late,M2e,17.635,17.977953,0.1075712,0.192545,1,3


Unnamed: 0_level_0,AICd_Hw2,AICd_Hw2,AICw_Hw2,AICw_Hw2,top_model_Hw2,candidate_model_Hw2
Unnamed: 0_level_1,median,std,mean,std,sum,sum
model,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
M2g,0.0,1.166435,0.576807,0.336463,28,42
M2f,4.55,25.114508,0.171345,0.20511,8,19
M2e,13.79,22.67833,0.135075,0.21963,6,13
M2d,20.395,41.549821,0.050169,0.117503,2,7
M2b,38.48,83.472705,0.048791,0.136041,3,5
M2a,71.34,89.428555,0.00916,0.041377,0,1
M2c,87.71,120.894697,0.008653,0.058657,1,1
