In [1]:
%load_ext autoreload
%autoreload 2

import os 
# Set R_HOME to the R installation inside the 'octagon_analysis' conda environment,
# so that it is in place before rpy2 is imported further down in this cell.
# The machine is recognised from HOME (Linux) or USERPROFILE (Windows); on any
# machine not listed here R_HOME is left unchanged.
if os.environ.get('HOME') == '/home/tom':                                           # Linux
    os.environ['R_HOME'] = '/home/tom/miniconda3/envs/octagon_analysis/lib/R'           # Lab desktop

elif os.environ.get('HOME') is None:                                                # HOME unset: treated as Windows
    if os.environ.get('USERPROFILE') == r'C:\Users\tomha':
        os.environ['R_HOME']= r'C:\Users\tomha\miniconda3\envs\octagon_analysis\lib\R'  # Laptop
    elif os.environ.get('USERPROFILE') == r'C:\Users\Tom':
        os.environ['R_HOME']=r'D:\Users\Tom\miniconda3\envs\octagon_analysis\lib\R'     # Home desktop
# elif:
    # append other machines here (one branch per HOME / USERPROFILE value)


import rpy2.robjects as robjects
print(robjects.r('R.version.string'))

import numpy as np
import pandas as pd
from ipywidgets import IntProgress
from IPython.display import display
from pymer4.models import Lmer
import populate_dataframes
import re




[1] "R version 4.1.3 (2022-03-10)"



In [2]:
k = 20  # number of folds each dataframe is split into for cross-validation
new_split_dataframes = False  # True: reshuffle the dataframes and overwrite the saved pickle; False: load the saved shuffle

### load data

In [3]:
import pickle

# Location of the pickled analysis results, relative to this notebook
analysis_dir = os.path.join('..', 'data')
# analysis_file = 'analysis_results_2levelsFirstSeenWall_normalisedWallSep.pkl'
analysis_file = 'analysis_results_3levelsFirstSeenWall_normalisedWallSep.pkl'
filename = os.path.join(analysis_dir, analysis_file)
# load the analysis results
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only load pickles that were produced by this project.
with open(filename, 'rb') as f:
    analysis_results = pickle.load(f)

### populate dataframes for glm input

In [4]:
# Build one GLM input dataframe per analysis type ('solo', 'solosocial', 'social')
# from the loaded analysis results, using the project's populate_dataframes module.
glm_df_solo = populate_dataframes.populate_dataframe(analysis_results, analysis_type='solo')
glm_df_solosocial = populate_dataframes.populate_dataframe(analysis_results, analysis_type='solosocial')
glm_df_social = populate_dataframes.populate_dataframe(analysis_results, analysis_type='social')


### create reference to dataframes

In [5]:
# Inspect how many rows of the social dataframe fall on each WallSep value
glm_df_social['WallSep'].value_counts()

WallSep
0.25    3888
0.50    2428
1.00    2130
Name: count, dtype: int64

In [6]:
# Collect the three GLM dataframes under their names so that they can be
# processed together in a single loop.
dataframes = {
    'glm_df_solo': glm_df_solo,
    'glm_df_solosocial': glm_df_solosocial,
    'glm_df_social': glm_df_social
}

### shuffle the dataframes for k-fold index selection


In [7]:
shuffled_dataframes_path = os.path.join('..', 'data', 'k_fold_CV', 'shuffled_dataframes.pkl')

if new_split_dataframes:

    # Shuffle every dataframe into a NEW dictionary with a fixed seed.
    # `dataframes` itself is not modified, so this cell is idempotent:
    # running it again yields exactly the same shuffle.
    shuffled_dataframes = {
        name: df.sample(frac=1, random_state=17).reset_index(drop=True)
        for name, df in dataframes.items()
    }

    # make sure the output directory exists before writing to it
    os.makedirs(os.path.dirname(shuffled_dataframes_path), exist_ok=True)

    # pickle save the shuffled dataframes as one dictionary
    with open(shuffled_dataframes_path, 'wb') as f:
        pickle.dump(shuffled_dataframes, f)

else:
    # load the previously saved shuffle so that the fold membership is the
    # same across sessions
    # NOTE(review): pickle.load can execute arbitrary code contained in the
    # file; only load pickles that were produced by this project.
    with open(shuffled_dataframes_path, 'rb') as f:
        shuffled_dataframes = pickle.load(f)



### Split each dataframe into folds

In [8]:
# Split each dataframe into k (near-)equal folds of consecutive rows.
# The row positions are split with numpy and then used to slice the frame:
# passing a DataFrame straight to np.array_split goes through the deprecated
# DataFrame.swapaxes and emits a FutureWarning. Fold sizes and contents are
# the same as np.array_split(df, k) would give, and row labels are preserved.
split_dataframes = {
    name: [df.iloc[fold_positions] for fold_positions in np.array_split(np.arange(len(df)), k)]
    for name, df in shuffled_dataframes.items()
}

  return bound(*args, **kwds)


### Cross-validation functions

In [9]:
from contextlib import redirect_stdout

def fit_models(split_df, model_formula):
    '''
    Fit one binomial Lmer model per fold.

    For a list of k fold dataframes, model i is trained on the concatenation
    of every fold except fold i (which is left out for later prediction).
    A progress bar widget is displayed and advanced after each fit; anything
    written to stdout while fitting is discarded.

    Args:
        split_df (list): fold dataframes.
        model_formula (str): formula passed to Lmer.

    Returns:
        list: the k fitted models, in fold order.
    '''
    n_folds = len(split_df)
    progress = IntProgress(min=0, max=n_folds, description='Fitting models')
    display(progress)

    fitted = []

    # Everything printed inside this block goes to the null device
    with open(os.devnull, 'w') as sink, redirect_stdout(sink):
        for held_out in range(n_folds):
            # training set = all folds but the held-out one
            training_folds = [fold for idx, fold in enumerate(split_df) if idx != held_out]
            train_data = pd.concat(training_folds, ignore_index=True)

            model = Lmer(model_formula, data=train_data, family='binomial')
            # bobyqa optimizer with a budget of 10000 function evaluations
            model.fit(control="optimizer='bobyqa', optCtrl=list(maxfun=10000)")

            fitted.append(model)
            print(f"Model {held_out} fit with {len(train_data)} rows")
            progress.value += 1

    return fitted

def drop_nans_relevant_columns(df, model_formula):
    ''' Drop rows that have a NaN in any column used by the model formula.

    A column is considered only if it (a) contains at least one NaN and
    (b) appears in ``model_formula`` as a complete variable name. Matching on
    complete names means that a column such as 'D2H' is not treated as part
    of the formula merely because the formula mentions 'OpponentD2H'.

    The dependent variable is part of the formula too, so rows with a NaN
    ground truth are dropped as well.

    Args:
        df (pd.DataFrame): data to filter; it is not modified.
        model_formula (str): model formula, e.g. 'y ~ 1 + x + (1|group)'.

    Returns:
        pd.DataFrame: ``df`` without the offending rows (``df`` itself when
        nothing needs to be dropped). Row labels of the kept rows are unchanged.
    '''
    nan_counts = df.isna().sum()
    nan_containing_cols = nan_counts[nan_counts > 0].index

    # Escape the column name (it is data, not a regex) and require that it is
    # not embedded in a longer identifier; letters, digits, '_' and '.' may all
    # be part of a variable name in a formula.
    relevant_cols = [
        column for column in nan_containing_cols
        if re.search(rf'(?<![\w.]){re.escape(str(column))}(?![\w.])', model_formula)
    ]

    if relevant_cols:
        df = df.dropna(subset=relevant_cols)

    return df

def calculate_predictions(split_df, models):
    '''
    Predict on the held-out fold with each trained model.

    Model i is paired with fold i of ``split_df``. Before predicting, the
    fold is copied and filtered with drop_nans_relevant_columns using the
    model's own formula.

    Returns:
        tuple: (all_predictions, all_predict_data) - for every fold, the
        predictions and the filtered dataframe they were made on.
    '''
    all_predictions, all_predict_data = [], []

    for fold_idx, fold_model in enumerate(models):
        # work on a copy of the held-out fold, with rows removed that have
        # NaNs in columns appearing in the model formula
        held_out = drop_nans_relevant_columns(split_df[fold_idx].copy(), fold_model.formula)

        # keep the filtered data so the ground truth can be looked up later
        all_predict_data.append(held_out)

        fold_predictions = fold_model.predict(held_out, skip_data_checks=True, verify_predictions=False)
        all_predictions.append(fold_predictions)

    return all_predictions, all_predict_data



def calculate_likelihoods(split_df, predictions, model_formula):
    ''' 
    Calculate the likelihood of each prediction given the true output.

    The likelihood is p^y * (1-p)^(1-y), where p is the predicted probability
    and y is the true output (0 or 1) taken from the 'ChooseHigh' column.

    ``predictions[i]`` must correspond to ``split_df[i]`` after that fold has
    been filtered with drop_nans_relevant_columns for ``model_formula`` (the
    same filtering that is applied before predicting). Rows whose ground
    truth is still NaN after that filtering are skipped.

    Args:
        split_df (list): fold dataframes.
        predictions (list): per-fold array-likes of predicted probabilities.
        model_formula (str): formula used to decide which NaN rows to drop.

    Returns:
        list: one 1-D float array of likelihoods per fold.

    Raises:
        IndexError: if a fold's predictions and its filtered ground truth
            differ in length.
    '''

    fold_likelihoods = []
    for fold_idx, prediction_fold in enumerate(predictions):

        predicted_output = np.asarray(prediction_fold, dtype=float)

        # apply the same row filtering that was used when predicting
        true_output = drop_nans_relevant_columns(split_df[fold_idx].copy(), model_formula)
        ground_truth = true_output['ChooseHigh']

        if len(predicted_output) != len(ground_truth):
            raise IndexError(
                f"Fold {fold_idx}: {len(predicted_output)} predictions do not match "
                f"{len(ground_truth)} ground truth rows"
            )

        # Only keep rows with a known ground truth. The mask is converted to a
        # plain positional numpy array so that the row labels of the fold
        # (which do not start at 0) cannot interfere with the indexing.
        nonnan_mask = ground_truth.notna().to_numpy()
        truth = ground_truth[nonnan_mask].to_numpy(dtype=float)
        predicted = predicted_output[nonnan_mask]

        # Bernoulli likelihood for every row at once
        likelihoods = predicted**truth * (1 - predicted)**(1 - truth)

        n_nan = int(np.isnan(likelihoods).sum())
        if n_nan:
            print(f"Fold {fold_idx}: {n_nan} NaN likelihood(s), check the predictions for NaNs")

        fold_likelihoods.append(likelihoods)

    return fold_likelihoods

def calculate_nlls(fold_likelihoods):
    ''' Turn per-fold likelihoods into negative log likelihoods (NLLs).

    For every fold the element-wise NLL is -log(likelihood); the fold average
    is the NaN-ignoring mean of those values and is printed. The model
    average is the NaN-ignoring mean of the fold averages.

    Returns:
        tuple: (fold_all_nlls, fold_avg_nlls, model_avg_nll) - a list with the
        element-wise NLLs of each fold, an array with one average per fold,
        and the mean of that array.
    '''
    n_folds = len(fold_likelihoods)
    fold_avg_nlls = np.full(n_folds, np.nan)
    fold_all_nlls = []

    for fold_idx, likelihoods in enumerate(fold_likelihoods):
        nlls = np.negative(np.log(likelihoods))
        mean_nll = np.nanmean(nlls)
        print(f"Fold {fold_idx} average NLL: {mean_nll}")

        fold_all_nlls.append(nlls)
        fold_avg_nlls[fold_idx] = mean_nll

    return fold_all_nlls, fold_avg_nlls, np.nanmean(fold_avg_nlls)


def save_cross_validation_results(name, model_formula, split_df, num_folds, predictions,
                                  predict_data, fold_nlls, fold_avg_nlls, model_avg_nll):
    ''' Save the cross-validation results to a pickle file.

    The file is written to ../data/k_fold_CV (created if it does not exist)
    as '<num_folds>-fold-CV_results_<name>.pickle'; an existing file with the
    same name is overwritten.

      Args:
        name (str): Name of the model type; used in the file name.
        model_formula (str): The formula used for the model.
        split_df (list): List of dataframes for each fold.
        num_folds (int): Number of folds in the cross-validation.
        predictions (list): List of arrays of predictions for each fold.
        predict_data (list): List of dataframes used for predictions (different to split_df
        in that all of the rows with nans in relevant prediction columns are removed).
        fold_nlls (list): Element-wise negative log likelihoods for each fold.
        fold_avg_nlls (np.array): Average negative log likelihoods for each fold.
        model_avg_nll (float): Average negative log likelihood across all folds
        (stored under the key "average_nll").'''

    cross_validation_results = {
        "name": name,
        "model_formula" : model_formula,
        "split_df" : split_df,
        "num_folds" : num_folds,
        # "models" : models,
        "predictions" : predictions,
        "predict_data" : predict_data, 
        "fold_nlls" : fold_nlls,
        "fold_avg_nlls" : fold_avg_nlls,
        "average_nll" : model_avg_nll
    }

    # Save the cross-validation results to a file; the directory is created
    # first so that a fresh checkout does not fail on the open() call
    output_dir = os.path.join('..', 'data', 'k_fold_CV')
    os.makedirs(output_dir, exist_ok=True)
    filename = f'{num_folds}-fold-CV_results_{name}.pickle'
    filepath = os.path.join(output_dir, filename)
    with open(filepath, 'wb') as f:
        pickle.dump(cross_validation_results, f)

    print(f"{num_folds}-fold CV data saved to: ", filepath)

In [10]:
def run_cross_validation(split_df, model_formula, name, save_results=False):
    ''' 
    Run the full k-fold cross-validation pipeline for one model formula.

    Pipeline: fit one model per fold -> predict on each held-out fold ->
    per-prediction likelihoods -> negative log likelihoods (NLLs). With
    ``save_results=True`` the results are also written to disk under ``name``.

    Returns, in this order:
    - model_avg_nll: the average NLL across all folds,
    - models: a list of the fitted model for each fold,
    - predictions: a list of arrays of predictions for each fold,
    - fold_all_nlls: a list with the element-wise NLLs of each fold,
    - fold_avg_nlls: the average NLL for each fold.
    '''
    n_folds = len(split_df)

    # fit -> predict
    models = fit_models(split_df, model_formula)
    predictions, predict_data = calculate_predictions(split_df, models)

    # likelihoods -> NLLs
    fold_all_nlls, fold_avg_nlls, model_avg_nll = calculate_nlls(
        calculate_likelihoods(split_df, predictions, model_formula)
    )

    # optional persistence (the fitted models themselves are not saved)
    if save_results:
        save_cross_validation_results(
            name, model_formula, split_df, n_folds, predictions,
            predict_data, fold_all_nlls, fold_avg_nlls, model_avg_nll,
        )

    return model_avg_nll, models, predictions, fold_all_nlls, fold_avg_nlls


### Solo models

In [11]:
# Solo data: fixed effects D2H and D2L only, random intercept per GlmPlayerID.
# Results are saved to disk; the returned variables are overwritten by every later model cell.
model_formula = 'ChooseHigh ~ 1 + D2H + D2L + (1|GlmPlayerID)'
(model_avg_nll, models,
predictions, fold_all_nlls, fold_avg_nlls) = run_cross_validation(split_dataframes['glm_df_solo'], model_formula,
                                                    "solo_randomintercepts_onlydistance",
                                                      save_results=True)

IntProgress(value=0, description='Fitting models', max=20)

  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(


Fold 0 average NLL: 0.2766952126535541
Fold 1 average NLL: 0.29547239931306646
Fold 2 average NLL: 0.3520194108999301
Fold 3 average NLL: 0.3050664831883057
Fold 4 average NLL: 0.2790347374399119
Fold 5 average NLL: 0.3270675305215161
Fold 6 average NLL: 0.3321872328927074
Fold 7 average NLL: 0.2911329369751824
Fold 8 average NLL: 0.2918816929923042
Fold 9 average NLL: 0.3353433414294066
Fold 10 average NLL: 0.2609944296621782
Fold 11 average NLL: 0.31158630397958414
Fold 12 average NLL: 0.3202526058032808
Fold 13 average NLL: 0.3755990021386372
Fold 14 average NLL: 0.264802398618293
Fold 15 average NLL: 0.28698694428365334
Fold 16 average NLL: 0.284253841106489
Fold 17 average NLL: 0.32131389415869205
Fold 18 average NLL: 0.3050602741686595
Fold 19 average NLL: 0.34629379605398863
20-fold CV data saved to:  ..\data\k_fold_CV\20-fold-CV_results_solo_randomintercepts_onlydistance.pickle


In [39]:
# Sanity check on fold 8: the sum of its element-wise NLLs should equal its average NLL times its length.
# NOTE(review): this cell carries the execution count In [39], i.e. it was run out of order;
# it reads whichever model's fold_all_nlls / fold_avg_nlls were assigned last.
np.sum(fold_all_nlls[8]), fold_avg_nlls[8]*len(fold_all_nlls[8])

(np.float64(213.29266402914783), np.float64(213.29266402914783))

In [12]:
# model_formula = 'ChooseHigh ~ 1 + FirstSeenWall + (1|GlmPlayerID)'
# (model_avg_nll, models,
#   predictions, fold_all_nlls, fold_avg_nlls) = run_cross_validation(split_dataframes['glm_df_solo'], model_formula,
#                                                     "solo_randomintercepts_firstseenonly",
#                                                       save_results=True)

In [13]:
# model_formula = 'ChooseHigh ~ 1 + WallSep + FirstSeenWall + (1|GlmPlayerID)'
# (model_avg_nll, models,
#   predictions, fold_all_nlls, fold_avg_nlls) = run_cross_validation(split_dataframes['glm_df_solo'], model_formula,
#                                                     "solo_randomintercepts_wallsep_firstseen",
#                                                       save_results=True)

In [14]:
# Solo data: fixed effects D2H, D2L and WallSep, random intercept per GlmPlayerID.
model_formula = 'ChooseHigh ~ 1 + D2H + D2L + WallSep + (1|GlmPlayerID)'
(model_avg_nll, models,
  predictions, fold_all_nlls, fold_avg_nlls) = run_cross_validation(split_dataframes['glm_df_solo'], model_formula,
                                                    "solo_randomintercepts_distancepluswallsep",
                                                      save_results=True)

IntProgress(value=0, description='Fitting models', max=20)

  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(


Fold 0 average NLL: 0.2608716500057588
Fold 1 average NLL: 0.28423616423119086
Fold 2 average NLL: 0.3537761857473281
Fold 3 average NLL: 0.2765300217136157
Fold 4 average NLL: 0.2683716285721701
Fold 5 average NLL: 0.3254190845840474
Fold 6 average NLL: 0.32327703503246646
Fold 7 average NLL: 0.28283360254282547
Fold 8 average NLL: 0.2721974437856051
Fold 9 average NLL: 0.31628012609647604
Fold 10 average NLL: 0.264777550572167
Fold 11 average NLL: 0.3004453465411507
Fold 12 average NLL: 0.2994899022599431
Fold 13 average NLL: 0.3555282737893079
Fold 14 average NLL: 0.2575248172073129
Fold 15 average NLL: 0.26859325757251
Fold 16 average NLL: 0.2846100788487303
Fold 17 average NLL: 0.3055905751168851
Fold 18 average NLL: 0.2944038171715241
Fold 19 average NLL: 0.34047009227209507
20-fold CV data saved to:  ..\data\k_fold_CV\20-fold-CV_results_solo_randomintercepts_distancepluswallsep.pickle


In [15]:
# Solo data: all four main effects (D2H, D2L, FirstSeenWall, WallSep), random intercept per GlmPlayerID.
model_formula = 'ChooseHigh ~ 1 + D2H + D2L + FirstSeenWall + WallSep + (1|GlmPlayerID)'
(model_avg_nll, models,
  predictions, fold_all_nlls, fold_avg_nlls) = run_cross_validation(split_dataframes['glm_df_solo'], model_formula,
                                                    "solo_randomintercepts",
                                                      save_results=True)

IntProgress(value=0, description='Fitting models', max=20)

  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(


Fold 0 average NLL: 0.20285147039946846
Fold 1 average NLL: 0.2263933697872842
Fold 2 average NLL: 0.31756524404355113
Fold 3 average NLL: 0.25114714446736847
Fold 4 average NLL: 0.2364846416368972
Fold 5 average NLL: 0.24298101717530415
Fold 6 average NLL: 0.2633009458158499
Fold 7 average NLL: 0.2197755936586168
Fold 8 average NLL: 0.2418839716360198
Fold 9 average NLL: 0.23474080552007054
Fold 10 average NLL: 0.22813391310732176
Fold 11 average NLL: 0.23811503179671037
Fold 12 average NLL: 0.22334581980894902
Fold 13 average NLL: 0.2786534536901988
Fold 14 average NLL: 0.2079396282073283
Fold 15 average NLL: 0.22543466481953064
Fold 16 average NLL: 0.2113970233714495
Fold 17 average NLL: 0.20681984168826748
Fold 18 average NLL: 0.20859923112548515
Fold 19 average NLL: 0.2734534925621421
20-fold CV data saved to:  ..\data\k_fold_CV\20-fold-CV_results_solo_randomintercepts.pickle


In [16]:
# model_formula = 'ChooseHigh ~ 1 + D2H + D2L + FirstSeenWall + (1|GlmPlayerID)'
# (model_avg_nll, models,
#   predictions, fold_all_nlls, fold_avg_nlls) = run_cross_validation(split_dataframes['glm_df_solo'], model_formula,
#                                                     "solo_randomintercepts_distanceplusfirstseen",
#                                                       save_results=True)

In [17]:
# Solo data: the four main effects plus the D2H:FirstSeenWall and D2L:FirstSeenWall interactions.
model_formula = 'ChooseHigh ~ 1 + D2H + D2L + FirstSeenWall + WallSep + D2H:FirstSeenWall + D2L:FirstSeenWall + (1|GlmPlayerID)'
(model_avg_nll, models,
  predictions, fold_all_nlls, fold_avg_nlls) = run_cross_validation(split_dataframes['glm_df_solo'], model_formula,
                                                    "solo_randomintercepts_distancesinteraction",
                                                      save_results=True)

IntProgress(value=0, description='Fitting models', max=20)

  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(


Fold 0 average NLL: 0.20014334738980757
Fold 1 average NLL: 0.22826602950924246
Fold 2 average NLL: 0.307845133924021
Fold 3 average NLL: 0.24507122910456555
Fold 4 average NLL: 0.2438019588134818
Fold 5 average NLL: 0.23714137857280604
Fold 6 average NLL: 0.26320412613176597
Fold 7 average NLL: 0.2204737821270961
Fold 8 average NLL: 0.24904223217847465
Fold 9 average NLL: 0.2339755286189808
Fold 10 average NLL: 0.22266198046187594
Fold 11 average NLL: 0.2357499139886113
Fold 12 average NLL: 0.22613318034029678
Fold 13 average NLL: 0.279093008346245
Fold 14 average NLL: 0.20224942165563306
Fold 15 average NLL: 0.22382585634688837
Fold 16 average NLL: 0.21301978840790503
Fold 17 average NLL: 0.20534457179026616
Fold 18 average NLL: 0.20170327232127883
Fold 19 average NLL: 0.2679330591504386
20-fold CV data saved to:  ..\data\k_fold_CV\20-fold-CV_results_solo_randomintercepts_distancesinteraction.pickle


In [18]:
# Solo data: as the previous interaction model, with the additional FirstSeenWall:WallSep interaction.
model_formula = 'ChooseHigh ~ 1 + D2H + D2L + FirstSeenWall + WallSep + D2H:FirstSeenWall + D2L:FirstSeenWall + FirstSeenWall:WallSep + (1|GlmPlayerID)'
(model_avg_nll, models,
  predictions, fold_all_nlls, fold_avg_nlls) = run_cross_validation(split_dataframes['glm_df_solo'], model_formula,
                                                    "solo_randomintercepts_distancesandwallsepinteraction",
                                                      save_results=True)

IntProgress(value=0, description='Fitting models', max=20)

  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(


Fold 0 average NLL: 0.19983985610162316
Fold 1 average NLL: 0.22225468605815224
Fold 2 average NLL: 0.29599871868518746
Fold 3 average NLL: 0.23961011133719282
Fold 4 average NLL: 0.24788881899051185
Fold 5 average NLL: 0.22847053132685624
Fold 6 average NLL: 0.2512964568219205
Fold 7 average NLL: 0.20964622263624086
Fold 8 average NLL: 0.24624713283818853
Fold 9 average NLL: 0.23034741399607792
Fold 10 average NLL: 0.2183287443182414
Fold 11 average NLL: 0.23096511481285875
Fold 12 average NLL: 0.22073584077559202
Fold 13 average NLL: 0.2877775606800399
Fold 14 average NLL: 0.19779430256705754
Fold 15 average NLL: 0.22797895914368246
Fold 16 average NLL: 0.2101812524470797
Fold 17 average NLL: 0.2059602162269392
Fold 18 average NLL: 0.19720896878072364
Fold 19 average NLL: 0.2669010124489579
20-fold CV data saved to:  ..\data\k_fold_CV\20-fold-CV_results_solo_randomintercepts_distancesandwallsepinteraction.pickle


### Social models

In [19]:
# Social data: main effects D2H, D2L, FirstSeenWall and WallSep only (no opponent terms),
# random intercept per GlmPlayerID.
model_formula = 'ChooseHigh ~ 1 + D2H + D2L + FirstSeenWall + WallSep + (1|GlmPlayerID)'
(model_avg_nll, models,
  predictions, fold_all_nlls, fold_avg_nlls) = run_cross_validation(split_dataframes['glm_df_social'], model_formula,
                                                    "social_randomintercepts_no-opponentvisible",
                                                      save_results=True)

IntProgress(value=0, description='Fitting models', max=20)

  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(


Fold 0 average NLL: 0.47886458407968935
Fold 1 average NLL: 0.5101717221371478
Fold 2 average NLL: 0.45981054951761297
Fold 3 average NLL: 0.4784834726465041
Fold 4 average NLL: 0.5168926814148331
Fold 5 average NLL: 0.4826064232037679
Fold 6 average NLL: 0.48179052197164024
Fold 7 average NLL: 0.42618777805030683
Fold 8 average NLL: 0.4485457649073659
Fold 9 average NLL: 0.4227906590838583
Fold 10 average NLL: 0.5147250591523439
Fold 11 average NLL: 0.49595154370430256
Fold 12 average NLL: 0.4591320133622971
Fold 13 average NLL: 0.40945878081657083
Fold 14 average NLL: 0.38033070069043934
Fold 15 average NLL: 0.4770088805106813
Fold 16 average NLL: 0.4541032583588909
Fold 17 average NLL: 0.4456776160908541
Fold 18 average NLL: 0.4641064461052367
Fold 19 average NLL: 0.45705539855553184
20-fold CV data saved to:  ..\data\k_fold_CV\20-fold-CV_results_social_randomintercepts_no-opponentvisible.pickle


In [20]:
# Social data: the four main effects plus OpponentVisible.
model_formula = 'ChooseHigh ~ 1 + D2H + D2L + FirstSeenWall + WallSep + OpponentVisible + (1|GlmPlayerID)'

(model_avg_nll, models,
  predictions, fold_all_nlls, fold_avg_nlls) = run_cross_validation(split_dataframes['glm_df_social'], model_formula,
                                                    "social_randomintercepts_opponentvisible",
                                                      save_results=True)

IntProgress(value=0, description='Fitting models', max=20)

  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(


Fold 0 average NLL: 0.47720151835010133
Fold 1 average NLL: 0.5077617650645058
Fold 2 average NLL: 0.46143789734928
Fold 3 average NLL: 0.4795587104241059
Fold 4 average NLL: 0.5138781199494487
Fold 5 average NLL: 0.48172668695170145
Fold 6 average NLL: 0.4835048779889405
Fold 7 average NLL: 0.4262175118936428
Fold 8 average NLL: 0.44856353885591177
Fold 9 average NLL: 0.42195935690483
Fold 10 average NLL: 0.5147871763377461
Fold 11 average NLL: 0.4968792918312624
Fold 12 average NLL: 0.4582552513130882
Fold 13 average NLL: 0.4087209601566241
Fold 14 average NLL: 0.3829659950529266
Fold 15 average NLL: 0.47776477006431134
Fold 16 average NLL: 0.45216055304861613
Fold 17 average NLL: 0.445795393000175
Fold 18 average NLL: 0.46346020531228665
Fold 19 average NLL: 0.4585772201790681
20-fold CV data saved to:  ..\data\k_fold_CV\20-fold-CV_results_social_randomintercepts_opponentvisible.pickle


In [21]:
# Social data: the four main effects plus OpponentD2H and OpponentD2L (no OpponentVisible term).
# The formula is one string, written as two adjacent literals joined by the line continuation.
model_formula = 'ChooseHigh ~ 1 + D2H + D2L + FirstSeenWall + WallSep  + OpponentD2H' \
' + OpponentD2L + (1|GlmPlayerID)'

(model_avg_nll, models,
  predictions, fold_all_nlls, fold_avg_nlls) = run_cross_validation(split_dataframes['glm_df_social'], model_formula,
                                                    "social_randomintercepts_no-opponentvisible_opponentdistance",
                                                      save_results=True)

IntProgress(value=0, description='Fitting models', max=20)

  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(


Fold 0 average NLL: 0.4796181052571466
Fold 1 average NLL: 0.5041478123425852
Fold 2 average NLL: 0.4618371006914584
Fold 3 average NLL: 0.4691043296657425
Fold 4 average NLL: 0.512308530244701
Fold 5 average NLL: 0.4884892639502958
Fold 6 average NLL: 0.48095410042778586
Fold 7 average NLL: 0.41989482680662404
Fold 8 average NLL: 0.43949190838223723
Fold 9 average NLL: 0.42179724424751713
Fold 10 average NLL: 0.5149708718894697
Fold 11 average NLL: 0.4986562997844378
Fold 12 average NLL: 0.44865817500140615
Fold 13 average NLL: 0.4022685814735592
Fold 14 average NLL: 0.37571782296066486
Fold 15 average NLL: 0.46611955461302695
Fold 16 average NLL: 0.4530391119698437
Fold 17 average NLL: 0.44944084075369056
Fold 18 average NLL: 0.4646095069383519
Fold 19 average NLL: 0.45824664159562467
20-fold CV data saved to:  ..\data\k_fold_CV\20-fold-CV_results_social_randomintercepts_no-opponentvisible_opponentdistance.pickle


In [22]:
# Social data: the four main effects plus OpponentD2H, OpponentD2L and OpponentFirstSeenWall.
model_formula = 'ChooseHigh ~ 1 + D2H + D2L + FirstSeenWall + WallSep  + OpponentD2H' \
' + OpponentD2L + OpponentFirstSeenWall + (1|GlmPlayerID)'

(model_avg_nll, models,
  predictions, fold_all_nlls, fold_avg_nlls) = run_cross_validation(split_dataframes['glm_df_social'], model_formula,
                                                    "social_randomintercepts_no-opponentvisible_opponentdistance_opponentfirstseen",
                                                      save_results=True)

IntProgress(value=0, description='Fitting models', max=20)

  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(


Fold 0 average NLL: 0.4642251211415738
Fold 1 average NLL: 0.4845046815180283
Fold 2 average NLL: 0.4602211522901884
Fold 3 average NLL: 0.48416761883986104
Fold 4 average NLL: 0.5100071791020154
Fold 5 average NLL: 0.45806187712519353
Fold 6 average NLL: 0.4660036317425119
Fold 7 average NLL: 0.37694793386196607
Fold 8 average NLL: 0.41276523195257175
Fold 9 average NLL: 0.3961187058414286
Fold 10 average NLL: 0.5475573365076507
Fold 11 average NLL: 0.4877961190705594
Fold 12 average NLL: 0.43553478623739067
Fold 13 average NLL: 0.38175088435267174
Fold 14 average NLL: 0.34925319368941543
Fold 15 average NLL: 0.4592060501347306
Fold 16 average NLL: 0.44401822696318144
Fold 17 average NLL: 0.3681550350248013
Fold 18 average NLL: 0.4484075340871315
Fold 19 average NLL: 0.46402600858075904
20-fold CV data saved to:  ..\data\k_fold_CV\20-fold-CV_results_social_randomintercepts_no-opponentvisible_opponentdistance_opponentfirstseen.pickle


In [23]:
# Social-trial model: main effects (solo + opponent predictors) plus the
# solo interaction terms FirstSeenWall:WallSep, D2H:FirstSeenWall and
# D2L:FirstSeenWall, with a random intercept grouped by GlmPlayerID.
# The literals below concatenate to exactly the formula string used before
# (including the doubled space ahead of '+ OpponentD2H').
model_formula = (
    'ChooseHigh ~ 1 + D2H + D2L + FirstSeenWall + WallSep'
    '  + OpponentD2H + OpponentD2L + OpponentFirstSeenWall'
    ' + FirstSeenWall:WallSep + D2H:FirstSeenWall + D2L:FirstSeenWall'
    ' + (1|GlmPlayerID)'
)

# Cross-validate on the pre-split social dataframe; the third argument is the
# label that appears in the saved results file name.
model_avg_nll, models, predictions, fold_all_nlls, fold_avg_nlls = run_cross_validation(
    split_dataframes['glm_df_social'],
    model_formula,
    "social_randomintercepts_no-opponentvisible_opponentdistance_opponentfirstseen_solointeractions",
    save_results=True,
)

IntProgress(value=0, description='Fitting models', max=20)

  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(


Fold 0 average NLL: 0.4527064245347332
Fold 1 average NLL: 0.46475196497472515
Fold 2 average NLL: 0.4435115779308146
Fold 3 average NLL: 0.45440932276182455
Fold 4 average NLL: 0.49354770077336696
Fold 5 average NLL: 0.4418202295023632
Fold 6 average NLL: 0.42614608899070755
Fold 7 average NLL: 0.3741041650436613
Fold 8 average NLL: 0.3874470875550091
Fold 9 average NLL: 0.3835151211270888
Fold 10 average NLL: 0.5169562917699294
Fold 11 average NLL: 0.48980570209962226
Fold 12 average NLL: 0.4283383920982508
Fold 13 average NLL: 0.36661623458610404
Fold 14 average NLL: 0.3238844412318699
Fold 15 average NLL: 0.4420439109382859
Fold 16 average NLL: 0.4491054417170753
Fold 17 average NLL: 0.35465948381951007
Fold 18 average NLL: 0.43694320136344905
Fold 19 average NLL: 0.46866893399549026
20-fold CV data saved to:  ..\data\k_fold_CV\20-fold-CV_results_social_randomintercepts_no-opponentvisible_opponentdistance_opponentfirstseen_solointeractions.pickle


In [24]:
# Social-trial model: main effects (solo + opponent predictors), the solo
# interaction terms, and the self-by-opponent interactions D2H:OpponentD2H
# and D2L:OpponentD2L, with a random intercept grouped by GlmPlayerID.
# The literals below concatenate to exactly the formula string used before
# (including the doubled space ahead of '+ OpponentD2H').
model_formula = (
    'ChooseHigh ~ 1 + D2H + D2L + FirstSeenWall + WallSep'
    '  + OpponentD2H + OpponentD2L + OpponentFirstSeenWall'
    ' + FirstSeenWall:WallSep + D2H:FirstSeenWall + D2L:FirstSeenWall'
    ' + D2H:OpponentD2H + D2L:OpponentD2L'
    ' + (1|GlmPlayerID)'
)

# Cross-validate on the pre-split social dataframe; the third argument is the
# label that appears in the saved results file name.
model_avg_nll, models, predictions, fold_all_nlls, fold_avg_nlls = run_cross_validation(
    split_dataframes['glm_df_social'],
    model_formula,
    "social_randomintercepts_no-opponentvisible_opponentdistance_opponentfirstseen_solo-and-social-interactions",
    save_results=True,
)

IntProgress(value=0, description='Fitting models', max=20)

  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(


Fold 0 average NLL: 0.45182695428226055
Fold 1 average NLL: 0.4633561612911289
Fold 2 average NLL: 0.4451211193394005
Fold 3 average NLL: 0.4572974045282722
Fold 4 average NLL: 0.4915912860241089
Fold 5 average NLL: 0.43996626048944776
Fold 6 average NLL: 0.4253468855584289
Fold 7 average NLL: 0.3753868146501323
Fold 8 average NLL: 0.38857762142362223
Fold 9 average NLL: 0.38212783537785594
Fold 10 average NLL: 0.5180694921678508
Fold 11 average NLL: 0.49287443204984116
Fold 12 average NLL: 0.43147365168821766
Fold 13 average NLL: 0.36570525398572357
Fold 14 average NLL: 0.32504886176977543
Fold 15 average NLL: 0.440584143342159
Fold 16 average NLL: 0.44864865530206505
Fold 17 average NLL: 0.35485340419412253
Fold 18 average NLL: 0.43663420315951373
Fold 19 average NLL: 0.46856677867713864
20-fold CV data saved to:  ..\data\k_fold_CV\20-fold-CV_results_social_randomintercepts_no-opponentvisible_opponentdistance_opponentfirstseen_solo-and-social-interactions.pickle


### Solo-social models

In [25]:
# Baseline solo+social model: main effects only (D2H, D2L, FirstSeenWall,
# WallSep) with a random intercept grouped by GlmPlayerID.
model_formula = (
    'ChooseHigh ~ 1 + D2H + D2L + FirstSeenWall + WallSep'
    ' + (1 |GlmPlayerID)'
)

# Cross-validate on the pre-split solo+social dataframe; the third argument
# is the label that appears in the saved results file name.
model_avg_nll, models, predictions, fold_all_nlls, fold_avg_nlls = run_cross_validation(
    split_dataframes['glm_df_solosocial'],
    model_formula,
    "solosocial_randomintercepts",
    save_results=True,
)

IntProgress(value=0, description='Fitting models', max=20)

  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(


Fold 0 average NLL: 0.44323099397258114
Fold 1 average NLL: 0.4086499953412541
Fold 2 average NLL: 0.44923306186286266
Fold 3 average NLL: 0.44458361398484164
Fold 4 average NLL: 0.4008870885123465
Fold 5 average NLL: 0.3765003028731802
Fold 6 average NLL: 0.44218749475310126
Fold 7 average NLL: 0.4870591465294052
Fold 8 average NLL: 0.44195917475551594
Fold 9 average NLL: 0.40260156832320965
Fold 10 average NLL: 0.41531309159036767
Fold 11 average NLL: 0.4374880427427129
Fold 12 average NLL: 0.43156253283252405
Fold 13 average NLL: 0.41741624699418894
Fold 14 average NLL: 0.38879645885512537
Fold 15 average NLL: 0.4650907073892123
Fold 16 average NLL: 0.4161268917373218
Fold 17 average NLL: 0.4586117770733195
Fold 18 average NLL: 0.4278904590960472
Fold 19 average NLL: 0.39610183418212785
20-fold CV data saved to:  ..\data\k_fold_CV\20-fold-CV_results_solosocial_randomintercepts.pickle


In [11]:
# Solo+social model: baseline main effects plus a SocialContext main effect,
# with a random intercept grouped by GlmPlayerID.
model_formula = (
    'ChooseHigh ~ 1 + D2H + D2L + FirstSeenWall + WallSep'
    ' + SocialContext'
    ' + (1 |GlmPlayerID)'
)

# Cross-validate on the pre-split solo+social dataframe; the third argument
# is the label that appears in the saved results file name.
model_avg_nll, models, predictions, fold_all_nlls, fold_avg_nlls = run_cross_validation(
    split_dataframes['glm_df_solosocial'],
    model_formula,
    "solosocial_randomintercepts_plus-socialcontext",
    save_results=True,
)

IntProgress(value=0, description='Fitting models', max=20)

  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(


Fold 0 average NLL: 0.3850923849865695
Fold 1 average NLL: 0.3619628350628039
Fold 2 average NLL: 0.38782186357127557
Fold 3 average NLL: 0.40135801028969126
Fold 4 average NLL: 0.35853662184157514
Fold 5 average NLL: 0.3346764733478181
Fold 6 average NLL: 0.374330415897285
Fold 7 average NLL: 0.43506793657638404
Fold 8 average NLL: 0.3983580202947819
Fold 9 average NLL: 0.348554248564584
Fold 10 average NLL: 0.35672136242462166
Fold 11 average NLL: 0.3774526769924696
Fold 12 average NLL: 0.3634760687133969
Fold 13 average NLL: 0.39281605171814254
Fold 14 average NLL: 0.33511784346324325
Fold 15 average NLL: 0.4271343635495151
Fold 16 average NLL: 0.35028298361724003
Fold 17 average NLL: 0.3976196574853607
Fold 18 average NLL: 0.37441650776793767
Fold 19 average NLL: 0.3617709239689128
20-fold CV data saved to:  ..\data\k_fold_CV\20-fold-CV_results_solosocial_randomintercepts_plus-socialcontext.pickle


In [27]:
# Solo+social model: baseline main effects, SocialContext, and the solo
# interaction terms FirstSeenWall:WallSep, D2H:FirstSeenWall and
# D2L:FirstSeenWall, with a random intercept grouped by GlmPlayerID.
model_formula = (
    'ChooseHigh ~ 1 + D2H + D2L + FirstSeenWall + WallSep'
    ' + SocialContext'
    ' + FirstSeenWall:WallSep + D2H:FirstSeenWall + D2L:FirstSeenWall'
    ' + (1 |GlmPlayerID)'
)

# Cross-validate on the pre-split solo+social dataframe; the third argument
# is the label that appears in the saved results file name.
model_avg_nll, models, predictions, fold_all_nlls, fold_avg_nlls = run_cross_validation(
    split_dataframes['glm_df_solosocial'],
    model_formula,
    "solosocial_randomintercepts_socialcontext_fullsolointeractions",
    save_results=True,
)

IntProgress(value=0, description='Fitting models', max=20)

  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(


Fold 0 average NLL: 0.3877711902246475
Fold 1 average NLL: 0.3412161407484567
Fold 2 average NLL: 0.37322292709858423
Fold 3 average NLL: 0.405168197943988
Fold 4 average NLL: 0.3574183093970005
Fold 5 average NLL: 0.31961068764726697
Fold 6 average NLL: 0.34659225941369853
Fold 7 average NLL: 0.42223155762583936
Fold 8 average NLL: 0.3712344268257304
Fold 9 average NLL: 0.3464098254736178
Fold 10 average NLL: 0.3430317641303928
Fold 11 average NLL: 0.3548644623883942
Fold 12 average NLL: 0.3544705216949792
Fold 13 average NLL: 0.3802617640797236
Fold 14 average NLL: 0.3284275322901007
Fold 15 average NLL: 0.4146573361352972
Fold 16 average NLL: 0.3291529284485199
Fold 17 average NLL: 0.38490527046067585
Fold 18 average NLL: 0.36747336157825367
Fold 19 average NLL: 0.3670216013839426
20-fold CV data saved to:  ..\data\k_fold_CV\20-fold-CV_results_solosocial_randomintercepts_socialcontext_fullsolointeractions.pickle


In [12]:
# Solo+social model: baseline main effects, SocialContext, and the
# SocialContext:FirstSeenWall interaction, with a random intercept grouped
# by GlmPlayerID.
model_formula = (
    'ChooseHigh ~ 1 + D2H + D2L + FirstSeenWall + WallSep'
    ' + SocialContext'
    ' + SocialContext:FirstSeenWall'
    ' + (1 |GlmPlayerID)'
)

# Cross-validate on the pre-split solo+social dataframe; the third argument
# is the label that appears in the saved results file name.
model_avg_nll, models, predictions, fold_all_nlls, fold_avg_nlls = run_cross_validation(
    split_dataframes['glm_df_solosocial'],
    model_formula,
    "solosocial_randomintercepts_plus-socialcontext_fsw-interactions",
    save_results=True,
)

IntProgress(value=0, description='Fitting models', max=20)

  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(


Fold 0 average NLL: 0.38462751422270475
Fold 1 average NLL: 0.3623434498906241
Fold 2 average NLL: 0.38735028091271817
Fold 3 average NLL: 0.401254596151485
Fold 4 average NLL: 0.3609686950444204
Fold 5 average NLL: 0.33401574937235023
Fold 6 average NLL: 0.3751396532772792
Fold 7 average NLL: 0.43611068985186213
Fold 8 average NLL: 0.3974478273854111
Fold 9 average NLL: 0.34890755580162514
Fold 10 average NLL: 0.3559993893805863
Fold 11 average NLL: 0.37687439767265263
Fold 12 average NLL: 0.36235064502826764
Fold 13 average NLL: 0.3926059069332869
Fold 14 average NLL: 0.33469360320182556
Fold 15 average NLL: 0.4272139743412131
Fold 16 average NLL: 0.3506503862318063
Fold 17 average NLL: 0.3980249750191527
Fold 18 average NLL: 0.37428339689743084
Fold 19 average NLL: 0.36123749865388227
20-fold CV data saved to:  ..\data\k_fold_CV\20-fold-CV_results_solosocial_randomintercepts_plus-socialcontext_fsw-interactions.pickle


In [29]:
# Solo+social model: baseline main effects, SocialContext, and the
# D2H:SocialContext and D2L:SocialContext interactions, with a random
# intercept grouped by GlmPlayerID.
# NOTE(review): the results label below contains "fsw-interactions", but this
# formula has no SocialContext:FirstSeenWall term. Either the term was dropped
# unintentionally or the label is stale -- confirm before comparing models.
# Neither is changed here because both affect the saved results.
model_formula = (
    'ChooseHigh ~ 1 + D2H + D2L + FirstSeenWall + WallSep'
    ' + SocialContext'
    ' + D2H:SocialContext + D2L:SocialContext'
    ' + (1 |GlmPlayerID)'
)

# Cross-validate on the pre-split solo+social dataframe; the third argument
# is the label that appears in the saved results file name.
model_avg_nll, models, predictions, fold_all_nlls, fold_avg_nlls = run_cross_validation(
    split_dataframes['glm_df_solosocial'],
    model_formula,
    "solosocial_randomintercepts_plus-socialcontext_fsw-interactions_socialcontext-dist_interactions",
    save_results=True,
)

IntProgress(value=0, description='Fitting models', max=20)

  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(


Fold 0 average NLL: 0.3859629430707765
Fold 1 average NLL: 0.35639297143834514
Fold 2 average NLL: 0.3800264651313039
Fold 3 average NLL: 0.4014309706895225
Fold 4 average NLL: 0.3572849923803905
Fold 5 average NLL: 0.3281122889315305
Fold 6 average NLL: 0.3671101499765658
Fold 7 average NLL: 0.42789533471021435
Fold 8 average NLL: 0.3938065051239211
Fold 9 average NLL: 0.3508724729495045
Fold 10 average NLL: 0.34666627046301063
Fold 11 average NLL: 0.37569899242309684
Fold 12 average NLL: 0.3617166379312593
Fold 13 average NLL: 0.38656083132266
Fold 14 average NLL: 0.33312364185723353
Fold 15 average NLL: 0.41434050713862425
Fold 16 average NLL: 0.34395437493995284
Fold 17 average NLL: 0.3895402606000611
Fold 18 average NLL: 0.3687687654480728
Fold 19 average NLL: 0.36171492071673267
20-fold CV data saved to:  ..\data\k_fold_CV\20-fold-CV_results_solosocial_randomintercepts_plus-socialcontext_fsw-interactions_socialcontext-dist_interactions.pickle


In [30]:
# Solo+social model: baseline main effects, SocialContext, the
# D2H:SocialContext and D2L:SocialContext interactions, and the solo
# interaction terms FirstSeenWall:WallSep, D2H:FirstSeenWall and
# D2L:FirstSeenWall, with a random intercept grouped by GlmPlayerID.
# NOTE(review): the results label below contains "fsw-interactions", but this
# formula has no SocialContext:FirstSeenWall term. Either the term was dropped
# unintentionally or the label is stale -- confirm before comparing models.
# Neither is changed here because both affect the saved results.
model_formula = (
    'ChooseHigh ~ 1 + D2H + D2L + FirstSeenWall + WallSep'
    ' + SocialContext'
    ' + D2H:SocialContext + D2L:SocialContext'
    ' + FirstSeenWall:WallSep + D2H:FirstSeenWall + D2L:FirstSeenWall'
    ' + (1 |GlmPlayerID)'
)

# Cross-validate on the pre-split solo+social dataframe; the third argument
# is the label that appears in the saved results file name.
model_avg_nll, models, predictions, fold_all_nlls, fold_avg_nlls = run_cross_validation(
    split_dataframes['glm_df_solosocial'],
    model_formula,
    "solosocial_randomintercepts_plus-socialcontext_fsw-interactions_socialcontext-dist-interactions_solo-interactions",
    save_results=True,
)

IntProgress(value=0, description='Fitting models', max=20)

  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(


Fold 0 average NLL: 0.3905286703419893
Fold 1 average NLL: 0.3363343962978067
Fold 2 average NLL: 0.368864035412811
Fold 3 average NLL: 0.40403823499921976
Fold 4 average NLL: 0.3544548150840102
Fold 5 average NLL: 0.31468519593591776
Fold 6 average NLL: 0.342205679853645
Fold 7 average NLL: 0.41672922384811273
Fold 8 average NLL: 0.36648224059991036
Fold 9 average NLL: 0.34714904172483524
Fold 10 average NLL: 0.3352177819478433
Fold 11 average NLL: 0.352955794532863
Fold 12 average NLL: 0.3527208449862267
Fold 13 average NLL: 0.37449793892335714
Fold 14 average NLL: 0.32642381340001236
Fold 15 average NLL: 0.4051321047728169
Fold 16 average NLL: 0.3248746011771502
Fold 17 average NLL: 0.37722937598894946
Fold 18 average NLL: 0.3624555805285829
Fold 19 average NLL: 0.3651006142650545
20-fold CV data saved to:  ..\data\k_fold_CV\20-fold-CV_results_solosocial_randomintercepts_plus-socialcontext_fsw-interactions_socialcontext-dist-interactions_solo-interactions.pickle
