In [1]:
%load_ext autoreload
%autoreload 2

import os 
# Set the environment for R: point R_HOME at the R installation inside the
# `octagon_analysis` conda environment of whichever known machine this runs on.
# This block runs before `import rpy2.robjects` below (presumably rpy2 reads R_HOME
# when it is imported — TODO confirm). The machine is recognised from HOME (Linux) or,
# when HOME is unset, from USERPROFILE; on any machine not listed here R_HOME is left
# untouched.
if os.environ.get('HOME') == '/home/tom':                                           # Linux
    os.environ['R_HOME'] = '/home/tom/miniconda3/envs/octagon_analysis/lib/R'           # Lab desktop

elif os.environ.get('HOME') is None:                                                # Windows
    if os.environ.get('USERPROFILE') == r'C:\Users\tomha':
        os.environ['R_HOME']= r'C:\Users\tomha\miniconda3\envs\octagon_analysis\lib\R'  # Laptop
    elif os.environ.get('USERPROFILE') == r'C:\Users\Tom':
        os.environ['R_HOME']=r'D:\Users\Tom\miniconda3\envs\octagon_analysis\lib\R'     # Home desktop
# elif:
    # append other machines here


import rpy2.robjects as robjects
print(robjects.r('R.version.string'))

import numpy as np
import pandas as pd
from ipywidgets import IntProgress
from IPython.display import display
from pymer4.models import Lmer
import populate_dataframes
import re




[1] "R version 4.1.3 (2022-03-10)"



In [2]:
# Number of folds each shuffled dataframe is split into for cross-validation
k = 20
# True: reshuffle the dataframes and overwrite the saved shuffle on disk;
# False: load the previously saved shuffle so the folds stay the same between sessions
new_split_dataframes = False

### load data

In [3]:
import pickle

# Build the path to the pickled analysis results, relative to this notebook's directory
analysis_dir = os.path.join('..', 'data')
# analysis_file = 'analysis_results_2levelsFirstSeenWall_normalisedWallSep.pkl'
analysis_file = 'analysis_results_3levelsFirstSeenWall_normalisedWallSep.pkl'
filename = os.path.join(analysis_dir, analysis_file)
# load the analysis results
# NOTE: unpickling can execute arbitrary code — only load files produced by this project
with open(filename, 'rb') as f:
    analysis_results = pickle.load(f)

In [4]:
# Show the top-level keys of the loaded results dictionary
analysis_results.keys()


dict_keys([np.int64(0), np.int64(1), np.int64(2), np.int64(3), np.int64(4), np.int64(5), np.int64(6), np.int64(7), np.int64(8), np.int64(9), np.int64(10), np.int64(11), np.int64(12), np.int64(13), np.int64(14), np.int64(15), np.int64(16), np.int64(17), np.int64(18), np.int64(19), np.int64(20), np.int64(21), np.int64(22), np.int64(23), np.int64(24), np.int64(25), np.int64(26), np.int64(27), np.int64(28), np.int64(29), np.int64(30), np.int64(31), np.int64(32), np.int64(33), np.int64(34), np.int64(35), np.int64(36), np.int64(37), np.int64(38), np.int64(39), np.int64(40), np.int64(41), np.int64(42), np.int64(43), np.int64(44), np.int64(45), np.int64(46), np.int64(47), np.int64(48), np.int64(49)])

### populate dataframes for glm input

In [5]:
# populate dataframes for solo, solosocial, and social analysis_type
# (populate_dataframe lives in the local `populate_dataframes` module; each result is
# used as a pandas DataFrame further down, e.g. via `.sample` and column indexing)
glm_df_solo = populate_dataframes.populate_dataframe(analysis_results, analysis_type='solo')
glm_df_solosocial = populate_dataframes.populate_dataframe(analysis_results, analysis_type='solosocial')
glm_df_social = populate_dataframes.populate_dataframe(analysis_results, analysis_type='social')


### create reference to dataframes

In [6]:
# Count how many rows of the social dataframe take each WallSep value
glm_df_social['WallSep'].value_counts()

WallSep
0.25    5002
0.50    3002
1.00    2680
Name: count, dtype: int64

In [7]:
# Collect the three dataframes under string keys so later cells can loop over them;
# the same keys are used to look up the folds in `split_dataframes`
dataframes = {
    'glm_df_solo': glm_df_solo,
    'glm_df_solosocial': glm_df_solosocial,
    'glm_df_social': glm_df_social
}

### shuffle the dataframes for k-fold index selection


In [8]:
shuffled_dataframes_path = os.path.join('..', 'data', 'k_fold_CV', 'shuffled_dataframes.pkl')

if new_split_dataframes:

    # Shuffle each dataframe into a NEW dictionary. `dataframes` itself is left in its
    # original row order, so re-running this cell always produces the same shuffle
    # (shuffling an already-shuffled frame with the same seed would give a different
    # order) and both branches of this cell leave `dataframes` in the same state.
    shuffled_dataframes = {
        name: df.sample(frac=1, random_state=17).reset_index(drop=True)
        for name, df in dataframes.items()
    }

    # pickle save shuffled dataframes as one dictionary, creating the folder if needed
    os.makedirs(os.path.dirname(shuffled_dataframes_path), exist_ok=True)
    with open(shuffled_dataframes_path, 'wb') as f:
        pickle.dump(shuffled_dataframes, f)

else:
    # load the previously saved shuffle so the folds are identical between sessions
    # NOTE: unpickling can execute arbitrary code — only load files produced by this project
    with open(shuffled_dataframes_path, 'rb') as f:
        shuffled_dataframes = pickle.load(f)



### Split each dataframe into folds

In [9]:
# split each dataframe into k contiguous folds of (nearly) equal size.
# The row *positions* are split with np.array_split and the rows are then taken with
# .iloc, instead of handing the DataFrame itself to np.array_split: that route goes
# through a deprecated pandas code path and emits a warning (see this cell's old output).
# Fold boundaries are the same as before: the first len(df) % k folds get one extra row.
split_dataframes = {
    name: [df.iloc[fold_rows] for fold_rows in np.array_split(np.arange(len(df)), k)]
    for name, df in shuffled_dataframes.items()
}

  return bound(*args, **kwds)


### Cross-validation functions

In [10]:
from contextlib import redirect_stdout

def fit_models(split_df, model_formula):
    '''
    Fit one binomial-family Lmer model per fold of a split dataframe.

    For fold i, the model is trained on the concatenation of every other fold
    (k-1 folds, where k = len(split_df)); fold i itself is left out.

    Args:
        split_df (list): the k fold dataframes.
        model_formula (str): formula passed to Lmer.

    Returns:
        list: the k fitted models, where models[i] was trained without fold i.
    '''

    n_folds = len(split_df)
    progress_bar = IntProgress(min=0, max=n_folds, description='Fitting models')
    display(progress_bar)

    fitted_models = []

    # Everything written to stdout while fitting is discarded, including the
    # per-model message printed at the end of each iteration
    with open(os.devnull, 'w') as sink, redirect_stdout(sink):
        for held_out in range(n_folds):
            # Training data: all folds except the held-out one
            training_folds = [fold for idx, fold in enumerate(split_df) if idx != held_out]
            train_data = pd.concat(training_folds, ignore_index=True)

            model = Lmer(model_formula, data=train_data, family='binomial')
            # bobyqa optimizer with maxfun=10000
            model.fit(control="optimizer='bobyqa', optCtrl=list(maxfun=10000)")
            fitted_models.append(model)

            print(f"Model {held_out} fit with {len(train_data)} rows")
            progress_bar.value += 1

    return fitted_models

def drop_nans_relevant_columns(df, model_formula):
    ''' Remove rows that have a NaN in any column used by the model formula.

    This avoids making predictions on rows with missing predictors (which would cause
    errors), and on rows with a missing ground truth when the ground-truth column is
    part of the formula (such rows could not be checked against anything).

    A column counts as "used" only when its name appears in the formula as a whole
    term. The name is matched literally, and it is not matched when it is merely part
    of a longer identifier: a column `D2L` is NOT considered used by a formula that only
    mentions `OpponentD2L`.

    Args:
        df (pd.DataFrame): data to filter; it is not modified.
        model_formula (str): model formula, e.g. 'y ~ 1 + a + a:b + (1|group)'.

    Returns:
        pd.DataFrame: `df` itself when nothing needs dropping, otherwise a filtered copy
        (the original index labels are kept).
    '''

    def _named_in_formula(column):
        # Escape the name so regex metacharacters are literal, and reject matches that
        # are glued to further identifier characters (letters, digits, '_' or '.')
        pattern = r'(?<![\w.])' + re.escape(str(column)) + r'(?![\w.])'
        return re.search(pattern, model_formula) is not None

    relevant_nan_cols = [
        col for col in df.columns
        if df[col].isna().any() and _named_in_formula(col)
    ]

    if not relevant_nan_cols:
        return df

    # A single dropna over all relevant columns removes every affected row
    return df.dropna(subset=relevant_nan_cols)

def calculate_predictions(split_df, models):
    '''
    Predict on each model's held-out fold.

    models[i] is paired with split_df[i]. Before predicting, a copy of the fold is passed
    through drop_nans_relevant_columns together with the model's own formula, so rows
    with NaNs in the columns that helper considers relevant are removed.

    Returns:
        tuple: (all_predictions, all_predict_data) — one entry per model in each list;
        all_predict_data[i] is the filtered dataframe that all_predictions[i] was
        computed on (kept so the ground truth can be looked up later).
    '''

    fold_predictions = []
    fold_inputs = []

    for fold_idx, fitted_model in enumerate(models):
        # Work on a copy of the held-out fold, filtered for the model's formula
        held_out = drop_nans_relevant_columns(split_df[fold_idx].copy(), fitted_model.formula)
        fold_inputs.append(held_out)

        fold_predictions.append(
            fitted_model.predict(held_out, skip_data_checks=True, verify_predictions=False)
        )

    return fold_predictions, fold_inputs



def calculate_likelihoods(split_df, predictions, model_formula):
    '''
    Calculate the likelihood of each prediction given the true output.

    The likelihood is p^y * (1-p)^(1-y), where p is the predicted probability and y is
    the true output (0 or 1) taken from the 'ChooseHigh' column.

    Args:
        split_df (list): fold dataframes; split_df[i] belongs to predictions[i].
        predictions (list): one array-like of predicted probabilities per fold. Each
            must have one entry per row that remains in the fold after
            drop_nans_relevant_columns(fold, model_formula) has been applied.
        model_formula (str): formula the predictions were made with; used to repeat
            the same row filtering on the ground-truth data.

    Returns:
        list: one 1-D numpy array of likelihoods per fold. Rows whose ground truth is
        NaN (social trials without confident inferred choices) are left out.

    Raises:
        ValueError: if a fold's predictions and its filtered ground truth differ in length.
    '''

    fold_likelihoods = []
    for fold_idx, prediction_fold in enumerate(predictions):

        predicted = np.asarray(prediction_fold, dtype=float)

        # Repeat the row filtering that was applied before predicting, so that the
        # ground truth lines up row-for-row with the predictions
        held_out = drop_nans_relevant_columns(split_df[fold_idx].copy(), model_formula)

        if len(predicted) != len(held_out):
            raise ValueError(
                f"Fold {fold_idx}: {len(predicted)} predictions but {len(held_out)} "
                f"ground-truth rows after NaN filtering"
            )

        # Only score rows that have a ground truth. The mask is converted to a plain
        # numpy array so it is applied by position, whatever the dataframe's index is.
        ground_truth_all = held_out['ChooseHigh']
        has_truth = ground_truth_all.notna().to_numpy()
        truth = ground_truth_all.to_numpy(dtype=float)[has_truth]
        predicted = predicted[has_truth]

        # Bernoulli likelihood for every row at once
        likelihoods = predicted**truth * (1 - predicted)**(1 - truth)

        # Report any likelihood that could not be computed (e.g. a NaN prediction)
        for pos in np.flatnonzero(np.isnan(likelihoods)):
            prediction, ground_truth = predicted[pos], truth[pos]
            print(f"NaN likelihood for prediction {prediction} and ground truth {ground_truth}")
            print(f"prediction**ground_truth: {prediction**ground_truth}")
            print(f"(1 - prediction)**(1 - ground_truth): {(1 - prediction)**(1 - ground_truth)}")

        fold_likelihoods.append(likelihoods)

    return fold_likelihoods

def calculate_nlls(fold_likelihoods):
    '''
    Turn per-fold likelihoods into negative log likelihoods (NLLs).

    Args:
        fold_likelihoods (list): one array of likelihoods per fold.

    Returns:
        tuple: (fold_all_nlls, fold_avg_nlls, model_avg_nll)
            - fold_all_nlls: list with one array of per-row NLLs per fold,
            - fold_avg_nlls: numpy array with the NaN-ignoring mean NLL of each fold,
            - model_avg_nll: NaN-ignoring mean of the fold averages.

    Each fold's average is also printed.
    '''

    per_fold_nlls = []
    per_fold_means = np.full(len(fold_likelihoods), np.nan)

    for fold_idx, likelihoods in enumerate(fold_likelihoods):
        nlls = np.negative(np.log(likelihoods))
        fold_mean = np.nanmean(nlls)
        print(f"Fold {fold_idx} average NLL: {fold_mean}")

        per_fold_means[fold_idx] = fold_mean
        per_fold_nlls.append(nlls)

    return per_fold_nlls, per_fold_means, np.nanmean(per_fold_means)


def save_cross_validation_results(name, model_formula, split_df, num_folds, predictions,
                                  predict_data, fold_likelihoods, fold_nlls, fold_avg_nlls, model_avg_nll):
    ''' Save the cross-validation results to a pickle file.

      The file is written to ../data/k_fold_CV/{num_folds}-fold-CV_results_{name}.pickle
      (relative to the current working directory). The folder is created if it does not
      exist, and an existing file of the same name is overwritten.

      Args:
        name (str): Name of the model type; becomes part of the file name.
        model_formula (str): The formula used for the model.
        split_df (list): List of dataframes for each fold.
        num_folds (int): Number of folds in the cross-validation.
        predictions (list): List of arrays of predictions for each fold.
        predict_data (list): List of dataframes used for predictions (different to split_df
        in that all of the rows with nans in relevant prediction columns are removed).
        fold_likelihoods (list): List of arrays of likelihoods for each fold.
        fold_nlls (list): List of arrays of negative log likelihoods for each fold.
        fold_avg_nlls (np.array): Average negative log likelihoods for each fold.
        model_avg_nll (float): Average negative log likelihood across all folds.'''

    cross_validation_results = {
        "name": name,
        "model_formula" : model_formula,
        "split_df" : split_df,
        "num_folds" : num_folds,
        # "models" : models,
        "predictions" : predictions,
        "predict_data" : predict_data,
        "fold_likelihoods" : fold_likelihoods,
        "fold_nlls" : fold_nlls,
        "fold_avg_nlls" : fold_avg_nlls,
        "average_nll" : model_avg_nll
    }

    # Save the cross-validation results to a file, making sure the target folder exists
    output_dir = os.path.join('..', 'data', 'k_fold_CV')
    os.makedirs(output_dir, exist_ok=True)
    filename = f'{num_folds}-fold-CV_results_{name}.pickle'
    filepath = os.path.join(output_dir, filename)
    with open(filepath, 'wb') as f:
        pickle.dump(cross_validation_results, f)

    print(f"{num_folds}-fold CV data saved to: ", filepath)

In [11]:
def run_cross_validation(split_df, model_formula, name, save_results=False):
    '''
    Run k-fold cross-validation of one model formula on an already split dataframe.

    Args:
        split_df (list): the k fold dataframes.
        model_formula (str): formula of the model to evaluate.
        name (str): label for this model; used in the results file name.
        save_results (bool): when True, the results are also written to disk via
            save_cross_validation_results.

    Returns (in this order):
    - model_avg_nll: the average negative log likelihood (NLL) across all folds,
    - models: a list of the fitted model for each fold,
    - predictions: a list of arrays of predictions for each fold,
    - fold_all_nlls: a list of arrays of per-row NLLs for each fold,
    - fold_avg_nlls: the average NLL for each fold.
    '''

    num_folds = len(split_df)

    # 1) one model per fold, each trained on the other k-1 folds
    models = fit_models(split_df, model_formula)

    # 2) predictions of every model on its own held-out fold
    predictions, predict_data = calculate_predictions(split_df, models)

    # 3) likelihood of every prediction, then 4) the NLL summaries
    fold_likelihoods = calculate_likelihoods(split_df, predictions, model_formula)
    fold_all_nlls, fold_avg_nlls, model_avg_nll = calculate_nlls(fold_likelihoods)

    # 5) optional: persist everything except the fitted models
    if save_results:
        save_cross_validation_results(
            name=name,
            model_formula=model_formula,
            split_df=split_df,
            num_folds=num_folds,
            predictions=predictions,
            predict_data=predict_data,
            fold_likelihoods=fold_likelihoods,
            fold_nlls=fold_all_nlls,
            fold_avg_nlls=fold_avg_nlls,
            model_avg_nll=model_avg_nll,
        )

    return model_avg_nll, models, predictions, fold_all_nlls, fold_avg_nlls


### Solo models

In [19]:
# Solo data: the two distance terms D2H and D2L, random intercept per player
model_formula = 'ChooseHigh ~ 1 + D2H + D2L + (1|GlmPlayerID)'
model_avg_nll, models, predictions, fold_all_nlls, fold_avg_nlls = run_cross_validation(
    split_df=split_dataframes['glm_df_solo'],
    model_formula=model_formula,
    name="solo_randomintercepts_onlydistance",
    save_results=True,
)

IntProgress(value=0, description='Fitting models', max=20)

  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(


Fold 0 average NLL: 0.2766952126535541
Fold 1 average NLL: 0.29547239931306646
Fold 2 average NLL: 0.3520194108999301
Fold 3 average NLL: 0.3050664831883057
Fold 4 average NLL: 0.2790347374399119
Fold 5 average NLL: 0.3270675305215161
Fold 6 average NLL: 0.3321872328927074
Fold 7 average NLL: 0.2911329369751824
Fold 8 average NLL: 0.2918816929923042
Fold 9 average NLL: 0.3353433414294066
Fold 10 average NLL: 0.2609944296621782
Fold 11 average NLL: 0.31158630397958414
Fold 12 average NLL: 0.3202526058032808
Fold 13 average NLL: 0.3755990021386372
Fold 14 average NLL: 0.264802398618293
Fold 15 average NLL: 0.28698694428365334
Fold 16 average NLL: 0.284253841106489
Fold 17 average NLL: 0.32131389415869205
Fold 18 average NLL: 0.3050602741686595
Fold 19 average NLL: 0.34629379605398863
20-fold CV data saved to:  ..\data\k_fold_CV\20-fold-CV_results_solo_randomintercepts_onlydistance.pickle


In [20]:
# Sanity check on fold 8: the sum of its per-row NLLs should equal its average NLL
# multiplied by its number of rows
np.sum(fold_all_nlls[8]), fold_avg_nlls[8]*len(fold_all_nlls[8])

(np.float64(80.85122895886826), np.float64(80.85122895886826))

In [21]:
# model_formula = 'ChooseHigh ~ 1 + FirstSeenWall + (1|GlmPlayerID)'
# (model_avg_nll, models,
#   predictions, fold_all_nlls, fold_avg_nlls) = run_cross_validation(split_dataframes['glm_df_solo'], model_formula,
#                                                     "solo_randomintercepts_firstseenonly",
#                                                       save_results=True)

In [22]:
# model_formula = 'ChooseHigh ~ 1 + WallSep + FirstSeenWall + (1|GlmPlayerID)'
# (model_avg_nll, models,
#   predictions, fold_all_nlls, fold_avg_nlls) = run_cross_validation(split_dataframes['glm_df_solo'], model_formula,
#                                                     "solo_randomintercepts_wallsep_firstseen",
#                                                       save_results=True)

In [23]:
# Solo data: distance terms plus WallSep, random intercept per player
model_formula = 'ChooseHigh ~ 1 + D2H + D2L + WallSep + (1|GlmPlayerID)'
model_avg_nll, models, predictions, fold_all_nlls, fold_avg_nlls = run_cross_validation(
    split_df=split_dataframes['glm_df_solo'],
    model_formula=model_formula,
    name="solo_randomintercepts_distancepluswallsep",
    save_results=True,
)

IntProgress(value=0, description='Fitting models', max=20)

  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(


Fold 0 average NLL: 0.2608716500057588
Fold 1 average NLL: 0.28423616423119086
Fold 2 average NLL: 0.3537761857473281
Fold 3 average NLL: 0.2765300217136157
Fold 4 average NLL: 0.2683716285721701
Fold 5 average NLL: 0.3254190845840474
Fold 6 average NLL: 0.32327703503246646
Fold 7 average NLL: 0.28283360254282547
Fold 8 average NLL: 0.2721974437856051
Fold 9 average NLL: 0.31628012609647604
Fold 10 average NLL: 0.264777550572167
Fold 11 average NLL: 0.3004453465411507
Fold 12 average NLL: 0.2994899022599431
Fold 13 average NLL: 0.3555282737893079
Fold 14 average NLL: 0.2575248172073129
Fold 15 average NLL: 0.26859325757251
Fold 16 average NLL: 0.2846100788487303
Fold 17 average NLL: 0.3055905751168851
Fold 18 average NLL: 0.2944038171715241
Fold 19 average NLL: 0.34047009227209507
20-fold CV data saved to:  ..\data\k_fold_CV\20-fold-CV_results_solo_randomintercepts_distancepluswallsep.pickle


In [24]:
# Number of prediction arrays returned by the last run (one per fold)
len(predictions)

20

In [25]:
# Solo data: distance terms, FirstSeenWall and WallSep as main effects only
model_formula = 'ChooseHigh ~ 1 + D2H + D2L + FirstSeenWall + WallSep + (1|GlmPlayerID)'
model_avg_nll, models, predictions, fold_all_nlls, fold_avg_nlls = run_cross_validation(
    split_df=split_dataframes['glm_df_solo'],
    model_formula=model_formula,
    name="solo_randomintercepts",
    save_results=True,
)

IntProgress(value=0, description='Fitting models', max=20)

  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(


Fold 0 average NLL: 0.20285147039946846
Fold 1 average NLL: 0.2263933697872842
Fold 2 average NLL: 0.31756524404355113
Fold 3 average NLL: 0.25114714446736847
Fold 4 average NLL: 0.2364846416368972
Fold 5 average NLL: 0.24298101717530415
Fold 6 average NLL: 0.2633009458158499
Fold 7 average NLL: 0.2197755936586168
Fold 8 average NLL: 0.2418839716360198
Fold 9 average NLL: 0.23474080552007054
Fold 10 average NLL: 0.22813391310732176
Fold 11 average NLL: 0.23811503179671037
Fold 12 average NLL: 0.22334581980894902
Fold 13 average NLL: 0.2786534536901988
Fold 14 average NLL: 0.2079396282073283
Fold 15 average NLL: 0.22543466481953064
Fold 16 average NLL: 0.2113970233714495
Fold 17 average NLL: 0.20681984168826748
Fold 18 average NLL: 0.20859923112548515
Fold 19 average NLL: 0.2734534925621421
20-fold CV data saved to:  ..\data\k_fold_CV\20-fold-CV_results_solo_randomintercepts.pickle


In [17]:
# Solo data: main effects plus the FirstSeenWall:WallSep interaction
model_formula = 'ChooseHigh ~ 1 + D2H + D2L + FirstSeenWall + WallSep + FirstSeenWall:WallSep + (1|GlmPlayerID)'
model_avg_nll, models, predictions, fold_all_nlls, fold_avg_nlls = run_cross_validation(
    split_df=split_dataframes['glm_df_solo'],
    model_formula=model_formula,
    name="solo_randomintercepts_wallsepinteraction",
    save_results=True,
)

IntProgress(value=0, description='Fitting models', max=20)

  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(


Fold 0 average NLL: 0.20378994668466527
Fold 1 average NLL: 0.22325273833867923
Fold 2 average NLL: 0.30700590566964375
Fold 3 average NLL: 0.250035709680835
Fold 4 average NLL: 0.24367203405935073
Fold 5 average NLL: 0.2375567203733049
Fold 6 average NLL: 0.2527393285515054
Fold 7 average NLL: 0.21064807296469332
Fold 8 average NLL: 0.23911680333514826
Fold 9 average NLL: 0.23101453431554378
Fold 10 average NLL: 0.22628471411303924
Fold 11 average NLL: 0.2338457392752699
Fold 12 average NLL: 0.2179574922057977
Fold 13 average NLL: 0.28565734175618934
Fold 14 average NLL: 0.20589282971960773
Fold 15 average NLL: 0.22751976050456224
Fold 16 average NLL: 0.21000975682776987
Fold 17 average NLL: 0.20787085592117052
Fold 18 average NLL: 0.20320329357847322
Fold 19 average NLL: 0.2717326656589293
20-fold CV data saved to:  ..\data\k_fold_CV\20-fold-CV_results_solo_randomintercepts_wallsepinteraction.pickle


In [18]:
# model_formula = 'ChooseHigh ~ 1 + D2H + D2L + FirstSeenWall + (1|GlmPlayerID)'
# (model_avg_nll, models,
#   predictions, fold_all_nlls, fold_avg_nlls) = run_cross_validation(split_dataframes['glm_df_solo'], model_formula,
#                                                     "solo_randomintercepts_distanceplusfirstseen",
#                                                       save_results=True)

In [26]:
# Solo data: main effects plus the interactions of both distances with FirstSeenWall
model_formula = 'ChooseHigh ~ 1 + D2H + D2L + FirstSeenWall + WallSep + D2H:FirstSeenWall + D2L:FirstSeenWall + (1|GlmPlayerID)'
model_avg_nll, models, predictions, fold_all_nlls, fold_avg_nlls = run_cross_validation(
    split_df=split_dataframes['glm_df_solo'],
    model_formula=model_formula,
    name="solo_randomintercepts_distancesinteraction",
    save_results=True,
)

IntProgress(value=0, description='Fitting models', max=20)

  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(


Fold 0 average NLL: 0.20014334738980757
Fold 1 average NLL: 0.22826602950924246
Fold 2 average NLL: 0.307845133924021
Fold 3 average NLL: 0.24507122910456555
Fold 4 average NLL: 0.2438019897116798
Fold 5 average NLL: 0.23714137857280604
Fold 6 average NLL: 0.26320412613176597
Fold 7 average NLL: 0.2204737821270961
Fold 8 average NLL: 0.24904223217847465
Fold 9 average NLL: 0.2339755286189808
Fold 10 average NLL: 0.22266198046187594
Fold 11 average NLL: 0.2357499139886113
Fold 12 average NLL: 0.22613318034029678
Fold 13 average NLL: 0.279093008346245
Fold 14 average NLL: 0.20224942165563306
Fold 15 average NLL: 0.22382585634688837
Fold 16 average NLL: 0.21301978840790503
Fold 17 average NLL: 0.20534457179026616
Fold 18 average NLL: 0.20170327232127883
Fold 19 average NLL: 0.2679330591504386
20-fold CV data saved to:  ..\data\k_fold_CV\20-fold-CV_results_solo_randomintercepts_distancesinteraction.pickle


In [27]:
# Solo data: main effects plus all three interactions with FirstSeenWall
model_formula = 'ChooseHigh ~ 1 + D2H + D2L + FirstSeenWall + WallSep + D2H:FirstSeenWall + D2L:FirstSeenWall + FirstSeenWall:WallSep + (1|GlmPlayerID)'
model_avg_nll, models, predictions, fold_all_nlls, fold_avg_nlls = run_cross_validation(
    split_df=split_dataframes['glm_df_solo'],
    model_formula=model_formula,
    name="solo_randomintercepts_distancesandwallsepinteraction",
    save_results=True,
)

IntProgress(value=0, description='Fitting models', max=20)

  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(


Fold 0 average NLL: 0.19983985610162316
Fold 1 average NLL: 0.22225468605815224
Fold 2 average NLL: 0.29599871868518746
Fold 3 average NLL: 0.23961011133719282
Fold 4 average NLL: 0.24788881899051185
Fold 5 average NLL: 0.22847053132685624
Fold 6 average NLL: 0.2512964568219205
Fold 7 average NLL: 0.20964622263624086
Fold 8 average NLL: 0.24624713283818853
Fold 9 average NLL: 0.23034741399607792
Fold 10 average NLL: 0.2183287443182414
Fold 11 average NLL: 0.23096511481285875
Fold 12 average NLL: 0.22073584077559202
Fold 13 average NLL: 0.2877775606800399
Fold 14 average NLL: 0.19779430256705754
Fold 15 average NLL: 0.22797895914368246
Fold 16 average NLL: 0.2101812524470797
Fold 17 average NLL: 0.2059602162269392
Fold 18 average NLL: 0.19720896878072364
Fold 19 average NLL: 0.2669009647769958
20-fold CV data saved to:  ..\data\k_fold_CV\20-fold-CV_results_solo_randomintercepts_distancesandwallsepinteraction.pickle


In [18]:
# Solo data: FirstSeenWall:WallSep interaction without a separate WallSep main effect.
# NOTE(review): the fold NLLs recorded below agree with those of the
# "solo_randomintercepts_wallsepinteraction" run to about 7 decimal places, which
# suggests the two formulas describe the same model in a different parameterisation —
# confirm before treating them as distinct candidates.
model_formula = 'ChooseHigh ~ 1 + D2H + D2L + FirstSeenWall + FirstSeenWall:WallSep + (1|GlmPlayerID)'
model_avg_nll, models, predictions, fold_all_nlls, fold_avg_nlls = run_cross_validation(
    split_df=split_dataframes['glm_df_solo'],
    model_formula=model_formula,
    name="solo_randomintercepts_wallsepinteraction_nowallsep",
    save_results=True,
)

IntProgress(value=0, description='Fitting models', max=20)

  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(


Fold 0 average NLL: 0.20378993992729946
Fold 1 average NLL: 0.22325276512418635
Fold 2 average NLL: 0.3070059368134946
Fold 3 average NLL: 0.2500357513921509
Fold 4 average NLL: 0.24367203703041188
Fold 5 average NLL: 0.23755669402941312
Fold 6 average NLL: 0.2527393343314195
Fold 7 average NLL: 0.21064807314327347
Fold 8 average NLL: 0.2391168100305247
Fold 9 average NLL: 0.23101454346807185
Fold 10 average NLL: 0.22628468964410334
Fold 11 average NLL: 0.23384574025383378
Fold 12 average NLL: 0.21795749556519758
Fold 13 average NLL: 0.28565734184828656
Fold 14 average NLL: 0.20589281440831755
Fold 15 average NLL: 0.22751975554906306
Fold 16 average NLL: 0.21000974481078227
Fold 17 average NLL: 0.20787084889645133
Fold 18 average NLL: 0.20320328188648162
Fold 19 average NLL: 0.2717326608434453
20-fold CV data saved to:  ..\data\k_fold_CV\20-fold-CV_results_solo_randomintercepts_wallsepinteraction_nowallsep.pickle


### Social models

In [11]:
# Social data, evaluated with the full solo formula (all three FirstSeenWall interactions)
model_formula = 'ChooseHigh ~ 1 + D2H + D2L + FirstSeenWall + WallSep + D2H:FirstSeenWall + D2L:FirstSeenWall + FirstSeenWall:WallSep + (1|GlmPlayerID)'
model_avg_nll, models, predictions, fold_all_nlls, fold_avg_nlls = run_cross_validation(
    split_df=split_dataframes['glm_df_social'],
    model_formula=model_formula,
    name="social_randomintercepts_bestsolomodel",
    save_results=True,
)

IntProgress(value=0, description='Fitting models', max=20)

  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(


Fold 0 average NLL: 0.4751931456919329
Fold 1 average NLL: 0.47663962575468505
Fold 2 average NLL: 0.45577030724957235
Fold 3 average NLL: 0.45303921745097064
Fold 4 average NLL: 0.5004359738783594
Fold 5 average NLL: 0.4622226258552302
Fold 6 average NLL: 0.4453357040885455
Fold 7 average NLL: 0.4084156967199007
Fold 8 average NLL: 0.4298405716443027
Fold 9 average NLL: 0.40773188481009315
Fold 10 average NLL: 0.489615672163937
Fold 11 average NLL: 0.4985930091597386
Fold 12 average NLL: 0.4483834525740958
Fold 13 average NLL: 0.39740868356262526
Fold 14 average NLL: 0.3502134616959384
Fold 15 average NLL: 0.45595055096840015
Fold 16 average NLL: 0.4410751015961143
Fold 17 average NLL: 0.42958941661583516
Fold 18 average NLL: 0.4474993173996655
Fold 19 average NLL: 0.45614070695012
20-fold CV data saved to:  ..\data\k_fold_CV\20-fold-CV_results_social_randomintercepts_bestsolomodel.pickle


In [12]:
# Social data: the full solo formula plus OpponentVisible.
# The backslash continues the string literal, so the next line's leading spaces are
# part of the formula text.
model_formula = 'ChooseHigh ~ 1 + D2H + D2L + FirstSeenWall + WallSep + D2H:FirstSeenWall \
    + D2L:FirstSeenWall + FirstSeenWall:WallSep + OpponentVisible + (1|GlmPlayerID)'

model_avg_nll, models, predictions, fold_all_nlls, fold_avg_nlls = run_cross_validation(
    split_df=split_dataframes['glm_df_social'],
    model_formula=model_formula,
    name="social_randomintercepts_bestsolo_opponentvisible",
    save_results=True,
)

IntProgress(value=0, description='Fitting models', max=20)

  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(


Fold 0 average NLL: 0.47396436727609886
Fold 1 average NLL: 0.47538748625288785
Fold 2 average NLL: 0.4566651665594501
Fold 3 average NLL: 0.4539333345019922
Fold 4 average NLL: 0.4987968593268262
Fold 5 average NLL: 0.46179428093908725
Fold 6 average NLL: 0.44754449666833745
Fold 7 average NLL: 0.4086496487237087
Fold 8 average NLL: 0.4297021385416864
Fold 9 average NLL: 0.40747695340090834
Fold 10 average NLL: 0.4897868693689682
Fold 11 average NLL: 0.4989989120606497
Fold 12 average NLL: 0.4478847278054629
Fold 13 average NLL: 0.397206565517138
Fold 14 average NLL: 0.3518447087929723
Fold 15 average NLL: 0.45572273034985666
Fold 16 average NLL: 0.4397174327740685
Fold 17 average NLL: 0.4297513109982603
Fold 18 average NLL: 0.44757406439865244
Fold 19 average NLL: 0.4568264950594689
20-fold CV data saved to:  ..\data\k_fold_CV\20-fold-CV_results_social_randomintercepts_bestsolo_opponentvisible.pickle


In [13]:
# Social data: fixed effects of the "bestsolo" formula plus main effects of
# OpponentD2L and OpponentD2H, with a random intercept per GlmPlayerID.
# The pieces below concatenate to exactly the same formula string as before
# (including the spaces left behind by the old in-string line continuation).
model_formula = (
    'ChooseHigh ~ 1 + D2H + D2L + FirstSeenWall + WallSep + D2H:FirstSeenWall '
    '  + D2L:FirstSeenWall + FirstSeenWall:WallSep'
    ' + OpponentD2L + OpponentD2H'
    ' + (1|GlmPlayerID)'
)

# save_results=True: the run is stored under the label passed as the third argument.
model_avg_nll, models, predictions, fold_all_nlls, fold_avg_nlls = run_cross_validation(
    split_dataframes['glm_df_social'],
    model_formula,
    "social_randomintercepts_bestsolo_opponentdistance",
    save_results=True,
)

IntProgress(value=0, description='Fitting models', max=20)

  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(


Fold 0 average NLL: 0.47636445122533105
Fold 1 average NLL: 0.47064181706320857
Fold 2 average NLL: 0.45694116668621626
Fold 3 average NLL: 0.4424432744130312
Fold 4 average NLL: 0.4953717382736513
Fold 5 average NLL: 0.46860193943525663
Fold 6 average NLL: 0.4433414321044491
Fold 7 average NLL: 0.4005737443511836
Fold 8 average NLL: 0.4203754871438912
Fold 9 average NLL: 0.406494202224009
Fold 10 average NLL: 0.49047324808387793
Fold 11 average NLL: 0.5015796404578784
Fold 12 average NLL: 0.43816946665044365
Fold 13 average NLL: 0.38938813230992364
Fold 14 average NLL: 0.34550751263733936
Fold 15 average NLL: 0.44591940755684245
Fold 16 average NLL: 0.4417617393143986
Fold 17 average NLL: 0.4317217705490126
Fold 18 average NLL: 0.44601756495338446
Fold 19 average NLL: 0.45891457763687615
20-fold CV data saved to:  ..\data\k_fold_CV\20-fold-CV_results_social_randomintercepts_bestsolo_opponentdistance.pickle


In [14]:
# Social data: fixed effects of the "bestsolo" formula plus main effects of
# OpponentD2L, OpponentD2H and OpponentFirstSeenWall, with a random intercept
# per GlmPlayerID.
# The pieces below concatenate to exactly the same formula string as before
# (including the spaces left behind by the old in-string line continuations).
model_formula = (
    'ChooseHigh ~ 1 + D2H + D2L + FirstSeenWall + WallSep + D2H:FirstSeenWall '
    '  + D2L:FirstSeenWall + FirstSeenWall:WallSep'
    ' + OpponentD2L + OpponentD2H + '
    '     OpponentFirstSeenWall'
    ' + (1|GlmPlayerID)'
)

# save_results=True: the run is stored under the label passed as the third argument.
model_avg_nll, models, predictions, fold_all_nlls, fold_avg_nlls = run_cross_validation(
    split_dataframes['glm_df_social'],
    model_formula,
    "social_randomintercepts_bestsolo_opponentdistance_opponentfirstseen",
    save_results=True,
)

IntProgress(value=0, description='Fitting models', max=20)

  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(


Fold 0 average NLL: 0.452706387871175
Fold 1 average NLL: 0.46475196690072973
Fold 2 average NLL: 0.44351151614196827
Fold 3 average NLL: 0.4544092874719606
Fold 4 average NLL: 0.49354751014675874
Fold 5 average NLL: 0.4418203134679448
Fold 6 average NLL: 0.42614620919918783
Fold 7 average NLL: 0.37410424049546914
Fold 8 average NLL: 0.3874470924619482
Fold 9 average NLL: 0.38351524047961966
Fold 10 average NLL: 0.5169562352554643
Fold 11 average NLL: 0.489805645701984
Fold 12 average NLL: 0.42833852328639793
Fold 13 average NLL: 0.3666162313737495
Fold 14 average NLL: 0.3238843849500479
Fold 15 average NLL: 0.4420437696357614
Fold 16 average NLL: 0.4491055739316145
Fold 17 average NLL: 0.35465953645492765
Fold 18 average NLL: 0.4369431432138068
Fold 19 average NLL: 0.468668988658836
20-fold CV data saved to:  ..\data\k_fold_CV\20-fold-CV_results_social_randomintercepts_bestsolo_opponentdistance_opponentfirstseen.pickle


In [15]:
# NOTE(review): this cell is disabled. The commented-out formula below contains the same
# terms (in a different order) as the model saved under
# "social_randomintercepts_bestsolo_opponentdistance_opponentfirstseen", so it looks
# redundant -- confirm and delete the cell if it is no longer needed.
# model_formula = 'ChooseHigh ~ 1 + D2H + D2L + FirstSeenWall + WallSep  + OpponentD2H' \
# ' + OpponentD2L + OpponentFirstSeenWall + FirstSeenWall:WallSep + D2H:FirstSeenWall + D2L:FirstSeenWall + (1|GlmPlayerID)'

# (model_avg_nll, models,
#   predictions, fold_all_nlls, fold_avg_nlls) = run_cross_validation(split_dataframes['glm_df_social'], model_formula,
#                                                     "social_randomintercepts_no-opponentvisible_opponentdistance_opponentfirstseen_solointeractions",
#                                                       save_results=True)

In [16]:
# Social data: fixed effects of the "bestsolo" formula, main effects of OpponentD2H,
# OpponentD2L and OpponentFirstSeenWall, plus the self-by-opponent distance
# interactions D2H:OpponentD2H and D2L:OpponentD2L, with a random intercept per
# GlmPlayerID.
#
# The formula previously contained two doubled '+' operators ("WallSep  +  + ..." and
# "D2L:FirstSeenWall + + OpponentD2H"). R reads the second '+' as a unary plus, so the
# set of model terms is the same; the stray operators are removed here so the formula
# reads as intended.
model_formula = (
    'ChooseHigh ~ 1 + D2H + D2L + FirstSeenWall + WallSep'
    ' + FirstSeenWall:WallSep + D2H:FirstSeenWall + D2L:FirstSeenWall'
    ' + OpponentD2H + OpponentD2L + OpponentFirstSeenWall'
    ' + D2H:OpponentD2H + D2L:OpponentD2L'
    ' + (1|GlmPlayerID)'
)

# save_results=True: the run is stored under the label passed as the third argument.
model_avg_nll, models, predictions, fold_all_nlls, fold_avg_nlls = run_cross_validation(
    split_dataframes['glm_df_social'],
    model_formula,
    "social_randomintercepts_bestsolo_opponentdistance_opponentfirstseen_socialdistinteractions",
    save_results=True,
)

IntProgress(value=0, description='Fitting models', max=20)

  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(


Fold 0 average NLL: 0.45182695428226055
Fold 1 average NLL: 0.4633562146135017
Fold 2 average NLL: 0.4451211025640971
Fold 3 average NLL: 0.4572972486984334
Fold 4 average NLL: 0.4915909623607254
Fold 5 average NLL: 0.43996626448668175
Fold 6 average NLL: 0.4253468855584289
Fold 7 average NLL: 0.37538668689109633
Fold 8 average NLL: 0.3885775459746287
Fold 9 average NLL: 0.38212798094916217
Fold 10 average NLL: 0.5180695497783088
Fold 11 average NLL: 0.49287443204984116
Fold 12 average NLL: 0.4314738228471011
Fold 13 average NLL: 0.3657051785226947
Fold 14 average NLL: 0.32504872122832895
Fold 15 average NLL: 0.4405841026447561
Fold 16 average NLL: 0.44864868371261885
Fold 17 average NLL: 0.35485352947760324
Fold 18 average NLL: 0.43663427755273093
Fold 19 average NLL: 0.46856667797313933
20-fold CV data saved to:  ..\data\k_fold_CV\20-fold-CV_results_social_randomintercepts_bestsolo_opponentdistance_opponentfirstseen_socialdistinteractions.pickle


### Solo-social models

In [31]:
# Combined solo+social data ('glm_df_solosocial'): the "bestsolomodel" formula on its
# own, i.e. without any SocialContext term, with a random intercept per GlmPlayerID.
model_formula = (
    'ChooseHigh ~ 1 + D2H + D2L + FirstSeenWall + WallSep'
    ' + D2H:FirstSeenWall + D2L:FirstSeenWall + FirstSeenWall:WallSep'
    ' + (1 |GlmPlayerID)'
)

# save_results=True: the run is stored under the label passed as the third argument.
model_avg_nll, models, predictions, fold_all_nlls, fold_avg_nlls = run_cross_validation(
    split_dataframes['glm_df_solosocial'],
    model_formula,
    "solosocial_randomintercepts_bestsolomodel",
    save_results=True,
)

IntProgress(value=0, description='Fitting models', max=20)

  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(


Fold 0 average NLL: 0.4463617179875169
Fold 1 average NLL: 0.38893860254387647
Fold 2 average NLL: 0.43191533862804493
Fold 3 average NLL: 0.4521986380649368
Fold 4 average NLL: 0.40138949371093274
Fold 5 average NLL: 0.3616687662077192
Fold 6 average NLL: 0.41800925140950645
Fold 7 average NLL: 0.47663319997513254
Fold 8 average NLL: 0.4170050402657199
Fold 9 average NLL: 0.40006254587452805
Fold 10 average NLL: 0.4030287137389233
Fold 11 average NLL: 0.4130576063747779
Fold 12 average NLL: 0.42347952949299633
Fold 13 average NLL: 0.4067966831230869
Fold 14 average NLL: 0.3824206192345322
Fold 15 average NLL: 0.4544003030017002
Fold 16 average NLL: 0.3970292037356399
Fold 17 average NLL: 0.44516696177091636
Fold 18 average NLL: 0.42056155259693867
Fold 19 average NLL: 0.40116046967149727
20-fold CV data saved to:  ..\data\k_fold_CV\20-fold-CV_results_solosocial_randomintercepts_bestsolomodel.pickle


In [32]:
# Combined solo+social data: the "bestsolomodel" formula plus a main effect of
# SocialContext. Joining with ' + ' yields the same formula string as appending
# ' + SocialContext' to the base formula.
model_formula = ' + '.join([
    # base formula (fixed effects and random intercept)
    'ChooseHigh ~ 1 + D2H + D2L + FirstSeenWall + WallSep'
    ' + D2H:FirstSeenWall + D2L:FirstSeenWall + FirstSeenWall:WallSep'
    ' + (1 |GlmPlayerID)',
    # added term
    'SocialContext',
])

# save_results=True: the run is stored under the label passed as the third argument.
model_avg_nll, models, predictions, fold_all_nlls, fold_avg_nlls = run_cross_validation(
    split_dataframes['glm_df_solosocial'],
    model_formula,
    "solosocial_randomintercepts_bestsolomodel_socialcontext",
    save_results=True,
)

IntProgress(value=0, description='Fitting models', max=20)

  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(


Fold 0 average NLL: 0.38777122093177013
Fold 1 average NLL: 0.3412161091699096
Fold 2 average NLL: 0.37322286661203685
Fold 3 average NLL: 0.4051682253360674
Fold 4 average NLL: 0.35741830279403325
Fold 5 average NLL: 0.3196107496364403
Fold 6 average NLL: 0.3465923540772242
Fold 7 average NLL: 0.4222315713257453
Fold 8 average NLL: 0.37123457049067254
Fold 9 average NLL: 0.3464097015372949
Fold 10 average NLL: 0.3430317238572632
Fold 11 average NLL: 0.35486453367088655
Fold 12 average NLL: 0.35447055421227297
Fold 13 average NLL: 0.3802617633853407
Fold 14 average NLL: 0.3284275496261719
Fold 15 average NLL: 0.414657410795059
Fold 16 average NLL: 0.32915292989867373
Fold 17 average NLL: 0.38490525285082866
Fold 18 average NLL: 0.3674733298361879
Fold 19 average NLL: 0.3670216180749687
20-fold CV data saved to:  ..\data\k_fold_CV\20-fold-CV_results_solosocial_randomintercepts_bestsolomodel_socialcontext.pickle


In [33]:
# Combined solo+social data: the "bestsolomodel" formula plus SocialContext and its
# interactions with FirstSeenWall, WallSep, D2H and D2L. Joining with ' + ' yields
# the same formula string as appending the terms to the base formula.
model_formula = ' + '.join([
    # base formula (fixed effects and random intercept)
    'ChooseHigh ~ 1 + D2H + D2L + FirstSeenWall + WallSep'
    ' + D2H:FirstSeenWall + D2L:FirstSeenWall + FirstSeenWall:WallSep'
    ' + (1 |GlmPlayerID)',
    # added terms
    'SocialContext',
    'SocialContext:FirstSeenWall',
    'SocialContext:WallSep',
    'SocialContext:D2H',
    'SocialContext:D2L',
])

# save_results=True: the run is stored under the label passed as the third argument.
model_avg_nll, models, predictions, fold_all_nlls, fold_avg_nlls = run_cross_validation(
    split_dataframes['glm_df_solosocial'],
    model_formula,
    "solosocial_randomintercepts_bestsolomodel_socialcontext_allinteractions",
    save_results=True,
)

IntProgress(value=0, description='Fitting models', max=20)

  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(


Fold 0 average NLL: 0.3870080111337435
Fold 1 average NLL: 0.3325287685648883
Fold 2 average NLL: 0.3687354280752905
Fold 3 average NLL: 0.40213236997069457
Fold 4 average NLL: 0.35145907442123364
Fold 5 average NLL: 0.31527796865154123
Fold 6 average NLL: 0.3450361399349646
Fold 7 average NLL: 0.41445196794468003
Fold 8 average NLL: 0.3641337465014102
Fold 9 average NLL: 0.3430998327164872
Fold 10 average NLL: 0.3363119011134161
Fold 11 average NLL: 0.3539822275697298
Fold 12 average NLL: 0.3519247391567121
Fold 13 average NLL: 0.3737798881336077
Fold 14 average NLL: 0.32744626679939626
Fold 15 average NLL: 0.4046942352482743
Fold 16 average NLL: 0.3263219288451895
Fold 17 average NLL: 0.37745223229928954
Fold 18 average NLL: 0.36314165831557527
Fold 19 average NLL: 0.3657600803817109
20-fold CV data saved to:  ..\data\k_fold_CV\20-fold-CV_results_solosocial_randomintercepts_bestsolomodel_socialcontext_allinteractions.pickle


In [34]:
# Combined solo+social data: the "bestsolomodel" formula plus SocialContext and the
# SocialContext:FirstSeenWall interaction. Joining with ' + ' yields the same formula
# string as appending the terms to the base formula.
model_formula = ' + '.join([
    # base formula (fixed effects and random intercept)
    'ChooseHigh ~ 1 + D2H + D2L + FirstSeenWall + WallSep'
    ' + D2H:FirstSeenWall + D2L:FirstSeenWall + FirstSeenWall:WallSep'
    ' + (1 |GlmPlayerID)',
    # added terms
    'SocialContext',
    'SocialContext:FirstSeenWall',
])

# save_results=True: the run is stored under the label passed as the third argument.
model_avg_nll, models, predictions, fold_all_nlls, fold_avg_nlls = run_cross_validation(
    split_dataframes['glm_df_solosocial'],
    model_formula,
    "solosocial_randomintercepts_bestsolomodel_socialcontext_SC-FSW-interactions",
    save_results=True,
)

IntProgress(value=0, description='Fitting models', max=20)

  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(


Fold 0 average NLL: 0.38736803135956505
Fold 1 average NLL: 0.3418323968353272
Fold 2 average NLL: 0.3727148581524858
Fold 3 average NLL: 0.4049183186896521
Fold 4 average NLL: 0.3594468617514337
Fold 5 average NLL: 0.31929744522150094
Fold 6 average NLL: 0.34732996633438845
Fold 7 average NLL: 0.42309582946995683
Fold 8 average NLL: 0.37055801258585747
Fold 9 average NLL: 0.34668731631130073
Fold 10 average NLL: 0.3425822789369963
Fold 11 average NLL: 0.35483436892866627
Fold 12 average NLL: 0.35381489408758776
Fold 13 average NLL: 0.3799306349634119
Fold 14 average NLL: 0.327793038212842
Fold 15 average NLL: 0.4150666429489938
Fold 16 average NLL: 0.32940920806729507
Fold 17 average NLL: 0.38517539733892003
Fold 18 average NLL: 0.36781989720888225
Fold 19 average NLL: 0.36657720023086454
20-fold CV data saved to:  ..\data\k_fold_CV\20-fold-CV_results_solosocial_randomintercepts_bestsolomodel_socialcontext_SC-FSW-interactions.pickle


In [35]:
# Combined solo+social data: the "bestsolomodel" formula plus SocialContext and the
# SocialContext:WallSep interaction. Joining with ' + ' yields the same formula
# string as appending the terms to the base formula.
model_formula = ' + '.join([
    # base formula (fixed effects and random intercept)
    'ChooseHigh ~ 1 + D2H + D2L + FirstSeenWall + WallSep'
    ' + D2H:FirstSeenWall + D2L:FirstSeenWall + FirstSeenWall:WallSep'
    ' + (1 |GlmPlayerID)',
    # added terms
    'SocialContext',
    'SocialContext:WallSep',
])

# save_results=True: the run is stored under the label passed as the third argument.
model_avg_nll, models, predictions, fold_all_nlls, fold_avg_nlls = run_cross_validation(
    split_dataframes['glm_df_solosocial'],
    model_formula,
    "solosocial_randomintercepts_bestsolomodel_socialcontext_SC-WS-interactions",
    save_results=True,
)

IntProgress(value=0, description='Fitting models', max=20)

  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(


Fold 0 average NLL: 0.38823514129702336
Fold 1 average NLL: 0.34112275077720194
Fold 2 average NLL: 0.3728625109730981
Fold 3 average NLL: 0.4051067210995662
Fold 4 average NLL: 0.3571027597791351
Fold 5 average NLL: 0.3197112405775514
Fold 6 average NLL: 0.34685965849003125
Fold 7 average NLL: 0.4216332237806378
Fold 8 average NLL: 0.3709729408073362
Fold 9 average NLL: 0.34685838069213765
Fold 10 average NLL: 0.34278283483335764
Fold 11 average NLL: 0.35452746008232267
Fold 12 average NLL: 0.3543039240157314
Fold 13 average NLL: 0.3813872838856566
Fold 14 average NLL: 0.32804825871221605
Fold 15 average NLL: 0.41427645197290375
Fold 16 average NLL: 0.3299488412954252
Fold 17 average NLL: 0.38469370773715644
Fold 18 average NLL: 0.36717603537212234
Fold 19 average NLL: 0.3681283667195239
20-fold CV data saved to:  ..\data\k_fold_CV\20-fold-CV_results_solosocial_randomintercepts_bestsolomodel_socialcontext_SC-WS-interactions.pickle


In [36]:
# Combined solo+social data: the "bestsolomodel" formula plus SocialContext and its
# interactions with D2H and D2L. Joining with ' + ' yields the same formula string
# as appending the terms to the base formula.
model_formula = ' + '.join([
    # base formula (fixed effects and random intercept)
    'ChooseHigh ~ 1 + D2H + D2L + FirstSeenWall + WallSep'
    ' + D2H:FirstSeenWall + D2L:FirstSeenWall + FirstSeenWall:WallSep'
    ' + (1 |GlmPlayerID)',
    # added terms
    'SocialContext',
    'SocialContext:D2H',
    'SocialContext:D2L',
])

# save_results=True: the run is stored under the label passed as the third argument.
model_avg_nll, models, predictions, fold_all_nlls, fold_avg_nlls = run_cross_validation(
    split_dataframes['glm_df_solosocial'],
    model_formula,
    "solosocial_randomintercepts_bestsolomodel_socialcontext_SC-dist-interactions",
    save_results=True,
)

IntProgress(value=0, description='Fitting models', max=20)

  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(


Fold 0 average NLL: 0.3905287076229453
Fold 1 average NLL: 0.3363343869431589
Fold 2 average NLL: 0.36886403986957683
Fold 3 average NLL: 0.4040381832877353
Fold 4 average NLL: 0.3544548284402793
Fold 5 average NLL: 0.3146852426444742
Fold 6 average NLL: 0.3422057394691339
Fold 7 average NLL: 0.41672915095878266
Fold 8 average NLL: 0.366482086829888
Fold 9 average NLL: 0.3471492178181681
Fold 10 average NLL: 0.33521782556146673
Fold 11 average NLL: 0.3529558301894002
Fold 12 average NLL: 0.3527209269547742
Fold 13 average NLL: 0.37449795231571015
Fold 14 average NLL: 0.3264237099174831
Fold 15 average NLL: 0.40513227744914104
Fold 16 average NLL: 0.3248745778797144
Fold 17 average NLL: 0.3772293323432602
Fold 18 average NLL: 0.3624555814166042
Fold 19 average NLL: 0.3651004688156854
20-fold CV data saved to:  ..\data\k_fold_CV\20-fold-CV_results_solosocial_randomintercepts_bestsolomodel_socialcontext_SC-dist-interactions.pickle


In [31]:
# Combined solo+social data: main effects only (no FirstSeenWall interactions) plus
# SocialContext and its interactions with D2H and D2L, random intercept per GlmPlayerID.
#
# NOTE(review): the save label below says "fsw-interactions", but this formula has no
# FirstSeenWall interaction terms. The label is left unchanged because the saved file
# name may be read elsewhere -- confirm and rename if appropriate.
# NOTE(review): this cell's execution count (In [31]) repeats an earlier one, so it was
# run out of order; check that the notebook survives Restart & Run All.
model_formula = (
    'ChooseHigh ~ 1 + D2H + D2L + FirstSeenWall + WallSep'
    ' + SocialContext + D2H:SocialContext + D2L:SocialContext'
    ' + (1 |GlmPlayerID)'
)

# save_results=True: the run is stored under the label passed as the third argument.
model_avg_nll, models, predictions, fold_all_nlls, fold_avg_nlls = run_cross_validation(
    split_dataframes['glm_df_solosocial'],
    model_formula,
    "solosocial_randomintercepts_plus-socialcontext_fsw-interactions_socialcontext-dist_interactions",
    save_results=True,
)

IntProgress(value=0, description='Fitting models', max=20)

  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(


Fold 0 average NLL: 0.38596294642034606
Fold 1 average NLL: 0.35639297556985194
Fold 2 average NLL: 0.3800264229777872
Fold 3 average NLL: 0.40143100767363765
Fold 4 average NLL: 0.35728499255787666
Fold 5 average NLL: 0.32811226156624623
Fold 6 average NLL: 0.3671101552968014
Fold 7 average NLL: 0.4278953364125747
Fold 8 average NLL: 0.39380652398587057
Fold 9 average NLL: 0.3508724544916122
Fold 10 average NLL: 0.3466662525643493
Fold 11 average NLL: 0.37569896484303067
Fold 12 average NLL: 0.36171663114662195
Fold 13 average NLL: 0.3865608456688019
Fold 14 average NLL: 0.3331236527152994
Fold 15 average NLL: 0.41434044610366466
Fold 16 average NLL: 0.3439543693865903
Fold 17 average NLL: 0.3895402551270885
Fold 18 average NLL: 0.36876876164637773
Fold 19 average NLL: 0.361714927831515
20-fold CV data saved to:  ../data/k_fold_CV/20-fold-CV_results_solosocial_randomintercepts_plus-socialcontext_fsw-interactions_socialcontext-dist_interactions.pickle


In [32]:
# Combined solo+social data: main effects, SocialContext and its interactions with
# D2H and D2L, plus the FirstSeenWall interactions (FirstSeenWall:WallSep,
# D2H:FirstSeenWall, D2L:FirstSeenWall), random intercept per GlmPlayerID.
#
# NOTE(review): this formula has the same set of terms (in a different order) as the
# model saved under
# "solosocial_randomintercepts_bestsolomodel_socialcontext_SC-dist-interactions",
# and the printed per-fold NLLs of the two runs agree to about six decimal places --
# this looks like a duplicate fit; confirm whether both are needed.
model_formula = (
    'ChooseHigh ~ 1 + D2H + D2L + FirstSeenWall + WallSep'
    ' + SocialContext + D2H:SocialContext + D2L:SocialContext'
    ' + FirstSeenWall:WallSep + D2H:FirstSeenWall + D2L:FirstSeenWall'
    ' + (1 |GlmPlayerID)'
)

# save_results=True: the run is stored under the label passed as the third argument.
model_avg_nll, models, predictions, fold_all_nlls, fold_avg_nlls = run_cross_validation(
    split_dataframes['glm_df_solosocial'],
    model_formula,
    "solosocial_randomintercepts_plus-socialcontext_fsw-interactions_socialcontext-dist-interactions_solo-interactions",
    save_results=True,
)

IntProgress(value=0, description='Fitting models', max=20)

  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(
  ran_vars = ran_vars.applymap(


Fold 0 average NLL: 0.3905287063059076
Fold 1 average NLL: 0.33633441285843413
Fold 2 average NLL: 0.3688640269729019
Fold 3 average NLL: 0.40403817613974463
Fold 4 average NLL: 0.35445463902409946
Fold 5 average NLL: 0.3146852458950841
Fold 6 average NLL: 0.3422057833672172
Fold 7 average NLL: 0.4167291459502028
Fold 8 average NLL: 0.3664822810411201
Fold 9 average NLL: 0.34714920126273013
Fold 10 average NLL: 0.33521781978902837
Fold 11 average NLL: 0.352955803355181
Fold 12 average NLL: 0.35272081308056075
Fold 13 average NLL: 0.3744979056345644
Fold 14 average NLL: 0.32642372297384353
Fold 15 average NLL: 0.40513215048725115
Fold 16 average NLL: 0.3248745211525455
Fold 17 average NLL: 0.3772293241899721
Fold 18 average NLL: 0.36245558666022365
Fold 19 average NLL: 0.3651005096239049
20-fold CV data saved to:  ../data/k_fold_CV/20-fold-CV_results_solosocial_randomintercepts_plus-socialcontext_fsw-interactions_socialcontext-dist-interactions_solo-interactions.pickle
