In [2]:
import sys
sys.path.append("..")
sys.path.append("../lmmnn/")

from utils.training_functions import *
from utils.evaluation import *
from utils.utils import tune_xgboost,tune_lasso,evaluate_lr,evaluate_xgb

from data import dataset_preprocessing

from utils.evaluation import get_metrics
from xgboost import XGBClassifier, XGBRegressor
from sklearn.linear_model import LinearRegression, LogisticRegression, Ridge, Lasso

from scipy import stats


import pickle

In [1]:
# Experiment configuration shared by the cells of this notebook.
# Dataset identifier; used to build the raw-data, prepared-data and results paths.
dataset_name = "academic_performance"
# Split scheme; the preparation cell distinguishes "cv", "train_test" and "train_val_test".
mode="cv"
# Random seed; part of the prepared-data directory name and passed as `seed` to the model helpers.
RS=1
# Part of the prepared-data directory name and passed to `process_dataset`.
# NOTE(review): presumably the high-cardinality threshold - confirm against dataset_preprocessing.
hct=10
# Split ratios; they only enter the directory name in the "train_test"/"train_val_test" modes.
test_ratio=0.2
val_ratio=0.1
# Number of cross-validation folds iterated by the evaluation loop.
folds=5
# Target type handed to `get_metrics`, `evaluate_lr` and `evaluate_xgb`.
target = "continuous"
# Sub-directory of ../results/<dataset_name>/ in which result pickles are cached.
experiment_name = "5CV_paper_final"

### Describe raw data

In [3]:
# Columns removed before any analysis: the two row identifiers and the
# outcome columns that are not used as the target in this notebook.
identifiers = ["COD_S11", "Cod_SPro"]
alternative_targets = ["CR_PRO", "QR_PRO", "CC_PRO", "WC_PRO", "FEP_PRO", "ENG_PRO", "QUARTILE", "PERCENTILE",
                       "2ND_DECILE", ]

# Read the raw spreadsheet, then drop the stray "Unnamed: 9" column together
# with the identifier and alternative-target columns.
df = (
    pd.read_excel(f"../data/raw/{dataset_name}/{dataset_name}.xlsx")
    .drop(columns="Unnamed: 9")
    .drop(columns=identifiers + alternative_targets)
)

In [4]:
# Target column of `df`.
y_col = "G_SC"
# Manual assignment of every remaining column to a feature group; the group
# sizes are reported in the dataset description table below.
demographic_cols = ['GENDER', 'EDU_FATHER', 'EDU_MOTHER', 'OCC_FATHER', 'OCC_MOTHER',
       'STRATUM', 'SISBEN', 'PEOPLE_HOUSE', 'INTERNET', 'TV', 'COMPUTER',
       'WASHING_MCH', 'MIC_OVEN', 'CAR', 'DVD', 'FRESH', 'PHONE', 'MOBILE','REVENUE', 'JOB', 'SCHOOL_NAME', 'SCHOOL_NAT', 'SCHOOL_TYPE','SEL', 'SEL_IHE']
perf_cols = ['MAT_S11','CR_S11', 'CC_S11', 'BIO_S11', 'ENG_S11']
# This dataset has no columns in the "activity" group.
activity_cols = []
other_cols = ['UNIVERSITY', 'ACADEMIC_PROGRAM']
# Sanity check: columns of `df` that belong to no group. An empty set means
# the grouping above covers every column.
set(df.columns)-set([y_col]+demographic_cols+perf_cols+activity_cols+other_cols)

set()

In [6]:
# String-typed columns with more than two distinct values are the ones counted
# as "categorical" in the summary; compute the selection once and reuse it.
categorical_mask = list(np.logical_and(df.nunique() > 2, df.dtypes == "object"))
categorical_cols = df.columns[categorical_mask]

# One-row summary of the raw dataset (rendered as a LaTeX table further down).
desc_df_dict = {"N": df.shape[0],
           # Number of columns, including the target column.
           "d": df.shape[1],
           # Missing cells as a percentage of ALL cells (rows * columns). The
           # previous denominator, sum(df.shape), was rows + columns.
           "% NA": (100 * df.isna().sum().sum() / df.size) if df.size else 0.0,
           # "\\in" keeps the emitted text identical while avoiding the invalid
           # escape sequence "\i" in a non-raw string literal.
           "Target": f"$y \\in [${df[y_col].min()}..{df[y_col].max()}]",
           "Performance features": len(perf_cols),
           "Demographic features": len(demographic_cols),
           "Activity features": len(activity_cols),
           "Other features": len(other_cols),
           "Categorical features": len(categorical_cols),
           # Sum of the number of distinct levels over all categorical columns.
           "Total cardinality": df[categorical_cols].nunique().sum(),
}
# NOTE(review): the row label "cortez" does not match `dataset_name`
# ("academic_performance") and looks copied from another dataset's notebook;
# it is left unchanged here because it appears in the exported table - confirm.
desc_df = pd.DataFrame([desc_df_dict],index=["cortez"])
desc_df

Unnamed: 0,N,d,% NA,Target,Performance features,Demographic features,Activity features,Other features,Categorical features,Total cardinality
cortez,12411,33,0.0,$y \in [$37..247],5,25,0,2,13,3980


In [7]:
# Pearson correlation of every numeric feature with the target.
# Non-numeric columns are removed explicitly: recent pandas versions raise when
# DataFrame.corr() meets string columns instead of silently skipping them.
numeric_features = df.drop(columns=y_col).select_dtypes(include=["number", "bool"])
# The target is appended last under the name "target", so it is the final row.
target_series = pd.Series(df[y_col].values, index=df.index, name="target")
pd.concat([numeric_features, target_series], axis=1).corr()[["target"]]

Unnamed: 0,target
MAT_S11,0.643838
CR_S11,0.653572
CC_S11,0.6349
BIO_S11,0.666635
ENG_S11,0.662169
SEL,0.271465
SEL_IHE,0.3744
target,1.0


In [8]:
# Export the dataset summary as LaTeX, one table row per statistic.
desc_latex = desc_df.T.to_latex()
print(desc_latex)

\begin{tabular}{ll}
\toprule
{} &             cortez \\
\midrule
N                    &              12411 \\
d                    &                 33 \\
\% NA                 &                0.0 \\
Target               &  \$y \textbackslash in [\$37..247] \\
Performance features &                  5 \\
Demographic features &                 25 \\
Activity features    &                  0 \\
Other features       &                  2 \\
Categorical features &                 13 \\
Total cardinality    &               3980 \\
\bottomrule
\end{tabular}



### Preprocessing and preparation

In [9]:
# Directory name of the prepared data; it encodes the split configuration.
data_path = f"{mode}_RS{RS}_hct{hct}"
if mode == "cv":
    data_path += f"_{folds}folds"
elif mode == "train_test":
    # Train/test percentages, e.g. "_split80-20". The previous expression,
    # 1-test_ratio*100, evaluated to -19.0 because of operator precedence.
    # NOTE(review): confirm that process_dataset writes to the same corrected name.
    data_path += f"_split{round(100-test_ratio*100)}-{round(test_ratio*100)}"
elif mode == "train_val_test":
    data_path += f"_split{round(100-(test_ratio+val_ratio)*100)}-{round(test_ratio*100)}-{round(val_ratio*100)}"

data_dict_file = f"../data/prepared/{dataset_name}/{data_path}/data_dict.pickle"

# If no data_dict for the configuration exists, run preprocessing, else load data_dict
if not os.path.exists(data_dict_file):
    dataset_preprocessing.process_dataset(dataset_name, target, mode, RS, hct, test_ratio, val_ratio, folds)
# pickle.load executes arbitrary code from the file: only load prepared data
# that was produced by this project.
with open(data_dict_file, 'rb') as handle:
    data_dict = pickle.load(handle)


## Evaluation of categorical data treatment methods

In [10]:
# Categorical-encoding conditions compared in the evaluation loop. "ignore"
# adds no encoded categorical columns; every other name selects the
# `z_<condition>_encoded_*` entries of `data_dict`.
conditions = ["ignore", "ohe", "target", "ordinal", "catboost", "glmm"]

In [11]:
# Tuning budget forwarded to the model helpers: `early_stopping_rounds` goes to
# `evaluate_xgb`, `max_evals` to `evaluate_xgb` and to the tuned `evaluate_lr` call.
# NOTE(review): with max_evals = 1 each tuning step runs a single trial (the
# logged progress bars show 1/1) - confirm this is intended for the final run.
early_stopping_rounds = 1
max_evals = 1

In [12]:
def _drop_feature_groups(frame, name_fragments):
    """Return `frame` without the columns whose name contains any of `name_fragments`."""
    kept = [col for col in frame.columns if not any(frag in col for frag in name_fragments)]
    return frame[kept]


results_dir = f"../results/{dataset_name}/{experiment_name}"
results_file = f"{results_dir}/results_encodings.pickle"
importances_file = f"{results_dir}/results_encodings_feature_importances.pickle"

# Recompute unless BOTH cached files exist: the load branch reads both, so
# checking only the first one could end in a FileNotFoundError.
if not (os.path.exists(results_file) and os.path.exists(importances_file)):

    results_encodings = {}
    results_encodings_feature_importances = {}

    # Not used in this cell; kept as a notebook-level name in case other cells
    # read it - TODO confirm and remove if unused.
    z_cols = data_dict["z_cols"]

    # Feature subset evaluated here: everything except the performance and
    # activity columns. (An "only categorical" variant was tried before and
    # was almost never better than the baseline.)
    excluded_cols = perf_cols+activity_cols

    for fold in range(folds):
        results_encodings[fold] = {}
        results_encodings_feature_importances[fold] = {}

        y_train = data_dict[f"y_train_{fold}"]
        y_val = data_dict[f"y_val_{fold}"]
        y_test = data_dict[f"y_test_{fold}"]
        # Models are fitted on train+val and scored on test.
        y_train_val = np.concatenate([y_train,y_val])

        # Baseline: predict 0 for every sample.
        # NOTE(review): the cached baseline R2 is ~0, so 0 presumably equals the
        # mean of a standardized target - confirm in dataset_preprocessing.
        y_train_val_pred_base = np.zeros(y_train_val.shape[0])
        y_test_pred_base = np.zeros(y_test.shape[0])

        results_encodings[fold]["Baseline"] = {}
        eval_res_train = get_metrics(y_train_val, y_train_val_pred_base, target=target)
        for metric in eval_res_train.keys():
            results_encodings[fold]["Baseline"][metric + " Train"] = eval_res_train[metric]
        eval_res_test = get_metrics(y_test, y_test_pred_base, target=target)
        for metric in eval_res_test.keys():
            results_encodings[fold]["Baseline"][metric + " Test"] = eval_res_test[metric]

        for condition in conditions:
            print(f"Preparing results for fold {fold}, condition={condition}")

            # Non-categorical features of this fold, restricted to the evaluated subset.
            X_train = _drop_feature_groups(data_dict[f"X_train_{fold}"], excluded_cols)
            X_val = _drop_feature_groups(data_dict[f"X_val_{fold}"], excluded_cols)
            X_test = _drop_feature_groups(data_dict[f"X_test_{fold}"], excluded_cols)

            # Append the categorical columns encoded with the current method;
            # "ignore" leaves them out entirely.
            if condition != "ignore":
                z_encoded_train = data_dict[f"z_{condition}_encoded_train_{fold}"]
                z_encoded_val = data_dict[f"z_{condition}_encoded_val_{fold}"]
                z_encoded_test = data_dict[f"z_{condition}_encoded_test_{fold}"]

                X_train = pd.concat([X_train,z_encoded_train],axis=1)
                X_val = pd.concat([X_val,z_encoded_val],axis=1)
                X_test = pd.concat([X_test,z_encoded_test],axis=1)

            X_train_val = pd.concat([X_train,X_val])

            # Untuned models first, then tuned ones, LR before XGB in each
            # round. Later cells rely on this insertion order of the result keys.
            for suffix, tune in (("", False), ("_tuned", True)):
                # As before, only the tuned LR call receives `max_evals`.
                lr_kwargs = {"max_evals": max_evals} if tune else {}
                res, feats = evaluate_lr(X_train_val, y_train_val, X_test, y_test, target=target, tune=tune, seed=RS, **lr_kwargs)
                results_encodings[fold]["LR_"+condition+suffix] = res
                results_encodings_feature_importances[fold]["LR_"+condition+suffix] = feats

                res, feats = evaluate_xgb(X_train_val, y_train_val, X_test, y_test, target, tune=tune, max_evals=max_evals, early_stopping_rounds=early_stopping_rounds, seed=RS)
                results_encodings[fold]["XGB_"+condition+suffix] = res
                results_encodings_feature_importances[fold]["XGB_"+condition+suffix] = feats

    # exist_ok avoids the check-then-create race of a separate os.path.exists test.
    os.makedirs(results_dir, exist_ok=True)
    with open(results_file, 'wb') as handle:
        pickle.dump(results_encodings, handle, protocol=pickle.HIGHEST_PROTOCOL)
    with open(importances_file, 'wb') as handle:
        pickle.dump(results_encodings_feature_importances, handle, protocol=pickle.HIGHEST_PROTOCOL)

else:
    # pickle.load executes arbitrary code from the file: only load result
    # files that were written by this notebook.
    with open(results_file, 'rb') as handle:
        results_encodings = pickle.load(handle)
    with open(importances_file, 'rb') as handle:
        results_encodings_feature_importances = pickle.load(handle)


# Fold-0 overview: one row per model, worst test MSE first; the best value of
# each displayed metric is highlighted.
results_encodings_df = pd.DataFrame(results_encodings[0]).transpose().sort_values("MSE Test",ascending=False).round(4)
(
    results_encodings_df[["MSE Train", "R2 Train", "MSE Test", "R2 Test"]]
    .style
    .highlight_min(subset=["MSE Train", "MSE Test"], color = 'lightgreen', axis = 0)
    .highlight_max(subset=["R2 Train", "R2 Test"], color = 'lightgreen', axis = 0)
)

Preparing results for fold 0, condition=ignore
SCORE: 0.8410676121711731                                                                                              
100%|██████████████████████████████████████████████████| 1/1 [00:00<00:00, 16.13trial/s, best loss: 0.8410676121711731]
The best hyperparameters are :  

{'alpha': 0.020293189672666353}
Default performance on Test: 0.8872931599617004
SCORE: 0.8395212292671204                                                                                              
100%|██████████████████████████████████████████████████| 1/1 [00:12<00:00, 12.86s/trial, best loss: 0.8395212292671204]
The best hyperparameters after step 1  are :  

{'learning_rate': 0.009565280018344193, 'n_estimators': 206.0}
Test Performance after first tuning round: 0.8189454078674316
SCORE: 0.8460365533828735                                                                                              
100%|██████████████████████████████████████████████████| 1/1 [00:1

SCORE: 0.687567412853241                                                                                               
100%|███████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.09s/trial, best loss: 0.687567412853241]
The best hyperparameters after step 2 are :  

{'learning_rate': 0.2966227818807729, 'n_estimators': 476.0, 'seed': 0, 'max_depth': 11.0, 'min_child_weight': 3.0}
Test Performance after second tuning round: 0.7019045352935791
SCORE: 0.6877772808074951                                                                                              
100%|██████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.27s/trial, best loss: 0.6877772808074951]
The best hyperparameters after step 3 are :  

{'learning_rate': 0.2966227818807729, 'n_estimators': 476.0, 'seed': 0, 'max_depth': 11.0, 'min_child_weight': 3.0, 'colsample_bytree': 0.9419670933945525, 'subsample': 0.9315637169974363}
Test Performance after third tuning round: 0.7225984334945

Test Performance after third tuning round: 1.0888893604278564
SCORE: 0.8237046003341675                                                                                              
100%|██████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.53trial/s, best loss: 0.8237046003341675]
The best hyperparameters are :  

{'learning_rate': 0.4484225524061201, 'n_estimators': 317.0, 'seed': 0, 'max_depth': 7.0, 'min_child_weight': 2.0, 'colsample_bytree': 0.780879573203657, 'subsample': 0.6612196064199416, 'gamma': 0.46569849046834866, 'reg_alpha': 63.0, 'reg_lambda': 0.16854934370971153}
Test Performance after last tuning round: 0.8213481903076172
Preparing results for fold 1, condition=ohe
SCORE: 0.9817577436370382                                                                                              
100%|██████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.15trial/s, best loss: 0.9817577436370382]
The best hyperparameters are :  

{'alpha': 0.35

100%|██████████████████████████████████████████████████| 1/1 [00:00<00:00, 12.19trial/s, best loss: 0.8474593466570133]
The best hyperparameters are :  

{'alpha': 0.26982960952056917}
Default performance on Test: 0.7796429991722107
SCORE: 0.6447685360908508                                                                                              
100%|██████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.88trial/s, best loss: 0.6447685360908508]
The best hyperparameters after step 1  are :  

{'learning_rate': 0.3941202225832249, 'n_estimators': 73.0}
Test Performance after first tuning round: 0.8365575075149536
SCORE: 0.7016911506652832                                                                                              
100%|██████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.36trial/s, best loss: 0.7016911506652832]
The best hyperparameters after step 2 are :  

{'learning_rate': 0.3941202225832249, 'n_estimators': 73.0, 'seed': 0,

Test Performance after second tuning round: 0.6339802145957947
SCORE: 0.6183180212974548                                                                                              
100%|██████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.96s/trial, best loss: 0.6183180212974548]
The best hyperparameters after step 3 are :  

{'learning_rate': 0.4059427951428407, 'n_estimators': 279.0, 'seed': 0, 'max_depth': 4.0, 'min_child_weight': 8.0, 'colsample_bytree': 0.5681869039331636, 'subsample': 0.8814156992396778}
Test Performance after third tuning round: 0.6232075095176697
SCORE: 0.7051634192466736                                                                                              
100%|██████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.02s/trial, best loss: 0.7051634192466736]
The best hyperparameters are :  

{'learning_rate': 0.4059427951428407, 'n_estimators': 279.0, 'seed': 0, 'max_depth': 4.0, 'min_child_weight': 8.0, 'colsample_

Test Performance after last tuning round: 0.6406900882720947
Preparing results for fold 2, condition=glmm
SCORE: 0.7411980524744635                                                                                              
100%|██████████████████████████████████████████████████| 1/1 [00:00<00:00,  5.59trial/s, best loss: 0.7411980524744635]
The best hyperparameters are :  

{'alpha': 0.2169003995909477}
Default performance on Test: 0.6543176770210266
SCORE: 0.6109917163848877                                                                                              
100%|██████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.30s/trial, best loss: 0.6109917163848877]
The best hyperparameters after step 1  are :  

{'learning_rate': 0.4054915953267071, 'n_estimators': 490.0}
Test Performance after first tuning round: 0.7421249151229858
SCORE: 0.6235190033912659                                                                                              
100%|██████

SCORE: 0.6336272954940796                                                                                              
100%|██████████████████████████████████████████████████| 1/1 [00:22<00:00, 22.71s/trial, best loss: 0.6336272954940796]
The best hyperparameters after step 2 are :  

{'learning_rate': 0.058114056490118166, 'n_estimators': 219.0, 'seed': 0, 'max_depth': 17.0, 'min_child_weight': 1.0}
Test Performance after second tuning round: 0.7638765573501587
SCORE: 0.5768582820892334                                                                                              
100%|██████████████████████████████████████████████████| 1/1 [00:24<00:00, 24.04s/trial, best loss: 0.5768582820892334]
The best hyperparameters after step 3 are :  

{'learning_rate': 0.058114056490118166, 'n_estimators': 219.0, 'seed': 0, 'max_depth': 17.0, 'min_child_weight': 1.0, 'colsample_bytree': 0.9671231912066727, 'subsample': 0.6994001157207446}
Test Performance after third tuning round: 0.713639497

Test Performance after third tuning round: 0.991593062877655
SCORE: 0.6329589486122131                                                                                              
100%|██████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.02trial/s, best loss: 0.6329589486122131]
The best hyperparameters are :  

{'learning_rate': 0.4500039611050748, 'n_estimators': 97.0, 'seed': 0, 'max_depth': 18.0, 'min_child_weight': 7.0, 'colsample_bytree': 0.9971422887287058, 'subsample': 0.5189382484333427, 'gamma': 7.252222381258561, 'reg_alpha': 130.0, 'reg_lambda': 0.2237804154529448}
Test Performance after last tuning round: 0.6435413360595703
Preparing results for fold 4, condition=ignore
SCORE: 0.8652374148368835                                                                                              
100%|██████████████████████████████████████████████████| 1/1 [00:00<00:00, 18.51trial/s, best loss: 0.8652374148368835]
The best hyperparameters are :  

{'alpha': 0.1

100%|██████████████████████████████████████████████████| 1/1 [00:00<00:00, 14.28trial/s, best loss: 0.9471436402250036]
The best hyperparameters are :  

{'alpha': 0.48689663107401504}
Default performance on Test: 0.6965401768684387
SCORE: 0.6617347002029419                                                                                              
100%|██████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.96trial/s, best loss: 0.6617347002029419]
The best hyperparameters after step 1  are :  

{'learning_rate': 0.37406331130531856, 'n_estimators': 497.0}
Test Performance after first tuning round: 0.7986522316932678
SCORE: 0.6766167879104614                                                                                              
100%|██████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.97trial/s, best loss: 0.6766167879104614]
The best hyperparameters after step 2 are :  

{'learning_rate': 0.37406331130531856, 'n_estimators': 497.0, 'seed'

Unnamed: 0,MSE Train,R2 Train,MSE Test,R2 Test
LR_catboost_tuned,0.9986,-0.0,0.9946,-0.0003
LR_glmm_tuned,0.9986,-0.0,0.9946,-0.0003
Baseline,0.9986,-0.0,0.9946,-0.0002
LR_ordinal_tuned,0.9612,0.0375,0.9654,0.029
XGB_ignore,0.6863,0.3127,0.8873,0.1076
LR_ohe_tuned,0.8775,0.1213,0.8656,0.1295
XGB_catboost,0.2302,0.7695,0.8547,0.1405
XGB_ignore_tuned,0.8633,0.1355,0.8473,0.1479
LR_ignore_tuned,0.8401,0.1587,0.822,0.1733
LR_ignore,0.8328,0.166,0.8113,0.184


### Feature Importances

In [13]:
# Disabled cell: would collect the per-model feature importances of one fold
# into a single DataFrame (one column per model).
# NOTE(review): it refers to `results_feature_importances`, which is not
# defined in the visible cells; the dict loaded above is named
# `results_encodings_feature_importances` - confirm before re-enabling.
# models = list(results_feature_importances[fold].keys())
# model = models[0]

# importances_df = pd.DataFrame(index=results_feature_importances[fold][model].index)
# for model in results_feature_importances[fold].keys():
#     importances_df.loc[:,model] = results_feature_importances[fold][model]
# importances_df

### Effectiveness of Parameter Tuning

In [14]:
# Materialise the keys: a list is a valid label indexer for .loc, a dict view
# is not guaranteed to be.
models = list(results_encodings[0].keys())
metric = "MSE Test"

# One row per fold, one column per model, holding the chosen metric.
encodings_folds_df = pd.DataFrame([pd.DataFrame(results_encodings[fold_num]).loc[metric,models] for fold_num in results_encodings.keys()],index=list(results_encodings.keys()))
encodings_mean_df = encodings_folds_df.mean(axis=0)
encodings_std_df = encodings_folds_df.std(axis=0)

# Pair every untuned model with its "<name>_tuned" counterpart by name rather
# than by stride-slicing the sorted list, which silently misaligns the two
# columns as soon as one model lacks a counterpart. A missing counterpart now
# raises a KeyError in the .loc lookups below.
methods = sorted(m for m in encodings_mean_df.index if m != "Baseline")
untuned_methods = [m for m in methods if not m.endswith("_tuned")]
not_tuned = ["Baseline"]+untuned_methods
tuned = ["Baseline"]+[m + "_tuned" for m in untuned_methods]

# Rows are labelled with the untuned model name; .values on BOTH columns makes
# the assignment purely positional.
res_df_tune_comp_mean = pd.DataFrame(
    {"Untuned": encodings_mean_df.loc[not_tuned].values,
     "Tuned": encodings_mean_df.loc[tuned].values},
    index=not_tuned,
)
res_df_tune_comp_std = pd.DataFrame(
    {"Untuned": encodings_std_df.loc[not_tuned].values,
     "Tuned": encodings_std_df.loc[tuned].values},
    index=not_tuned,
)

# Highlight the better (lower) of the two values in every row.
res_df_tune_comp_mean.round(2).style.highlight_min(color = 'lightgreen', axis = 1)

Unnamed: 0,Untuned,Tuned
Baseline,1.0,1.0
LR_catboost,0.66,0.88
LR_glmm,0.6,0.89
LR_ignore,0.83,0.89
LR_ohe,0.61,0.96
LR_ordinal,0.8,0.92
LR_target,0.64,0.8
XGB_catboost,0.81,0.66
XGB_glmm,0.66,0.61
XGB_ignore,0.9,0.84


In [15]:
# Render every cell as "mean (std)": means with two decimals, standard
# deviations with three.
mean_text = res_df_tune_comp_mean.round(2).astype(str)
std_text = res_df_tune_comp_std.round(3).astype(str)
latex_df = mean_text + " (" + std_text + ")"
latex_df

Unnamed: 0,Untuned,Tuned
Baseline,1.0 (0.033),1.0 (0.033)
LR_catboost,0.66 (0.023),0.88 (0.101)
LR_glmm,0.6 (0.02),0.89 (0.172)
LR_ignore,0.83 (0.023),0.89 (0.087)
LR_ohe,0.61 (0.018),0.96 (0.064)
LR_ordinal,0.8 (0.023),0.92 (0.043)
LR_target,0.64 (0.022),0.8 (0.141)
XGB_catboost,0.81 (0.088),0.66 (0.021)
XGB_glmm,0.66 (0.016),0.61 (0.025)
XGB_ignore,0.9 (0.021),0.84 (0.031)


In [16]:
# Export the tuned-vs-untuned comparison as a LaTeX table.
tuning_latex = latex_df.to_latex()
print(tuning_latex)

\begin{tabular}{lll}
\toprule
{} &       Untuned &         Tuned \\
\midrule
Baseline     &   1.0 (0.033) &   1.0 (0.033) \\
LR\_catboost  &  0.66 (0.023) &  0.88 (0.101) \\
LR\_glmm      &    0.6 (0.02) &  0.89 (0.172) \\
LR\_ignore    &  0.83 (0.023) &  0.89 (0.087) \\
LR\_ohe       &  0.61 (0.018) &  0.96 (0.064) \\
LR\_ordinal   &   0.8 (0.023) &  0.92 (0.043) \\
LR\_target    &  0.64 (0.022) &   0.8 (0.141) \\
XGB\_catboost &  0.81 (0.088) &  0.66 (0.021) \\
XGB\_glmm     &  0.66 (0.016) &  0.61 (0.025) \\
XGB\_ignore   &   0.9 (0.021) &  0.84 (0.031) \\
XGB\_ohe      &  0.63 (0.018) &  0.67 (0.045) \\
XGB\_ordinal  &  0.68 (0.023) &  0.69 (0.068) \\
XGB\_target   &    0.7 (0.02) &  0.64 (0.022) \\
\bottomrule
\end{tabular}



### Performance Comparison

In [17]:
# For LR
models = ["Baseline"]+[i for i in results_encodings[0].keys() if ("tuned" in i and "LR" in i)]
metric = "MSE Test"

#####
dataset_res_dict = {}
best_models = {}
t_test_results = {}

use_df = pd.DataFrame([pd.DataFrame(results_encodings[fold_num]).loc[metric,models] for fold_num in results_encodings.keys()],index=results_encodings.keys())*-1

df_mean = pd.DataFrame((-1*use_df).mean(axis=0).round(3).astype(str) + " (" + use_df.std(axis=0).round(3).astype(str) + ")").transpose()
model_dict = {i: df_mean[i].values[0] for i in df_mean.columns}

best_model = use_df.columns[use_df.mean(axis=0).argmax()]

t_test_res = np.array([stats.ttest_rel(use_df[best_model].values, use_df[model].values)[1] for model in models]).round(3)
t_test_res[np.isnan(t_test_res)] = 1.
    
res_df_lr = pd.DataFrame([model_dict])

def negative_bold(val):
    i = np.where(val.name==np.array(models))[0][0]
    return ["font-weight: bold"  if t_test_res[i]>=0.05 else "" for dataset_name in val.keys()]
    # Case without transpose:
#     return ["font-weight: bold"  if t_test_results[val.name][i]>=0.05 else "" for i in range(len(val))]

res_df_lr.style.apply(negative_bold)


Unnamed: 0,Baseline,LR_ignore_tuned,LR_ohe_tuned,LR_target_tuned,LR_ordinal_tuned,LR_catboost_tuned,LR_glmm_tuned
0,0.999 (0.033),0.888 (0.087),0.956 (0.064),0.798 (0.141),0.921 (0.043),0.883 (0.101),0.888 (0.172)


In [18]:
# For LR
models = ["Baseline"]+[i for i in results_encodings[0].keys() if ("tuned" in i and "XGB" in i)]
metric = "MSE Test"

#####
dataset_res_dict = {}
best_models = {}
t_test_results = {}

use_df = pd.DataFrame([pd.DataFrame(results_encodings[fold_num]).loc[metric,models] for fold_num in results_encodings.keys()],index=results_encodings.keys())*-1

df_mean = pd.DataFrame((-1*use_df).mean(axis=0).round(3).astype(str) + " (" + use_df.std(axis=0).round(3).astype(str) + ")").transpose()
model_dict = {i: df_mean[i].values[0] for i in df_mean.columns}

best_model = use_df.columns[use_df.mean(axis=0).argmax()]

t_test_res = np.array([stats.ttest_rel(use_df[best_model].values, use_df[model].values)[1] for model in models]).round(3)
t_test_res[np.isnan(t_test_res)] = 1.
    
res_df_xgb = pd.DataFrame([model_dict])
    
def negative_bold(val):
    i = np.where(val.name==np.array(models))[0][0]
    return ["font-weight: bold"  if t_test_res[i]>=0.05 else "" for dataset_name in val.keys()]
    # Case without transpose:
#     return ["font-weight: bold"  if t_test_results[val.name][i]>=0.05 else "" for i in range(len(val))]

res_df_xgb.style.apply(negative_bold)


Unnamed: 0,Baseline,XGB_ignore_tuned,XGB_ohe_tuned,XGB_target_tuned,XGB_ordinal_tuned,XGB_catboost_tuned,XGB_glmm_tuned
0,0.999 (0.033),0.837 (0.031),0.675 (0.045),0.642 (0.022),0.693 (0.068),0.665 (0.021),0.612 (0.025)


In [19]:
# Shorten "<MODEL>_<encoding>_tuned" column names to "<encoding>" so the LR and XGB rows
# share the same columns. Names without an underscore ("Baseline", or names that an earlier
# run of this cell already shortened) are kept unchanged, so the cell can be re-run safely
# instead of failing with an IndexError on the second execution.
res_df_lr.columns = [i.split("_")[1] if "_" in i else i for i in res_df_lr.columns]
res_df_xgb.columns = [i.split("_")[1] if "_" in i else i for i in res_df_xgb.columns]

# Stack the two one-row frames into a single table with one row per model family
latex_df_encodings = pd.concat([res_df_lr, res_df_xgb], axis=0)
latex_df_encodings.index = ["LR", "XGB"]
latex_df_encodings

Unnamed: 0,Baseline,ignore,ohe,target,ordinal,catboost,glmm
LR,0.999 (0.033),0.888 (0.087),0.956 (0.064),0.798 (0.141),0.921 (0.043),0.883 (0.101),0.888 (0.172)
XGB,0.999 (0.033),0.837 (0.031),0.675 (0.045),0.642 (0.022),0.693 (0.068),0.665 (0.021),0.612 (0.025)


In [20]:
# Print the LaTeX source of the encoding comparison table.
# NOTE(review): the cells are "mean (std)" strings, so .round(2) appears to have no effect --
# the printed table still shows three decimals.
print(latex_df_encodings.round(2).to_latex())


\begin{tabular}{llllllll}
\toprule
{} &       Baseline &         ignore &            ohe &         target &        ordinal &       catboost &           glmm \\
\midrule
LR  &  0.999 (0.033) &  0.888 (0.087) &  0.956 (0.064) &  0.798 (0.141) &  0.921 (0.043) &  0.883 (0.101) &  0.888 (0.172) \\
XGB &  0.999 (0.033) &  0.837 (0.031) &  0.675 (0.045) &  0.642 (0.022) &  0.693 (0.068) &  0.665 (0.021) &  0.612 (0.025) \\
\bottomrule
\end{tabular}



### Data subset comparisons

Since the choice of encoding method does not matter here, we use 5CV-GLMM encoding for LR and ordinal encoding for XGB.

In [21]:
# Feature subsets to compare, keyed by subset name; each value is the list of columns to keep.
# The activity-based subsets ("activity_only", "activity_and_demo") are left out here.
subsets = dict(
    demo_only=demographic_cols,
    performance_only=perf_cols,
    performance_and_demo=perf_cols + demographic_cols,
    all=list(df.columns),
)

In [25]:
# Cache files of the data-subset experiment
subsets_results_dir = f"../results/{dataset_name}/{experiment_name}"
subsets_results_path = f"{subsets_results_dir}/results_subsets.pickle"
subsets_importances_path = f"{subsets_results_dir}/results_subsets_feature_importances.pickle"

# NOTE(review): only the results pickle is checked, but the load branch below also reads the
# feature-importance pickle and fails with FileNotFoundError if that file alone is missing.
if not os.path.exists(subsets_results_path):

    results_subsets = {}
    results_subsets_feature_importances = {}

    # Retrieved as in the original cell; not used further down in this cell
    z_cols = data_dict["z_cols"]

    for fold in range(folds):
        results_subsets[fold] = {}
        results_subsets_feature_importances[fold] = {}

        # Retrieve the fold's data once: none of it depends on the subset
        X_train = data_dict[f"X_train_{fold}"]
        y_train = data_dict[f"y_train_{fold}"]

        X_val = data_dict[f"X_val_{fold}"]
        y_val = data_dict[f"y_val_{fold}"]

        X_test = data_dict[f"X_test_{fold}"]
        y_test = data_dict[f"y_test_{fold}"]

        # Models are fit on train+validation and evaluated on test
        y_train_val = np.concatenate([y_train, y_val])

        # Baseline: predict 0 for every sample.
        # NOTE(review): presumably the target is standardised so that 0 is (close to) its mean -- confirm
        y_train_val_pred_base = np.zeros(y_train_val.shape[0])
        y_test_pred_base = np.zeros(y_test.shape[0])

        # Built with comprehensions so the notebook-level `metric` variable is not overwritten
        eval_res_train = get_metrics(y_train_val, y_train_val_pred_base, target=target)
        baseline_res = {name + " Train": value for name, value in eval_res_train.items()}
        eval_res_test = get_metrics(y_test, y_test_pred_base, target=target)
        baseline_res.update({name + " Test": value for name, value in eval_res_test.items()})
        results_subsets[fold]["Baseline"] = baseline_res

        # Full feature matrix for LR: numeric features + GLMM-encoded categorical features
        z_glmm_encoded_train = data_dict[f"z_glmm_encoded_train_{fold}"]
        z_glmm_encoded_val = data_dict[f"z_glmm_encoded_val_{fold}"]
        z_glmm_encoded_test = data_dict[f"z_glmm_encoded_test_{fold}"]
        X_train_lr = pd.concat([X_train, z_glmm_encoded_train], axis=1)
        X_val_lr = pd.concat([X_val, z_glmm_encoded_val], axis=1)
        X_test_lr_full = pd.concat([X_test, z_glmm_encoded_test], axis=1)
        X_train_val_lr_full = pd.concat([X_train_lr, X_val_lr])

        # Full feature matrix for XGB: numeric features + ordinal-encoded categorical features
        z_ordinal_encoded_train = data_dict[f"z_ordinal_encoded_train_{fold}"]
        z_ordinal_encoded_val = data_dict[f"z_ordinal_encoded_val_{fold}"]
        z_ordinal_encoded_test = data_dict[f"z_ordinal_encoded_test_{fold}"]
        X_train_xgb = pd.concat([X_train, z_ordinal_encoded_train], axis=1)
        X_val_xgb = pd.concat([X_val, z_ordinal_encoded_val], axis=1)
        X_test_xgb_full = pd.concat([X_test, z_ordinal_encoded_test], axis=1)
        X_train_val_xgb_full = pd.concat([X_train_xgb, X_val_xgb])

        for subset_key in subsets:
            subset_cols = subsets[subset_key]
            if len(subset_cols) == 0:
                # An empty subset leaves nothing to train on
                continue

            print(f"Preparing results for fold {fold}, subset={subset_key}")

            # Restrict to the subset's columns, keeping the column order of the feature matrices
            X_train_val_lr = X_train_val_lr_full[[c for c in X_train_val_lr_full.columns if c in subset_cols]]
            X_test_lr = X_test_lr_full[[c for c in X_test_lr_full.columns if c in subset_cols]]
            X_train_val_xgb = X_train_val_xgb_full[[c for c in X_train_val_xgb_full.columns if c in subset_cols]]
            X_test_xgb = X_test_xgb_full[[c for c in X_test_xgb_full.columns if c in subset_cols]]

            # Train base models
            res, feats = evaluate_lr(X_train_val_lr, y_train_val, X_test_lr, y_test, target=target,tune=False, seed=RS)
            results_subsets[fold]["LR_"+subset_key] = res
            results_subsets_feature_importances[fold]["LR_"+subset_key] = feats

            res, feats = evaluate_xgb(X_train_val_xgb, y_train_val, X_test_xgb, y_test, target, tune=False, max_evals=max_evals, early_stopping_rounds=early_stopping_rounds, seed=RS)
            results_subsets[fold]["XGB_"+subset_key] = res
            results_subsets_feature_importances[fold]["XGB_"+subset_key] = feats

            # Train tuned models
            res, feats = evaluate_lr(X_train_val_lr, y_train_val, X_test_lr, y_test, target=target, max_evals=max_evals, tune=True, seed=RS)
            results_subsets[fold]["LR_"+subset_key+"_tuned"] = res
            results_subsets_feature_importances[fold]["LR_"+subset_key+"_tuned"] = feats

            res, feats = evaluate_xgb(X_train_val_xgb, y_train_val, X_test_xgb, y_test, target, tune=True, max_evals=max_evals, early_stopping_rounds=early_stopping_rounds, seed=RS)
            results_subsets[fold]["XGB_"+subset_key+"_tuned"] = res
            results_subsets_feature_importances[fold]["XGB_"+subset_key+"_tuned"] = feats

    # exist_ok avoids the separate check-then-create step
    os.makedirs(subsets_results_dir, exist_ok=True)
    with open(subsets_results_path, 'wb') as handle:
        pickle.dump(results_subsets, handle, protocol=pickle.HIGHEST_PROTOCOL)
    with open(subsets_importances_path, 'wb') as handle:
        pickle.dump(results_subsets_feature_importances, handle, protocol=pickle.HIGHEST_PROTOCOL)

else:
    # NOTE: pickle.load executes arbitrary code from the file -- only load caches written by this notebook
    with open(subsets_results_path, 'rb') as handle:
        results_subsets = pickle.load(handle)
    with open(subsets_importances_path, 'rb') as handle:
        results_subsets_feature_importances = pickle.load(handle)


# Fold-0 results, one row per model, sorted from worst to best test MSE
results_subsets_df = pd.DataFrame(results_subsets[0]).transpose().sort_values("MSE Test",ascending=False).round(4)
results_subsets_df[["MSE Train", "R2 Train", "MSE Test", "R2 Test"]].style.highlight_min(subset=["MSE Train", "MSE Test"], color = 'lightgreen', axis = 0).highlight_max(subset=["R2 Train", "R2 Test"], color = 'lightgreen', axis = 0)

Preparing results for fold 0, subset=demo_only
SCORE: 0.998807346492077                                                                                               
100%|███████████████████████████████████████████████████| 1/1 [00:00<00:00, 14.92trial/s, best loss: 0.998807346492077]
The best hyperparameters are :  

{'alpha': 0.4691514476879839}
Default performance on Test: 0.9057999849319458
SCORE: 0.8181661367416382                                                                                              
100%|██████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.59trial/s, best loss: 0.8181661367416382]
The best hyperparameters after step 1  are :  

{'learning_rate': 0.3561870396702645, 'n_estimators': 471.0}
Test Performance after first tuning round: 1.0241843461990356
SCORE: 0.8329249620437622                                                                                              
100%|██████████████████████████████████████████████████| 1/1 [00:00<00

SCORE: 0.4049307703971863                                                                                              
100%|██████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.85s/trial, best loss: 0.4049307703971863]
The best hyperparameters after step 2 are :  

{'learning_rate': 0.09764482857524383, 'n_estimators': 93.0, 'seed': 0, 'max_depth': 8.0, 'min_child_weight': 1.0}
Test Performance after second tuning round: 0.39281654357910156
SCORE: 0.40269598364830017                                                                                             
100%|█████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.74s/trial, best loss: 0.40269598364830017]
The best hyperparameters after step 3 are :  

{'learning_rate': 0.09764482857524383, 'n_estimators': 93.0, 'seed': 0, 'max_depth': 8.0, 'min_child_weight': 1.0, 'colsample_bytree': 0.6926690866822035, 'subsample': 0.9416083533004669}
Test Performance after third tuning round: 0.39002001285552

Test Performance after third tuning round: 0.4127160310745239
SCORE: 0.4113096296787262                                                                                              
100%|██████████████████████████████████████████████████| 1/1 [00:07<00:00,  7.80s/trial, best loss: 0.4113096296787262]
The best hyperparameters are :  

{'learning_rate': 0.007795516096758986, 'n_estimators': 483.0, 'seed': 0, 'max_depth': 17.0, 'min_child_weight': 9.0, 'colsample_bytree': 0.9136610437730839, 'subsample': 0.9771211031946839, 'gamma': 0.7289457476182496, 'reg_alpha': 104.0, 'reg_lambda': 0.16188162048807186}
Test Performance after last tuning round: 0.3995228111743927
Preparing results for fold 1, subset=activity_and_demo
SCORE: 0.9951144943639324                                                                                              
100%|██████████████████████████████████████████████████| 1/1 [00:00<00:00, 15.15trial/s, best loss: 0.9951144943639324]
The best hyperparameters are :  


100%|██████████████████████████████████████████████████| 1/1 [00:00<00:00, 11.23trial/s, best loss: 0.8240658392362729]
The best hyperparameters are :  

{'alpha': 0.03026215556413114}
Default performance on Test: 0.8449751138687134
SCORE: 0.8110359311103821                                                                                              
100%|██████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.74trial/s, best loss: 0.8110359311103821]
The best hyperparameters after step 1  are :  

{'learning_rate': 0.35380015198822284, 'n_estimators': 165.0}
Test Performance after first tuning round: 0.9009509086608887
SCORE: 0.8461192846298218                                                                                              
100%|██████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.09trial/s, best loss: 0.8461192846298218]
The best hyperparameters after step 2 are :  

{'learning_rate': 0.35380015198822284, 'n_estimators': 165.0, 'seed'

Test Performance after second tuning round: 0.3768017590045929
SCORE: 0.3967636823654175                                                                                              
100%|██████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.23s/trial, best loss: 0.3967636823654175]
The best hyperparameters after step 3 are :  

{'learning_rate': 0.0451199625549261, 'n_estimators': 156.0, 'seed': 0, 'max_depth': 8.0, 'min_child_weight': 0.0, 'colsample_bytree': 0.7194552326314301, 'subsample': 0.6803516654625412}
Test Performance after third tuning round: 0.36664021015167236
SCORE: 0.3949905037879944                                                                                              
100%|██████████████████████████████████████████████████| 1/1 [00:04<00:00,  4.32s/trial, best loss: 0.3949905037879944]
The best hyperparameters are :  

{'learning_rate': 0.0451199625549261, 'n_estimators': 156.0, 'seed': 0, 'max_depth': 8.0, 'min_child_weight': 0.0, 'colsample

Test Performance after last tuning round: 0.4383499324321747
Preparing results for fold 3, subset=activity_and_demo
SCORE: 0.824343255604569                                                                                               
100%|███████████████████████████████████████████████████| 1/1 [00:00<00:00,  9.26trial/s, best loss: 0.824343255604569]
The best hyperparameters are :  

{'alpha': 0.030320786532977193}
Default performance on Test: 0.921075701713562
SCORE: 0.8147565126419067                                                                                              
100%|██████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.01trial/s, best loss: 0.8147565126419067]
The best hyperparameters after step 1  are :  

{'learning_rate': 0.2478222147240913, 'n_estimators': 408.0}
Test Performance after first tuning round: 0.9651429057121277
SCORE: 0.8177701830863953                                                                                              


SCORE: 0.9355239868164062                                                                                              
100%|██████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.62trial/s, best loss: 0.9355239868164062]
The best hyperparameters after step 2 are :  

{'learning_rate': 0.45318118216106684, 'n_estimators': 217.0, 'seed': 0, 'max_depth': 14.0, 'min_child_weight': 0.0}
Test Performance after second tuning round: 1.0167723894119263
SCORE: 0.9734603762626648                                                                                              
100%|██████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.41trial/s, best loss: 0.9734603762626648]
The best hyperparameters after step 3 are :  

{'learning_rate': 0.45318118216106684, 'n_estimators': 217.0, 'seed': 0, 'max_depth': 14.0, 'min_child_weight': 0.0, 'colsample_bytree': 0.5363102060560359, 'subsample': 0.5107018285899158}
Test Performance after third tuning round: 1.28793823719

Test Performance after third tuning round: 0.3932492434978485
SCORE: 0.3941243588924408                                                                                              
100%|██████████████████████████████████████████████████| 1/1 [00:05<00:00,  5.27s/trial, best loss: 0.3941243588924408]
The best hyperparameters are :  

{'learning_rate': 0.020127869067286176, 'n_estimators': 449.0, 'seed': 0, 'max_depth': 4.0, 'min_child_weight': 3.0, 'colsample_bytree': 0.960854704850493, 'subsample': 0.7564439206411836, 'gamma': 6.002051184760806, 'reg_alpha': 37.0, 'reg_lambda': 0.22708026466074716}
Test Performance after last tuning round: 0.3963690996170044
Preparing results for fold 4, subset=all
SCORE: 0.374882599585508                                                                                               
100%|███████████████████████████████████████████████████| 1/1 [00:00<00:00,  6.55trial/s, best loss: 0.374882599585508]
The best hyperparameters are :  

{'alpha': 0.00508

Unnamed: 0,MSE Train,R2 Train,MSE Test,R2 Test
LR_demo_only_tuned,0.9986,-0.0,0.9946,-0.0003
Baseline,0.9986,-0.0,0.9946,-0.0002
LR_activity_and_demo_tuned,0.9331,0.0656,0.9251,0.0696
XGB_activity_and_demo,0.3469,0.6526,0.9058,0.089
XGB_demo_only,0.3469,0.6526,0.9058,0.089
XGB_demo_only_tuned,0.8159,0.1829,0.7982,0.1973
LR_activity_and_demo,0.7898,0.2091,0.7841,0.2115
LR_demo_only,0.7898,0.2091,0.7841,0.2115
XGB_activity_and_demo_tuned,0.7862,0.2127,0.7837,0.2118
LR_all_tuned,0.566,0.4331,0.561,0.4358


In [26]:
# Fold-0 results with one row per model, sorted from worst to best test MSE
results_subsets_df = (
    pd.DataFrame(results_subsets[0])
    .T
    .sort_values("MSE Test", ascending=False)
    .round(4)
)

# Highlight the lowest MSE and the highest R2 in each column
(
    results_subsets_df[["MSE Train", "R2 Train", "MSE Test", "R2 Test"]]
    .style
    .highlight_min(subset=["MSE Train", "MSE Test"], color='lightgreen', axis=0)
    .highlight_max(subset=["R2 Train", "R2 Test"], color='lightgreen', axis=0)
)

Unnamed: 0,MSE Train,R2 Train,MSE Test,R2 Test
LR_demo_only_tuned,0.9986,-0.0,0.9946,-0.0003
Baseline,0.9986,-0.0,0.9946,-0.0002
LR_activity_and_demo_tuned,0.9331,0.0656,0.9251,0.0696
XGB_activity_and_demo,0.3469,0.6526,0.9058,0.089
XGB_demo_only,0.3469,0.6526,0.9058,0.089
XGB_demo_only_tuned,0.8159,0.1829,0.7982,0.1973
LR_activity_and_demo,0.7898,0.2091,0.7841,0.2115
LR_demo_only,0.7898,0.2091,0.7841,0.2115
XGB_activity_and_demo_tuned,0.7862,0.2127,0.7837,0.2118
LR_all_tuned,0.566,0.4331,0.561,0.4358


### Effectiveness of Parameter Tuning

In [27]:
models = list(results_subsets[0].keys())
metric = "MSE Test"

# Rows = folds, columns = models, values = test MSE
subsets_folds_df = pd.DataFrame(
    [pd.DataFrame(results_subsets[fold_num]).loc[metric, models] for fold_num in results_subsets.keys()],
    index=list(results_subsets.keys()),
)
subsets_mean_df = subsets_folds_df.mean(axis=0)
subsets_std_df = subsets_folds_df.std(axis=0)

# Pair every untuned model with its "<name>_tuned" counterpart BY NAME. Relying on the sorted
# list alternating untuned/tuned breaks silently when one model name is a prefix of another
# (e.g. "LR_demo" sorts before "LR_demo_only", which sorts before "LR_demo_tuned").
methods = sorted(name for name in subsets_mean_df.index if name != "Baseline")
not_tuned = ["Baseline"] + [name for name in methods if not name.endswith("_tuned")]
# The baseline has no tuned variant, so it is shown in both columns
tuned = ["Baseline"] + [name + "_tuned" for name in not_tuned[1:]]

# reindex yields NaN for a model without a tuned run instead of shifting the remaining rows
res_df_tune_comp_mean = pd.DataFrame(
    {
        "Untuned": subsets_mean_df.reindex(not_tuned).to_numpy(),
        "Tuned": subsets_mean_df.reindex(tuned).to_numpy(),
    },
    index=not_tuned,
)
res_df_tune_comp_std = pd.DataFrame(
    {
        "Untuned": subsets_std_df.reindex(not_tuned).to_numpy(),
        "Tuned": subsets_std_df.reindex(tuned).to_numpy(),
    },
    index=not_tuned,
)

# Highlight the better (lower-MSE) variant in every row
res_df_tune_comp_mean.round(2).style.highlight_min(color = 'lightgreen', axis = 1)

Unnamed: 0,Untuned,Tuned
Baseline,1.0,1.0
LR_activity_and_demo,0.79,0.92
LR_all,0.37,0.47
LR_demo_only,0.79,0.94
LR_performance_and_demo,0.39,0.44
LR_performance_only,0.39,0.45
XGB_activity_and_demo,0.89,0.81
XGB_all,0.42,0.4
XGB_demo_only,0.89,0.82
XGB_performance_and_demo,0.44,0.39


In [29]:
# Format every cell as "mean (std)": the mean with two decimals, the std with three
latex_df = (
    res_df_tune_comp_mean.round(2).astype(str)
    + " ("
    + res_df_tune_comp_std.round(3).astype(str)
    + ")"
)
latex_df

Unnamed: 0,Untuned,Tuned
Baseline,1.0 (0.033),1.0 (0.033)
LR_activity_and_demo,0.79 (0.026),0.92 (0.066)
LR_all,0.37 (0.022),0.47 (0.104)
LR_demo_only,0.79 (0.026),0.94 (0.092)
LR_performance_and_demo,0.39 (0.022),0.44 (0.071)
LR_performance_only,0.39 (0.024),0.45 (0.059)
XGB_activity_and_demo,0.89 (0.033),0.81 (0.028)
XGB_all,0.42 (0.023),0.4 (0.031)
XGB_demo_only,0.89 (0.033),0.82 (0.031)
XGB_performance_and_demo,0.44 (0.019),0.39 (0.022)


In [30]:
# Print the LaTeX source of the tuned-vs-untuned comparison table
print(latex_df.to_latex())

\begin{tabular}{lll}
\toprule
{} &       Untuned &         Tuned \\
\midrule
Baseline                 &   1.0 (0.033) &   1.0 (0.033) \\
LR\_activity\_and\_demo     &  0.79 (0.026) &  0.92 (0.066) \\
LR\_all                   &  0.37 (0.022) &  0.47 (0.104) \\
LR\_demo\_only             &  0.79 (0.026) &  0.94 (0.092) \\
LR\_performance\_and\_demo  &  0.39 (0.022) &  0.44 (0.071) \\
LR\_performance\_only      &  0.39 (0.024) &  0.45 (0.059) \\
XGB\_activity\_and\_demo    &  0.89 (0.033) &  0.81 (0.028) \\
XGB\_all                  &  0.42 (0.023) &   0.4 (0.031) \\
XGB\_demo\_only            &  0.89 (0.033) &  0.82 (0.031) \\
XGB\_performance\_and\_demo &  0.44 (0.019) &  0.39 (0.022) \\
XGB\_performance\_only     &  0.43 (0.021) &  0.41 (0.021) \\
\bottomrule
\end{tabular}



### Performance Results

In [31]:
# For LR: tuned LR models (plus the baseline) compared across feature subsets
metric = "MSE Test"
models = ["Baseline"] + [
    name for name in results_subsets[0].keys()
    if "LR" in name and "tuned" in name
]

#####
dataset_res_dict = {}
best_models = {}
t_test_results = {}

# Rows = folds, columns = models. The MSE is negated so that "larger is better" below.
use_df = pd.DataFrame(
    [pd.DataFrame(results_subsets[fold_num]).loc[metric, models] for fold_num in results_subsets.keys()],
    index=results_subsets.keys(),
) * -1

# "mean (std)" per model over the folds, reported on the original (positive) MSE scale
df_mean = (
    (-1 * use_df).mean(axis=0).round(3).astype(str)
    + " ("
    + use_df.std(axis=0).round(3).astype(str)
    + ")"
).to_frame().transpose()
model_dict = df_mean.iloc[0].to_dict()

# Highest mean negated MSE, i.e. the lowest mean test MSE
best_model = use_df.columns[use_df.mean(axis=0).argmax()]

# Paired t-test p-value of every model against the best one
t_test_res = np.array(
    [stats.ttest_rel(use_df[best_model].values, use_df[model].values).pvalue for model in models]
).round(3)
# NaN p-values (presumably the best model compared with itself) are set to 1
t_test_res = np.where(np.isnan(t_test_res), 1., t_test_res)

res_df_lr = pd.DataFrame([model_dict])


def negative_bold(val):
    """Styler callback: bold a column when its model's p-value against the best model is >= 0.05."""
    position = np.where(val.name == np.array(models))[0][0]
    css = "font-weight: bold" if t_test_res[position] >= 0.05 else ""
    return [css for _ in val.keys()]


res_df_lr.style.apply(negative_bold)


Unnamed: 0,Baseline,LR_demo_only_tuned,LR_performance_only_tuned,LR_activity_and_demo_tuned,LR_performance_and_demo_tuned,LR_all_tuned
0,0.999 (0.033),0.942 (0.092),0.453 (0.059),0.916 (0.066),0.441 (0.071),0.469 (0.104)


In [33]:
# For XGB: tuned XGB models (plus the baseline) compared across feature subsets
metric = "MSE Test"
models = ["Baseline"] + [
    name for name in results_subsets[0].keys()
    if "XGB" in name and "tuned" in name
]

#####
dataset_res_dict = {}
best_models = {}
t_test_results = {}

# Rows = folds, columns = models. The MSE is negated so that "larger is better" below.
use_df = pd.DataFrame(
    [pd.DataFrame(results_subsets[fold_num]).loc[metric, models] for fold_num in results_subsets.keys()],
    index=results_subsets.keys(),
) * -1

# "mean (std)" per model over the folds, reported on the original (positive) MSE scale
df_mean = (
    (-1 * use_df).mean(axis=0).round(3).astype(str)
    + " ("
    + use_df.std(axis=0).round(3).astype(str)
    + ")"
).to_frame().transpose()
model_dict = df_mean.iloc[0].to_dict()

# Highest mean negated MSE, i.e. the lowest mean test MSE
best_model = use_df.columns[use_df.mean(axis=0).argmax()]

# Paired t-test p-value of every model against the best one
t_test_res = np.array(
    [stats.ttest_rel(use_df[best_model].values, use_df[model].values).pvalue for model in models]
).round(3)
# NaN p-values (presumably the best model compared with itself) are set to 1
t_test_res = np.where(np.isnan(t_test_res), 1., t_test_res)

res_df_xgb = pd.DataFrame([model_dict])


def negative_bold(val):
    """Styler callback: bold a column when its model's p-value against the best model is >= 0.05."""
    position = np.where(val.name == np.array(models))[0][0]
    css = "font-weight: bold" if t_test_res[position] >= 0.05 else ""
    return [css for _ in val.keys()]


res_df_xgb.style.apply(negative_bold)


Unnamed: 0,Baseline,XGB_demo_only_tuned,XGB_performance_only_tuned,XGB_activity_and_demo_tuned,XGB_performance_and_demo_tuned,XGB_all_tuned
0,0.999 (0.033),0.818 (0.031),0.413 (0.021),0.812 (0.028),0.392 (0.022),0.398 (0.031)


In [34]:
# Strip the "LR_" / "XGB_" prefix and the "_tuned" suffix so both rows share the subset names
# as columns. The slice is applied only to names that actually carry the prefix and suffix:
# "Baseline" and names already shortened by an earlier run of this cell stay unchanged, so
# re-running the cell no longer mangles the column names.
res_df_lr.columns = [
    i[3:-6] if i.startswith("LR_") and i.endswith("_tuned") else i
    for i in res_df_lr.columns
]
res_df_xgb.columns = [
    i[4:-6] if i.startswith("XGB_") and i.endswith("_tuned") else i
    for i in res_df_xgb.columns
]

# Stack the two one-row frames into a single table with one row per model family
latex_df_subsets = pd.concat([res_df_lr, res_df_xgb], axis=0)
latex_df_subsets.index = ["LR", "XGB"]
latex_df_subsets

Unnamed: 0,Baseline,demo_only,performance_only,activity_and_demo,performance_and_demo,all
LR,0.999 (0.033),0.942 (0.092),0.453 (0.059),0.916 (0.066),0.441 (0.071),0.469 (0.104)
XGB,0.999 (0.033),0.818 (0.031),0.413 (0.021),0.812 (0.028),0.392 (0.022),0.398 (0.031)


In [35]:
# Print the LaTeX source of the feature-subset comparison table.
# NOTE(review): the cells are "mean (std)" strings, so .round(2) appears to have no effect --
# the printed table still shows three decimals.
print(latex_df_subsets.round(2).to_latex())


\begin{tabular}{lllllll}
\toprule
{} &       Baseline &      demo\_only & performance\_only & activity\_and\_demo & performance\_and\_demo &            all \\
\midrule
LR  &  0.999 (0.033) &  0.942 (0.092) &    0.453 (0.059) &     0.916 (0.066) &        0.441 (0.071) &  0.469 (0.104) \\
XGB &  0.999 (0.033) &  0.818 (0.031) &    0.413 (0.021) &     0.812 (0.028) &        0.392 (0.022) &  0.398 (0.031) \\
\bottomrule
\end{tabular}



### Feature Importance

In [None]:
# top_10_importances = {}

# for model in list(results_subsets_feature_importances[fold].keys()):
#     imp_df = pd.concat([results_subsets_feature_importances[fold][model] for fold in range(folds)],axis=1)

#     if "LR" in model:
#         direction = imp_df.apply(lambda x: np.sign(x))
#         imp_df = imp_df.abs()

#     imp_df = imp_df/imp_df.sum(axis=0)

#     mean_imp_df = imp_df.mean(axis=1)
#     std_imp_df = imp_df.std(axis=1)

#     mean_imp_df = mean_imp_df.sort_values(ascending=False)
#     std_imp_df = std_imp_df.loc[mean_imp_df.index]
#     final_imps = mean_imp_df[:10]
#     final_imps["Rest"] = sum(mean_imp_df[10:])
#     top_5_importances[model] = np.array([final_imps.index.values, final_imps.values])

In [40]:
# Share of the total feature importance that falls on the demographic features,
# as mean and standard deviation over the folds, per model.
demo_importances = {}
demo_importances_stds = {}

# Model names are read from fold 0 rather than from a `fold` variable left over from an earlier
# loop, which does not exist on a fresh kernel when the results are loaded from the cache.
# NOTE(review): assumes every fold holds the same set of models -- confirm
for model in list(results_subsets_feature_importances[0].keys()):
    # Only subsets that contain demographic features are of interest
    if "demo" in model or "all" in model:
        # One column of importances per fold
        imp_df_all = pd.concat([results_subsets_feature_importances[fold][model] for fold in range(folds)],axis=1)

        if "LR" in model:
            # Sign of the LR importances, kept for inspection; not used further in this cell
            direction = imp_df_all.apply(lambda x: np.sign(x))
            imp_df_all = imp_df_all.abs()

        # Normalise every fold so its importances sum to 1. A fold whose importances are all zero
        # gives 0/0 = NaN and is then filled with a uniform share. This is always computed from the
        # current model, so an all-zero model can no longer reuse `imp_df` of the previous model.
        imp_df = imp_df_all/imp_df_all.sum(axis=0)
        imp_df = imp_df.fillna(1/imp_df.shape[0])

        # Demographic share per fold, then aggregated over the folds
        demo_share_per_fold = imp_df.loc[demographic_cols].sum(axis=0)
        demo_importances[model] = np.round(np.mean(demo_share_per_fold),2)
        demo_importances_stds[model] = np.round(np.std(demo_share_per_fold),2)


In [45]:
# Mean demographic importance share of the tuned models, indexed by subset name
# (the "LR_" / "XGB_" prefix and the "_tuned" suffix are sliced off the model name)
lr_demo_imp = pd.Series(
    {name[3:-6]: share for name, share in demo_importances.items() if "LR" in name and "tuned" in name}
)
xgb_demo_imp = pd.Series(
    {name[4:-6]: share for name, share in demo_importances.items() if "XGB" in name and "tuned" in name}
)

# Matching standard deviations over the folds
lr_demo_imp_stds = pd.Series(
    {name[3:-6]: std for name, std in demo_importances_stds.items() if "LR" in name and "tuned" in name}
)
xgb_demo_imp_stds = pd.Series(
    {name[4:-6]: std for name, std in demo_importances_stds.items() if "XGB" in name and "tuned" in name}
)

# One row per model family, every cell formatted as "mean (std)"
latex_df_imp = pd.DataFrame(
    [
        lr_demo_imp.astype(str) + " (" + lr_demo_imp_stds.astype(str) + ")",
        xgb_demo_imp.astype(str) + " (" + xgb_demo_imp_stds.astype(str) + ")",
    ]
)
latex_df_imp.index = ["LR", "XGB"]
latex_df_imp

Unnamed: 0,demo_only,activity_and_demo,performance_and_demo,all
LR,1.0 (0.0),1.0 (0.0),0.02 (0.03),0.02 (0.03)
XGB,1.0 (0.0),1.0 (0.0),0.1 (0.09),0.06 (0.07)


In [44]:
# Preview of the LR row: demographic importance share per subset, formatted as "mean (std)"
lr_demo_imp.astype(str) + " (" + lr_demo_imp_stds.astype(str) + ")"

demo_only                 1.0 (0.0)
activity_and_demo         1.0 (0.0)
performance_and_demo    0.02 (0.03)
all                     0.02 (0.03)
dtype: object

In [39]:
# Print the LaTeX source of the demographic-importance table.
# NOTE(review): the stored output shows plain numbers without "(std)" and this cell has a lower
# execution count than the cell that builds latex_df_imp -- the output is likely stale; re-run.
print(latex_df_imp.to_latex())

\begin{tabular}{lrrrr}
\toprule
{} &  demo\_only &  activity\_and\_demo &  performance\_and\_demo &   all \\
\midrule
LR  &        1.0 &                1.0 &                  0.02 &  0.02 \\
XGB &        1.0 &                1.0 &                  0.10 &  0.06 \\
\bottomrule
\end{tabular}

