In [1]:
import numpy as np
import pandas as pd
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV, GroupKFold, RandomizedSearchCV
from sklearn.metrics import f1_score, accuracy_score, confusion_matrix, precision_score, recall_score, classification_report
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, StandardScaler
from hyperopt import hp
import random
import shap


Using `tqdm.autonotebook.tqdm` in notebook mode. Use `tqdm.tqdm` instead to force console mode (e.g. in jupyter console)


In [2]:
tabular= pd.read_csv('/Users/finnschonknecht/Desktop/XGB_train_folder/tabular.csv')
targets = pd.read_csv('/Users/finnschonknecht/Desktop/XGB_train_folder/binary_personalised_targets.csv')
personality_df = pd.read_csv('/Users/finnschonknecht/Desktop/XGB_train_folder/personality_df.csv')

In [3]:
personality_columns = ['Openness', 'Agreeableness', 'Conscientiousness', 'Extraversion', 'Neuroticism']

In [4]:
tabular.fillna(0, inplace=True)

In [5]:
unique_ids = tabular['Pcode'].unique()
random.seed(150)   
test_ids = np.random.choice(unique_ids, 15, replace=False)

In [6]:
test_data= tabular[tabular['Pcode'].isin(test_ids)]
train_data= tabular[~tabular['Pcode'].isin(test_ids)]

In [7]:
def encode_dataframe(df):
    # Encode the Gender column
    if 'Gender' in df.columns:
        le = LabelEncoder()
        df['Gender'] = le.fit_transform(df['Gender'])

    # Convert object columns to category if needed
    for col in df.select_dtypes(include='object').columns:
        df[col] = df[col].astype('category')
    
    return df

def scale_numeric_columns(df_train, df_test, exclude_columns):
    # Identify numeric columns
    numeric_cols = df_train.select_dtypes(include=['float64', 'int64']).columns

    # Exclude specified columns from numeric columns
    numeric_cols = [col for col in numeric_cols if col not in exclude_columns]

    # Initialize the StandardScaler
    scaler = StandardScaler()

    # Fit the scaler on the training data and transform the training data
    df_train_scaled = df_train.copy()
    df_train_scaled[numeric_cols] = scaler.fit_transform(df_train[numeric_cols])

    # Transform the test data using the same scaler
    df_test_scaled = df_test.copy()
    df_test_scaled[numeric_cols] = scaler.transform(df_test[numeric_cols])

    return df_train_scaled, df_test_scaled

def create_personality_flags(df, personality_columns):
    for col in personality_columns:
        df[f'{col}_High'] = (df[col] >= 9).astype(int)
    return df

In [8]:
train_data, test_data = scale_numeric_columns(
    train_data, test_data, ['Stress_binary', 'Valence_binary', 'Arousal_binary', 'day_0', 'day_1', 'day_2', 'day_3', 'day_4', 'day_5', 'day_6'])

In [11]:
def xgbGroupKFoldCV(data, train, test, unique_ids, test_ids, idcolumn, outcomevar, personality_columns, dropcols=[], n_splits=5):
    
    train_data = encode_dataframe(train.copy())
    test_data = encode_dataframe(test.copy())
    
    # Create personality flags before standardization
    train_data = create_personality_flags(train_data, personality_columns)
    test_data = create_personality_flags(test_data, personality_columns)
    
    # Standardize numerical columns
    train_data, test_data = scale_numeric_columns(train_data, test_data, dropcols + [outcomevar])

    best_hyperparams = []
    F1 = []
    feature_importances_list = []

    group_kfold = GroupKFold(n_splits=n_splits)

    param_grid = {
        'n_estimators': [100, 200, 1000],
        'max_depth': [20],
        'learning_rate': [0.01],
        'subsample': [0.8]
    }

    # Step 2: Perform GroupKFold on the remaining data
    for train_index, val_index in group_kfold.split(train_data, groups=train_data[idcolumn]):
        data_train = train_data.iloc[train_index]
        data_val = train_data.iloc[val_index]

        xgb = XGBClassifier(random_state=0, n_jobs=-1, use_label_encoder=False, eval_metric='mlogloss')
        randomized_search = RandomizedSearchCV(xgb, param_grid, cv=3, scoring='f1_macro', n_jobs=-1)
        randomized_search.fit(data_train.drop(columns=dropcols + [outcomevar]), data_train[outcomevar])

        best_params = randomized_search.best_params_
        best_hyperparams.append(best_params)

        xgb_best = XGBClassifier(**best_params, random_state=0, n_jobs=-1, use_label_encoder=False, eval_metric='mlogloss')
        xgb_best.fit(data_train.drop(columns=dropcols + [outcomevar]), data_train[outcomevar])

        predictions = xgb_best.predict(data_val.drop(columns=dropcols + [outcomevar]))
        F1.append(f1_score(data_val[outcomevar], predictions, average= 'macro'))
        
        print('...Fold processing complete.')

    mean_F1_micro = np.mean(F1)
    std_F1_micro = np.std(F1)

    # Use the best hyperparameters to train the model on the entire training set
    best_params_avg = {key: np.mean([d[key] for d in best_hyperparams]) for key in best_hyperparams[0]}
    best_params_avg = {k: int(v) if isinstance(v, float) and v.is_integer() else v for k, v in best_params_avg.items()}

    final_model = XGBClassifier(**best_params_avg, random_state=0, n_jobs=-1, use_label_encoder=False, eval_metric='mlogloss')
    final_model.fit(train_data.drop(columns=dropcols + [outcomevar]), train_data[outcomevar])
    
    feature_importances_final = final_model.feature_importances_
    sorted_idx = np.argsort(feature_importances_final)[::-1]
    

    # Predict on the separate test set
    test_data_for_shap = test_data.drop(columns=dropcols + [outcomevar])
    test_predictions = final_model.predict(test_data_for_shap)
    test_confusion_matrix = confusion_matrix(test_data[outcomevar], test_predictions)
    test_F1_score = f1_score(test_data[outcomevar], test_predictions, average= 'macro')
    report = classification_report(test_data[outcomevar], test_predictions)
    

    #explainer_final = shap.TreeExplainer(final_model, test_data_for_shap, feature_perturbation="interventional")
    #shap_values_final = explainer_final.shap_values(test_data_for_shap, check_additivity=False)

    # Print metrics
    print(f'Mean F1 (Overall): {mean_F1_micro}')
    print(f'Std F1 (Overall): {std_F1_micro}')
    
    # Print confusion matrices
    print('Overall Confusion Matrix:')
    print(test_confusion_matrix)
    
    print(f'Test F1 Score: {test_F1_score}')
    print(report)

    return mean_F1_micro, std_F1_micro, test_F1_score, test_confusion_matrix, feature_importances_list, best_params_avg, final_model, test_predictions


In [12]:
mean_F1_micro_stress, std_F1_micro_stress, test_F1_score_6_stress, test_confusion_matrix_stress, feature_importances_list_stress, best_params_avg_stress, final_model_stress, test_predictions_stress = xgbGroupKFoldCV(
    data=tabular,
    train=train_data,
    test=test_data,
    unique_ids=unique_ids,
    test_ids=test_ids,
    idcolumn='Pcode',
    outcomevar='Stress_binary',
    personality_columns=personality_columns,
    dropcols=['Pcode', 'ResponseTime', 'Arousal_binary', 'Valence_binary'],
    n_splits=5
)

The total space of parameters 1 is smaller than n_iter=10. Running 1 iterations. For exhaustive searches, use GridSearchCV.


...Fold processing complete.


The total space of parameters 1 is smaller than n_iter=10. Running 1 iterations. For exhaustive searches, use GridSearchCV.


...Fold processing complete.


The total space of parameters 1 is smaller than n_iter=10. Running 1 iterations. For exhaustive searches, use GridSearchCV.


...Fold processing complete.


The total space of parameters 1 is smaller than n_iter=10. Running 1 iterations. For exhaustive searches, use GridSearchCV.


...Fold processing complete.


The total space of parameters 1 is smaller than n_iter=10. Running 1 iterations. For exhaustive searches, use GridSearchCV.


...Fold processing complete.
Mean F1 (Overall): 0.5198387052315168
Std F1 (Overall): 0.024389083487333287
Overall Confusion Matrix:
[[346 253]
 [254 214]]
Test F1 Score: 0.5174508168571849
              precision    recall  f1-score   support

         0.0       0.58      0.58      0.58       599
         1.0       0.46      0.46      0.46       468

    accuracy                           0.52      1067
   macro avg       0.52      0.52      0.52      1067
weighted avg       0.52      0.52      0.52      1067



In [13]:
mean_F1_micro_valence, std_F1_micro_valence, test_F1_score_6_valence, test_confusion_matrix_valence, feature_importances_list_valence, best_params_avg_valence, final_model_valence, test_predictions_valence = xgbGroupKFoldCV(
    data=tabular,
    train=train_data,
    test=test_data,
    unique_ids=unique_ids,
    test_ids=test_ids,
    idcolumn='Pcode',
    outcomevar='Valence_binary',
    personality_columns=personality_columns,
    dropcols=['Pcode', 'ResponseTime', 'Arousal_binary', 'Stress_binary'],
    n_splits=5
)

The total space of parameters 1 is smaller than n_iter=10. Running 1 iterations. For exhaustive searches, use GridSearchCV.


...Fold processing complete.


The total space of parameters 1 is smaller than n_iter=10. Running 1 iterations. For exhaustive searches, use GridSearchCV.


...Fold processing complete.


The total space of parameters 1 is smaller than n_iter=10. Running 1 iterations. For exhaustive searches, use GridSearchCV.


...Fold processing complete.


The total space of parameters 1 is smaller than n_iter=10. Running 1 iterations. For exhaustive searches, use GridSearchCV.


...Fold processing complete.


The total space of parameters 1 is smaller than n_iter=10. Running 1 iterations. For exhaustive searches, use GridSearchCV.


...Fold processing complete.
Mean F1 (Overall): 0.5427193777374855
Std F1 (Overall): 0.023933697980955876
Overall Confusion Matrix:
[[159 310]
 [148 450]]
Test F1 Score: 0.5362665684830633
              precision    recall  f1-score   support

         0.0       0.52      0.34      0.41       469
         1.0       0.59      0.75      0.66       598

    accuracy                           0.57      1067
   macro avg       0.56      0.55      0.54      1067
weighted avg       0.56      0.57      0.55      1067



In [14]:
mean_F1_micro_arousal, std_F1_micro_arousal, test_F1_score_6_arousal, test_confusion_matrix_arousal, feature_importances_list_arousal, best_params_avg_arousal, final_model_arousal, test_predictions_arousal = xgbGroupKFoldCV(
    data=tabular,
    train=train_data,
    test=test_data,
    unique_ids=unique_ids,
    test_ids=test_ids,
    idcolumn='Pcode',
    outcomevar='Arousal_binary',
    personality_columns=personality_columns,
    dropcols=['Pcode', 'ResponseTime', 'Valence_binary', 'Stress_binary'],
    n_splits=5
)

The total space of parameters 1 is smaller than n_iter=10. Running 1 iterations. For exhaustive searches, use GridSearchCV.


...Fold processing complete.


The total space of parameters 1 is smaller than n_iter=10. Running 1 iterations. For exhaustive searches, use GridSearchCV.


...Fold processing complete.


The total space of parameters 1 is smaller than n_iter=10. Running 1 iterations. For exhaustive searches, use GridSearchCV.


...Fold processing complete.


The total space of parameters 1 is smaller than n_iter=10. Running 1 iterations. For exhaustive searches, use GridSearchCV.


...Fold processing complete.


The total space of parameters 1 is smaller than n_iter=10. Running 1 iterations. For exhaustive searches, use GridSearchCV.


...Fold processing complete.
Mean F1 (Overall): 0.572978310029797
Std F1 (Overall): 0.01765615427611624
Overall Confusion Matrix:
[[352 224]
 [223 268]]
Test F1 Score: 0.5784558166502126
              precision    recall  f1-score   support

         0.0       0.61      0.61      0.61       576
         1.0       0.54      0.55      0.55       491

    accuracy                           0.58      1067
   macro avg       0.58      0.58      0.58      1067
weighted avg       0.58      0.58      0.58      1067

