# Import section

In [1]:
# Data handling
import pandas as pd
import numpy as np

# Model evaluation and metrics
from sklearn.metrics import accuracy_score, roc_auc_score, confusion_matrix, precision_score, recall_score

# pyFUME and AUK
from pyfume import SugenoFISTester
from AUK import AUK

# For saving/loading data
import pickle
import os

# Data loading

In [2]:
# Load the table of best hyperparameters produced by the training step.
# NOTE(review): pd.read_pickle unpickles arbitrary Python objects; only load
# files that were written by this project's own pipeline.
best_hyperparameters = pd.read_pickle('model_training/best_hyperparameters.pkl')

# The first row of the 'params' column holds the settings of the best run
params_dict = best_hyperparameters['params'].iloc[0]

# Fail fast instead of printing and carrying on: every later cell needs
# test_df, and continuing would either hit a NameError further down or
# silently reuse a stale test_df left over from an earlier kernel run.
if not isinstance(params_dict, dict):
    raise TypeError("Error: The 'params' column does not contain a dictionary.")
if 'dataframe_name' not in params_dict:
    raise KeyError("Error: 'dataframe_name' not found in params_dict.")

# Expose every hyperparameter as a notebook-level variable. At the top level
# of a module/notebook locals() and globals() are the same namespace, so this
# is the explicit spelling of the previous `locals()[key] = value` loop.
# NOTE(review): prefer reading values from params_dict directly in new code.
globals().update(params_dict)

# The test frame is stored under the training frame's name with the first
# occurrence of 'train' swapped for 'test'
dataframe_name = params_dict['dataframe_name'].replace('train', 'test', 1)

# Construct the .pkl file path
base_folder = 'feature_selection'
pkl_file_path = f'{base_folder}/{dataframe_name}.pkl'

# Load the test dataframe
test_df = pd.read_pickle(pkl_file_path)

# Print to verify
print(f"Loaded dataframe from: {pkl_file_path}")

Loaded dataframe from: feature_selection/test_df_all_drop_features_15.pkl


In [3]:
# Columns carried along in the test frame that are not needed for the model
non_model_columns = ['eid', 'p130894', 'p130895', 'p53_i0']
df_test_clean = test_df.drop(columns=non_model_columns)

# Separate the feature columns from the ground-truth 'target' column and keep
# the feature names for the tester
features_df = df_test_clean.drop(columns=['target'])
variable_names = features_df.columns.tolist()

# Features and ground truth as NumPy arrays
X_test = features_df.to_numpy()
y_test = df_test_clean['target'].to_numpy()

# Restore the saved model from disk
# NOTE(review): pickle.load can execute arbitrary code; only load model files
# produced by this project's own training step.
with open('model_training/final_model.pkl', 'rb') as file:
    loaded_model = pickle.load(file)

# Tester object that evaluates the loaded model on the test data
tester = SugenoFISTester(
    model=loaded_model,
    test_data=X_test,
    variable_names=variable_names,
    golden_standard=y_test,
)

# predict() returns a tuple; keep its first element, which holds the
# predicted probabilities
y_pred_proba = tester.predict()[0]

In [4]:
# Turn probabilities into 0/1 class predictions: strictly above the cut-off
# becomes 1, everything else 0
DECISION_THRESHOLD = 0.5
above_threshold = y_pred_proba > DECISION_THRESHOLD
y_pred = above_threshold.astype(int)

In [5]:
# Wrap both prediction arrays in Series that share test_df's index so they
# line up row-for-row with the test frame
row_index = test_df.index
y_pred_series = pd.Series(data=y_pred, name='y_pred', index=row_index)
y_pred_proba_series = pd.Series(data=y_pred_proba, name='y_pred_proba', index=row_index)

# Work on a copy so test_df itself stays untouched
df_test_incl_predictions = test_df.copy()

# Place the two new columns directly to the right of 'target', in order
target_position = df_test_incl_predictions.columns.get_loc('target')
for offset, new_column in enumerate((y_pred_series, y_pred_proba_series), start=1):
    df_test_incl_predictions.insert(target_position + offset, new_column.name, new_column)

In [6]:
# Preview the first five rows of the test frame with predictions attached
df_test_incl_predictions.head(n=5)

Unnamed: 0,p2976_i0_Category_E,p4136_i0_Category_E,p22684_i0_Category_E,p3720_i0_Category_E,p3700_i0_Category_E,p5001_i0_Category_B,p23478_i0,p2704_i0_Category_E,p2277_i0_Category_C,p20123_i0_Category_D,...,p864_i0_Category_E,p30210_i0,p2764_i0_Category_D,target,y_pred,y_pred_proba,eid,p130894,p130895,p53_i0
126,0.0,0.0,0.0,0.0,0.0,0.0,1.04386,0.0,0.0,1.0,...,0.0,0.548893,1.0,1,1,0.555073,ID_127,2016-04-05,Category_B,2008-10-21
314,0.0,0.0,0.0,0.0,0.0,0.0,0.888907,0.0,0.0,0.0,...,0.0,-0.095633,0.0,1,1,0.555829,ID_315,2020-05-19,Category_B,2007-08-22
267,0.0,1.0,0.0,0.0,0.0,0.0,-0.498063,0.0,0.0,0.0,...,0.0,1.050422,1.0,0,0,0.473814,ID_268,2007-09-25,Category_E,2006-06-20
282,0.0,1.0,1.0,0.0,0.0,0.0,-1.20321,0.0,0.0,0.0,...,0.0,-0.464635,0.0,0,0,0.481259,ID_283,2006-09-08,Category_C,2007-11-03
496,0.0,0.0,1.0,0.0,0.0,0.0,1.763157,0.0,0.0,0.0,...,0.0,0.195618,0.0,0,1,0.535386,ID_497,2018-07-26,Category_D,2007-01-15


# Calculate Metrics

In [7]:
# Accuracy is scored on the thresholded labels, ROC AUC on the probabilities
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
auc = roc_auc_score(y_true=y_test, y_score=y_pred_proba)

# One value per line: accuracy first, then AUC
print(accuracy, auc, sep='\n')

0.656
0.7270096928195979


In [8]:
# Calculate the confusion matrix (rows = actual class, columns = predicted).
# Passing the labels explicitly pins the matrix to 2x2 in the order [0, 1];
# without it the matrix shrinks to 1x1 when only one class occurs in both
# y_test and y_pred, and the fixed row/column labels below would no longer fit.
class_labels = [0, 1]
conf_matrix = confusion_matrix(y_test, y_pred, labels=class_labels)
confusion_matrix_df = pd.DataFrame(
    conf_matrix,
    index=[f'Actual {label}' for label in class_labels],
    columns=[f'Predicted {label}' for label in class_labels],
)

In [9]:
# Show the labelled confusion matrix
confusion_matrix_df.head(n=5)

Unnamed: 0,Predicted 0,Predicted 1
Actual 0,95,172
Actual 1,0,233


In [10]:
# Precision and recall of the thresholded predictions
precision = precision_score(y_true=y_test, y_pred=y_pred)
recall = recall_score(y_true=y_test, y_pred=y_pred)

# Report each metric on its own line
print(f'Precision: {precision}', f'Recall: {recall}', sep='\n')

Precision: 0.5753086419753086
Recall: 1.0


In [11]:
# Build the AUK helper from the predicted probabilities and the true labels
auk_class = AUK(labels=y_test, probabilities=y_pred_proba)

# Compute the AUK score and show it
auk = auk_class.calculate_auk()
print(auk)

0.2220800063694497


In [12]:
# Replace raw column codes with human-readable names.

# Dictionary for mapping old column names to new names
column_mapping = {
    'p2090_i0_Yes': 'Seen doctor (GP) for nerves, anxiety, tension or depression | Yes',
    'p2090_i0_No': 'Seen doctor (GP) for nerves, anxiety, tension or depression | No',
    'p2100_i0_No': 'Seen a psychiatrist for nerves, anxiety, tension or depression | No',
    'p1920_i0_Yes': 'Mood swings | Yes',
    'p1960_i0_No': 'Fed-up feelings | No',
    'p1920_i0_No': 'Mood swings | No',
    'p137_i0': 'Number of treatments/medications taken',
    'p2100_i0_Yes': 'Seen a psychiatrist for nerves, anxiety, tension or depression | Yes',
    'p2050_i0_Not_at_all': 'Frequency of depressed mood in last 2 weeks | Not at all',
    'p1980_i0_Yes': 'Worrier / anxious feelings | Yes'
}

# rename() silently skips keys that match no column, so a stale mapping would
# otherwise go unnoticed; report the keys that had no effect.
unmatched_columns = [
    old_name for old_name in column_mapping
    if old_name not in df_test_incl_predictions.columns
]
if unmatched_columns:
    print(
        f"Warning: {len(unmatched_columns)} of {len(column_mapping)} mapped columns "
        f"were not found and are left unchanged: {unmatched_columns}"
    )

# Rename via reassignment rather than inplace=True so the cell stays
# chain-friendly and does not mutate a frame object displayed earlier
df_test_incl_predictions = df_test_incl_predictions.rename(columns=column_mapping)

In [13]:
# Inspect false positives: rows predicted as 1 whose true target is 0
is_false_positive = (
    df_test_incl_predictions['y_pred'].eq(1)
    & df_test_incl_predictions['target'].eq(0)
)
false_positives = df_test_incl_predictions.loc[is_false_positive]

false_positives.head(5)

Unnamed: 0,p2976_i0_Category_E,p4136_i0_Category_E,p22684_i0_Category_E,p3720_i0_Category_E,p3700_i0_Category_E,p5001_i0_Category_B,p23478_i0,p2704_i0_Category_E,p2277_i0_Category_C,p20123_i0_Category_D,...,p864_i0_Category_E,p30210_i0,p2764_i0_Category_D,target,y_pred,y_pred_proba,eid,p130894,p130895,p53_i0
496,0.0,0.0,1.0,0.0,0.0,0.0,1.763157,0.0,0.0,0.0,...,0.0,0.195618,0.0,0,1,0.535386,ID_497,2018-07-26,Category_D,2007-01-15
100,0.0,0.0,1.0,0.0,0.0,1.0,-1.252114,0.0,0.0,0.0,...,0.0,1.079856,0.0,0,1,0.513929,ID_101,2016-12-02,Category_A,2009-04-23
359,0.0,0.0,0.0,0.0,0.0,0.0,-0.10545,0.0,0.0,1.0,...,0.0,-1.490135,0.0,0,1,0.565784,ID_360,2008-05-04,Category_B,2009-10-05
201,0.0,0.0,0.0,0.0,0.0,0.0,-0.805076,0.0,0.0,0.0,...,0.0,0.09486,0.0,0,1,0.558417,ID_202,2022-08-03,Category_C,2006-11-11
6,0.0,0.0,0.0,0.0,0.0,1.0,-1.074783,0.0,1.0,0.0,...,0.0,1.648326,0.0,0,1,0.596824,ID_7,2011-09-04,Category_C,2009-10-21


In [14]:
# Start from every row predicted as positive, then split by the true target
predicted_positive = df_test_incl_predictions[df_test_incl_predictions['y_pred'] == 1]
true_positives = predicted_positive[predicted_positive['target'] == 1]
false_positives = predicted_positive[predicted_positive['target'] == 0]

# Mean and standard deviation of the first ten columns (selected by position)
summary_stats = ['mean', 'std']
mean_std_true_positives = true_positives.iloc[:, :10].agg(summary_stats)
mean_std_false_positives = false_positives.iloc[:, :10].agg(summary_stats)

In [15]:
# Mean / std of the first ten columns for the true positives
mean_std_true_positives.head(n=5)

Unnamed: 0,p2976_i0_Category_E,p4136_i0_Category_E,p22684_i0_Category_E,p3720_i0_Category_E,p3700_i0_Category_E,p5001_i0_Category_B,p23478_i0,p2704_i0_Category_E,p2277_i0_Category_C,p20123_i0_Category_D
mean,0.0,0.0,0.201717,0.188841,0.0,0.188841,0.073596,0.0,0.223176,0.201717
std,0.0,0.0,0.402146,0.392225,0.0,0.392225,1.017957,0.0,0.417272,0.402146


In [16]:
# Mean / std of the first ten columns for the false positives
mean_std_false_positives.head(n=5)

Unnamed: 0,p2976_i0_Category_E,p4136_i0_Category_E,p22684_i0_Category_E,p3720_i0_Category_E,p3700_i0_Category_E,p5001_i0_Category_B,p23478_i0,p2704_i0_Category_E,p2277_i0_Category_C,p20123_i0_Category_D
mean,0.005814,0.005814,0.22093,0.197674,0.011628,0.209302,0.054052,0.052326,0.215116,0.296512
std,0.076249,0.076249,0.416085,0.399408,0.107517,0.407998,1.035473,0.223333,0.412103,0.458053


# Export predictions and ground truth

In [17]:
# Folder that receives the exported evaluation artefacts
folder_name = "model_evaluation"

# Record whether the directory was already there (only for the message below),
# then create it unconditionally. exist_ok=True removes the check-then-create
# race, and isdir() rather than exists() means a regular file sitting at this
# path is no longer reported as "already exists": makedirs raises
# FileExistsError for it instead of the export failing later.
folder_already_present = os.path.isdir(folder_name)
os.makedirs(folder_name, exist_ok=True)

if folder_already_present:
    print(f"Folder '{folder_name}' already exists.")
else:
    print(f"Folder '{folder_name}' created.")

Folder 'model_evaluation' created.


In [18]:
# Bundle hard predictions, predicted probabilities and ground truth into one
# .npz archive, each stored under its own key
exported_arrays = {
    'y_pred': y_pred,
    'y_pred_proba': y_pred_proba,
    'y_test': y_test,
}
np.savez('model_evaluation/predictions.npz', **exported_arrays)