# Ablation Study

This notebook reports the results of the local models and of the two model chains (propagating either the predictions or the true values at fit time) when the pre-study measurements are excluded from the predictors.

In [11]:
import numpy as np
import pandas as pd

from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.metrics import r2_score, accuracy_score, mean_squared_error as mse, brier_score_loss
from chaining import Chain
import os

In [12]:
def missingness_stratified_cv(df, N_FOLDS=5, random_state=None):
    """Randomly assign the rows of ``df`` to test folds, grouped by missingness.

    Rows without any missing value are assigned first. The columns are then
    visited from the most to the least incomplete one, and every still
    unassigned row that has an observed value in the visited column receives a
    random fold.

    Parameters
    ----------
    df : pandas.DataFrame
        Data whose missingness pattern drives the assignment.
    N_FOLDS : int, default 5
        Number of folds; fold labels are drawn from ``0 .. N_FOLDS - 1``.
    random_state : int or None, default None
        Seed passed to ``np.random.seed``. Note that this reseeds NumPy's
        global random generator.

    Returns
    -------
    pandas.Series
        Float fold label per row, indexed like ``df``. Rows in which every
        column is missing are never assigned and stay ``NaN``.
    """
    # Add seed for reproducibility of the predictions (to get the same scores each time we run the code)
    np.random.seed(random_state)

    # Work on plain NumPy arrays: positional boolean masks are unambiguous
    # there, which avoids relying on `.iloc` accepting a boolean Series.
    observed = df.notna().to_numpy(dtype=bool)
    folds = np.full(len(df), np.nan)

    # Initial complete-case test fold assignment
    complete_cases = observed.all(axis=1)
    folds[complete_cases] = np.random.randint(low=0, high=N_FOLDS, size=complete_cases.sum())

    # Go over columns from most missing to least missing
    column_order = df.isna().sum().argsort().to_numpy()[::-1]
    for j in column_order:
        # Rows that are not assigned yet but for which column j is observed
        to_be_filled = np.isnan(folds) & observed[:, j]
        # Fill them randomly
        folds[to_be_filled] = np.random.randint(low=0, high=N_FOLDS, size=to_be_filled.sum())

    return pd.Series(folds, index=df.index)

In [13]:
def missingness_and_categorical_stratified_cv(df, N_FOLDS=5, random_state=None):
    """Assign rows to test folds by missingness, balancing ``category`` columns.

    The assignment runs in three stages:

    1. Rows without any missing value receive a random fold.
    2. For every column of pandas ``category`` dtype, the still unassigned
       rows of each level are spread over the folds as evenly as possible
       (fold sizes within a level differ by at most one).
    3. The columns are visited from the most to the least incomplete one, and
       every still unassigned row with an observed value in the visited column
       receives a random fold.

    Only ``category`` dtype columns take part in stage 2; ``object`` columns
    are ignored there. When ``df`` has no ``category`` column the result is
    the purely missingness-driven assignment of stages 1 and 3.

    Parameters
    ----------
    df : pandas.DataFrame
        Data whose missingness pattern (and categorical columns) drive the
        assignment.
    N_FOLDS : int, default 5
        Number of folds; fold labels lie in ``0 .. N_FOLDS - 1``.
    random_state : int or None, default None
        Seed passed to ``np.random.seed``. Note that this reseeds NumPy's
        global random generator.

    Returns
    -------
    pandas.Series
        Float fold label per row, indexed like ``df``. Rows in which every
        column is missing are never assigned and stay ``NaN``.
    """
    # Add seed for reproducibility of the predictions (to get the same scores each time we run the code)
    np.random.seed(random_state)

    observed = df.notna().to_numpy(dtype=bool)
    folds = np.full(len(df), np.nan)

    # Initial complete-case test fold assignment
    complete_cases = observed.all(axis=1)
    folds[complete_cases] = np.random.randint(low=0, high=N_FOLDS, size=complete_cases.sum())

    # Stratify categorical variables.
    # The previous implementation passed an array of row positions to
    # Series.fillna, which raises a TypeError and would not have produced fold
    # labels anyway. Each level is now dealt out round-robin over the folds.
    for col in df.select_dtypes(include=['category']):
        for category in df[col].value_counts().index:
            members = np.isnan(folds) & (df[col] == category).to_numpy()
            n_members = members.sum()
            if n_members == 0:
                continue
            # Random starting fold so that small levels do not always favour fold 0
            start = np.random.randint(N_FOLDS)
            balanced = (np.arange(n_members) + start) % N_FOLDS
            folds[members] = np.random.permutation(balanced)

    # Go over columns from most missing to least missing
    column_order = df.isna().sum().argsort().to_numpy()[::-1]
    for j in column_order:
        # Rows that are not assigned yet but for which column j is observed
        to_be_filled = np.isnan(folds) & observed[:, j]
        # Fill them randomly
        folds[to_be_filled] = np.random.randint(low=0, high=N_FOLDS, size=to_be_filled.sum())

    return pd.Series(folds, index=df.index)

In [14]:
# Candidate folders that may hold the data file (one per author's machine);
# insert the path to your own data folder here
possible_paths = [
    'C:/Users/lenne/OneDrive/Documenten/Master of Statistics and Data Science/2023-2024/Master thesis/Thesis_Sofia_Lennert/new_data',
    'C:/Users/anaso/Desktop/SOFIA MENDES/KU Leuven/Master Thesis/Thesis_Sofia_Lennert/new_data'
]

# File name
file = 'merged_data.csv'

# Full path to the CSV file: the first candidate folder that actually contains it
path = next(
    (f'{folder}/{file}' for folder in possible_paths if os.path.exists(f'{folder}/{file}')),
    None,
)

# Fail with an actionable message instead of handing None to pd.read_csv
if path is None:
    raise FileNotFoundError(
        f"'{file}' was not found in any of the folders listed in possible_paths: {possible_paths}"
    )

data = pd.read_csv(path)

# Bin the number of relapses into 0, 1, 2, 3 and 4+ 
def bin_column(value):
    """Map a raw relapse count to one of the labels '0', '1', '2', '3' or '4+'.

    Parameters
    ----------
    value : number
        Raw count. Integral floats (e.g. ``2.0``, as produced by a numeric
        column that contains missing values) get the same label as the
        corresponding integer.

    Returns
    -------
    str or missing value
        ``'0'``..``'3'`` for those counts and ``'4+'`` for any other
        non-missing value. A missing value is returned unchanged, so that an
        unknown count is not reported as "4 or more".
    """
    # Keep missing counts missing; previously they fell through to '4+'
    if pd.isna(value):
        return value
    if value in [0, 1, 2, 3]:
        # int() so that 2 and 2.0 both become '2' (str(2.0) would be '2.0')
        return str(int(value))
    return '4+'
# Overwrite the raw counts with their binned labels. bin_column expects the raw
# numeric counts, so this line must only run on freshly loaded data: applied to
# already-binned labels such as '2' it would return '4+' for every row.
data['NRELAP'] = data['NRELAP'].apply(bin_column)

# Display all columns
pd.set_option('display.max_columns', None)
#data

In [15]:
# Choice of target variables, listed in the order in which they are passed to the chain
variables = ['KFSS_M-2y', 'KFSS_P-2y', 'EDSS-2y', 'T25FW-2y', 'NHPT-2y', 'P_R36-SF12-after', 'M_R36-SF12-after', 
             'SES_after', 'SLEC_after', 'KFSS_M-after_2y', 'KFSS_P-after_2y', 'EDSS-after_2y', 'NRELAP', 'CESEV']

# Extract targets (columns keep the order of `variables`)
targets = data[variables]

# Choice of predictor variables: exclude the pre-study outcomes of tests and questionnaires
# (this restriction is what makes the notebook an ablation study)
columns_to_keep = ['AGE', 'SEX', 'RACE', 'CONTINENT', 'MHDIAGN', 'CARDIO', 'URINARY', 'MUSCKELET', 'FATIGUE']

# Extract features
features = data[columns_to_keep]
#features

In [16]:
# Use one-hot encoding for the object-dtype (categorical and binary) input variables:
# each such column is replaced by one 0/1 integer column per level.
# Note that `features` is rebound here, so from this cell on it refers to the encoded frame.
object_columns = features.select_dtypes(include=['object'])
features = pd.get_dummies(features, columns=object_columns.columns, dtype=int)
#features.head()

In [17]:
targets.dtypes

KFSS_M-2y           float64
KFSS_P-2y           float64
EDSS-2y             float64
T25FW-2y            float64
NHPT-2y             float64
P_R36-SF12-after    float64
M_R36-SF12-after    float64
SES_after           float64
SLEC_after          float64
KFSS_M-after_2y     float64
KFSS_P-after_2y     float64
EDSS-after_2y       float64
NRELAP               object
CESEV                object
dtype: object

In [18]:
# Seed shared by the CV fold assignment and by the random-forest models (reproducibility)
random_state = 42
# Number of cross-validation folds
N_FOLDS = 5

In [19]:
# Assign every row to a test fold (based on the targets) and turn the resulting
# Series into a one-column frame so that it can be merged
cv = missingness_and_categorical_stratified_cv(targets, N_FOLDS, random_state)
cv = cv.to_frame(name="CV Fold")

# Attach the fold label to the predictors and to the targets (matched on the row index)
features_cv = pd.merge(features, cv, left_index=True, right_index=True)
targets_cv = pd.merge(targets, cv, left_index=True, right_index=True)

# Fold sizes; rows without a fold label (NaN) are not counted here
features_cv['CV Fold'].value_counts()

CV Fold
4.0    510
3.0    502
0.0    500
1.0    495
2.0    458
Name: count, dtype: int64

In [20]:
# Define a function to calculate the normalized MSE
def normalized_mean_squared_error(true, pred, train):
    """Return the MSE of ``pred`` divided by the MSE of a mean-only baseline.

    The baseline predicts the mean of ``train`` for every entry of ``true``,
    so a value below 1 means that ``pred`` beats the baseline and a value
    above 1 means that it does worse.

    Parameters
    ----------
    true : array-like
        Observed target values of the evaluation set.
    pred : array-like
        Predictions for the same entries as ``true``.
    train : array-like
        Target values whose mean defines the baseline (computed with
        ``np.mean``).

    Returns
    -------
    float
        ``mse(true, pred) / mse(true, baseline)``. The denominator is zero
        when every entry of ``true`` equals the baseline value.
    """
    mean_value = np.mean(train)
    # Build the baseline as an explicit float array. np.full_like(true, ...)
    # would inherit the dtype of `true` and silently truncate the mean for
    # integer-valued targets.
    mean = np.full(np.shape(true), mean_value, dtype=float)
    num = mse(true, pred)
    den = mse(true, mean)
    nmse_loss = num/den
    return nmse_loss

---

# Local Models

In [21]:
# Per-fold results collected for the scoring cells that follow
y_pred_list = []
y_test_list = []
y_train_list = []
y_pred_prob_list = []
yi_test_dummies_list = []
yi_train_dummies_list = []

for fold in range(N_FOLDS):
    # Rows labelled `fold` are held out; every other row (including rows
    # without a fold label) is used for fitting
    features_in_test = features_cv['CV Fold'] == fold
    targets_in_test = targets_cv['CV Fold'] == fold
    Xi_train = features_cv[~features_in_test].drop(columns=["CV Fold"])
    Xi_test = features_cv[features_in_test].drop(columns=["CV Fold"])
    yi_train = targets_cv[~targets_in_test].drop(columns=["CV Fold"])
    yi_test = targets_cv[targets_in_test].drop(columns=["CV Fold"])

    # One-hot encode the categorical (object) targets of both the test and the
    # training split; the Brier-score cells work on these indicator columns
    test_categorical = yi_test.select_dtypes(include=['object'])
    train_categorical = yi_train.select_dtypes(include=['object'])
    yi_test_dummies = pd.get_dummies(test_categorical, columns=test_categorical.columns, dtype=int)
    yi_train_dummies = pd.get_dummies(train_categorical, columns=train_categorical.columns, dtype=int)

    # Chain comes from the project-local `chaining` module
    chain = Chain(
        model_reg=RandomForestRegressor(random_state=random_state),
        model_clf=RandomForestClassifier(random_state=random_state),
        propagate=False,  # run local models
    )
    chain.fit(Xi_train, yi_train, target_types=None)
    y_pred = chain.predict(Xi_test)
    y_pred_prob = chain.predict_proba(Xi_test)

    # Store everything the evaluation needs for this fold
    y_test_list.append(yi_test)
    y_train_list.append(yi_train)
    y_pred_list.append(y_pred)
    y_pred_prob_list.append(y_pred_prob)
    yi_test_dummies_list.append(yi_test_dummies)
    yi_train_dummies_list.append(yi_train_dummies)

    print("Done with evaluating on CV Fold {}".format(fold+1))

Done with evaluating on CV Fold 1
Done with evaluating on CV Fold 2
Done with evaluating on CV Fold 3
Done with evaluating on CV Fold 4
Done with evaluating on CV Fold 5


In [22]:
# Baseline model: for every test row, predict the share of training rows that
# carry each level of the categorical targets
yi_train_dummies_avg = []
for fold_idx, train_dummies in enumerate(yi_train_dummies_list):
    # Fraction of 1s per indicator column in the training split
    percentages = train_dummies.sum() / len(train_dummies)
    test_index = yi_test_dummies_list[fold_idx].index
    # One constant column per level, with one row per test instance
    yi_train_dummies_avg.append(
        pd.DataFrame({col: percentages[col] for col in train_dummies.columns}, index=test_index)
    )


# Re-arrange the predicted probabilities into one frame per fold.
# NOTE(review): the column labels are taken by position from the test-fold
# indicator columns; this presumably relies on predict_proba returning one
# array per categorical target, in target order, with one column per level in
# the same order as the indicator columns - confirm against `chaining.Chain`.
concatenated_dfs = []
for j, fold in enumerate(y_pred_prob_list):
    level_names = yi_test_dummies_list[j].columns
    row_index = yi_test_dummies_list[j].index
    dfs = []
    start = 0
    for array in fold:
        width = len(array[0])
        dfs.append(pd.DataFrame(array, columns=level_names[start:start + width], index=row_index))
        start += width
    concatenated_dfs.append(pd.concat(dfs, axis=1))

In [23]:
# Reset the result containers
scores_with_std = []
avg_brier_score = []
avg_baseline_score = []
variables_cat = yi_test_dummies_list[0].columns
cat_normalized_brier = []

# Raw Brier scores of the model and of the baseline, grouped by the text before
# the first '_' of each indicator column.
# NOTE(review): a categorical target whose own name contains '_' would be
# grouped under a truncated name.
brier_scores_dict = {}
baseline_scores_dict = {}
n_scored_folds = len(yi_test_dummies_list)

for level_name in variables_cat:
    prefix = level_name.split('_')[0]
    model_scores = brier_scores_dict.setdefault(prefix, [])
    reference_scores = baseline_scores_dict.setdefault(prefix, [])

    # One Brier score per fold for this level
    for fold_index in range(n_scored_folds):
        y_test = yi_test_dummies_list[fold_index][level_name]
        y_prob = concatenated_dfs[fold_index][level_name]
        y_prob_avg = yi_train_dummies_avg[fold_index][level_name]

        model_scores.append(brier_score_loss(y_test, y_prob))
        reference_scores.append(brier_score_loss(y_test, y_prob_avg))

# Per target: sum of the Brier scores over all of its levels and folds
avg_brier_score = [(prefix, np.sum(scores)) for prefix, scores in brier_scores_dict.items()]
avg_baseline_score = [(prefix, np.sum(scores)) for prefix, scores in baseline_scores_dict.items()]

# Normalized score = summed model Brier score / summed baseline Brier score
normalized_score_list = [
    (prefix, model_sum / baseline_sum)
    for (prefix, model_sum), (_prefix, baseline_sum) in zip(avg_brier_score, avg_baseline_score)
]

# Print the normalized score of each categorical target
print("Normalized Brier scores for each categorical variable:")
for prefix, avg_score in normalized_score_list:
    print(f"{prefix}: {avg_score:.2f} ")
    cat_normalized_brier.append(avg_score)

Normalized Brier scores for each categorical variable:
NRELAP: 1.10 
CESEV: 1.92 


In [24]:
# Per fold and per target, keep only the test rows whose true value is observed
# (a score cannot be computed against a missing value)
y_test_cv = []
y_pred_cv = []

for fold_truth, fold_pred in zip(y_test_list, y_pred_list):
    nvar = y_test_list[0].shape[1]
    observed_masks = [fold_truth.iloc[:, i].notna() for i in range(nvar)]
    y_test_cv.append([fold_truth.iloc[:, i][mask] for i, mask in enumerate(observed_masks)])
    y_pred_cv.append([fold_pred.iloc[:, i][mask] for i, mask in enumerate(observed_masks)])


# OBTAIN NORMALIZED MSE

# (target name, mean NMSE over folds, standard deviation over folds)
scores_with_std = []

for var_pos, variable_name in enumerate(variables):
    # Only numerical targets get an NMSE; categorical ones are scored with the Brier score
    if y_test_cv[0][var_pos].dtype.kind not in 'bifc':
        continue

    # One NMSE per fold, normalized by the training mean of that fold
    variable_scores = [
        normalized_mean_squared_error(
            y_test_cv[fold_index][var_pos],
            y_pred_cv[fold_index][var_pos],
            y_train_list[fold_index][variable_name],
        )
        for fold_index in range(len(y_test_cv))
    ]
    scores_with_std.append((variable_name, np.mean(variable_scores), np.std(variable_scores)))

# Despite their names, these two lists hold the NMSE means and standard deviations
num_normalized_brier = []
num_std_brier = []

# Print the mean and standard deviation of each numerical target
print("Scores for each outcome (local):")
for variable_name, avg_score, std_score in scores_with_std:
    print(f"{variable_name}: {avg_score:.2f} (± {std_score:.2f})")
    num_normalized_brier.append(avg_score)
    num_std_brier.append(std_score)

Scores for each outcome (local):
KFSS_M-2y: 0.71 (± 0.07)
KFSS_P-2y: 0.87 (± 0.11)
EDSS-2y: 0.52 (± 0.03)
T25FW-2y: 1.22 (± 0.22)
NHPT-2y: 1.33 (± 0.33)
P_R36-SF12-after: 0.86 (± 0.02)
M_R36-SF12-after: 1.09 (± 0.03)
SES_after: 1.07 (± 0.10)
SLEC_after: 1.12 (± 0.07)
KFSS_M-after_2y: 0.75 (± 0.05)
KFSS_P-after_2y: 1.09 (± 0.22)
EDSS-after_2y: 0.60 (± 0.06)


In [25]:
# Concatenate the normalized scores of all targets: normalized MSE for the numerical
# targets (stored in num_normalized_brier) followed by the normalized Brier scores
# of the categorical targets
combined_normalized_brier = np.concatenate((num_normalized_brier, cat_normalized_brier))
print(combined_normalized_brier)

# Unweighted mean over all targets; the printed label says "Brier", but the value
# mixes normalized MSE and normalized Brier scores
average_normalized_brier = np.mean(combined_normalized_brier)
print("Average relative Brier score:", average_normalized_brier)

[0.7144871  0.86754384 0.51699737 1.22284979 1.3320739  0.86166666
 1.08575278 1.06705598 1.12374131 0.75011666 1.08720878 0.60046788
 1.09890584 1.91748017]
Average relative Brier score: 1.0175962902642104


# Propagate predictions

In [26]:
# Per-fold results collected for the scoring cells that follow
y_pred_list = []
y_test_list = []
y_train_list = []
y_pred_prob_list = []
yi_test_dummies_list = []
yi_train_dummies_list = []

for fold in range(N_FOLDS):
    # Rows labelled `fold` are held out; every other row (including rows
    # without a fold label) is used for fitting
    features_in_test = features_cv['CV Fold'] == fold
    targets_in_test = targets_cv['CV Fold'] == fold
    Xi_train = features_cv[~features_in_test].drop(columns=["CV Fold"])
    Xi_test = features_cv[features_in_test].drop(columns=["CV Fold"])
    yi_train = targets_cv[~targets_in_test].drop(columns=["CV Fold"])
    yi_test = targets_cv[targets_in_test].drop(columns=["CV Fold"])

    # One-hot encode the categorical (object) targets of both the test and the
    # training split; the Brier-score cells work on these indicator columns
    test_categorical = yi_test.select_dtypes(include=['object'])
    train_categorical = yi_train.select_dtypes(include=['object'])
    yi_test_dummies = pd.get_dummies(test_categorical, columns=test_categorical.columns, dtype=int)
    yi_train_dummies = pd.get_dummies(train_categorical, columns=train_categorical.columns, dtype=int)

    # Chain comes from the project-local `chaining` module
    chain = Chain(
        model_reg=RandomForestRegressor(random_state=random_state),
        model_clf=RandomForestClassifier(random_state=random_state),
        propagate="pred",  # chain variant of this section: propagate predictions
    )
    chain.fit(Xi_train, yi_train, target_types=None)
    y_pred = chain.predict(Xi_test)
    y_pred_prob = chain.predict_proba(Xi_test)

    # Store everything the evaluation needs for this fold
    y_test_list.append(yi_test)
    y_train_list.append(yi_train)
    y_pred_list.append(y_pred)
    y_pred_prob_list.append(y_pred_prob)
    yi_test_dummies_list.append(yi_test_dummies)
    yi_train_dummies_list.append(yi_train_dummies)

    print("Done with evaluating on CV Fold {}".format(fold+1))

Done with evaluating on CV Fold 1
Done with evaluating on CV Fold 2
Done with evaluating on CV Fold 3
Done with evaluating on CV Fold 4
Done with evaluating on CV Fold 5


In [27]:
# Baseline model: for every test row, predict the share of training rows that
# carry each level of the categorical targets
yi_train_dummies_avg = []
for fold_idx, train_dummies in enumerate(yi_train_dummies_list):
    # Fraction of 1s per indicator column in the training split
    percentages = train_dummies.sum() / len(train_dummies)
    test_index = yi_test_dummies_list[fold_idx].index
    # One constant column per level, with one row per test instance
    yi_train_dummies_avg.append(
        pd.DataFrame({col: percentages[col] for col in train_dummies.columns}, index=test_index)
    )


# Re-arrange the predicted probabilities into one frame per fold.
# NOTE(review): the column labels are taken by position from the test-fold
# indicator columns; this presumably relies on predict_proba returning one
# array per categorical target, in target order, with one column per level in
# the same order as the indicator columns - confirm against `chaining.Chain`.
concatenated_dfs = []
for j, fold in enumerate(y_pred_prob_list):
    level_names = yi_test_dummies_list[j].columns
    row_index = yi_test_dummies_list[j].index
    dfs = []
    start = 0
    for array in fold:
        width = len(array[0])
        dfs.append(pd.DataFrame(array, columns=level_names[start:start + width], index=row_index))
        start += width
    concatenated_dfs.append(pd.concat(dfs, axis=1))

In [28]:
# Reset the result containers
scores_with_std = []
avg_brier_score = []
avg_baseline_score = []
variables_cat = yi_test_dummies_list[0].columns
cat_normalized_brier = []

# Raw Brier scores of the model and of the baseline, grouped by the text before
# the first '_' of each indicator column.
# NOTE(review): a categorical target whose own name contains '_' would be
# grouped under a truncated name.
brier_scores_dict = {}
baseline_scores_dict = {}
n_scored_folds = len(yi_test_dummies_list)

for level_name in variables_cat:
    prefix = level_name.split('_')[0]
    model_scores = brier_scores_dict.setdefault(prefix, [])
    reference_scores = baseline_scores_dict.setdefault(prefix, [])

    # One Brier score per fold for this level
    for fold_index in range(n_scored_folds):
        y_test = yi_test_dummies_list[fold_index][level_name]
        y_prob = concatenated_dfs[fold_index][level_name]
        y_prob_avg = yi_train_dummies_avg[fold_index][level_name]

        model_scores.append(brier_score_loss(y_test, y_prob))
        reference_scores.append(brier_score_loss(y_test, y_prob_avg))

# Per target: sum of the Brier scores over all of its levels and folds
avg_brier_score = [(prefix, np.sum(scores)) for prefix, scores in brier_scores_dict.items()]
avg_baseline_score = [(prefix, np.sum(scores)) for prefix, scores in baseline_scores_dict.items()]

# Normalized score = summed model Brier score / summed baseline Brier score
normalized_score_list = [
    (prefix, model_sum / baseline_sum)
    for (prefix, model_sum), (_prefix, baseline_sum) in zip(avg_brier_score, avg_baseline_score)
]

# Print the normalized score of each categorical target
print("Normalized Brier scores for each categorical variable:")
for prefix, avg_score in normalized_score_list:
    print(f"{prefix}: {avg_score:.2f} ")
    cat_normalized_brier.append(avg_score)

Normalized Brier scores for each categorical variable:
NRELAP: 1.05 
CESEV: 1.74 


In [29]:
# Per fold and per target, keep only the test rows whose true value is observed
# (a score cannot be computed against a missing value)
y_test_cv = []
y_pred_cv = []

for fold_truth, fold_pred in zip(y_test_list, y_pred_list):
    nvar = y_test_list[0].shape[1]
    observed_masks = [fold_truth.iloc[:, i].notna() for i in range(nvar)]
    y_test_cv.append([fold_truth.iloc[:, i][mask] for i, mask in enumerate(observed_masks)])
    y_pred_cv.append([fold_pred.iloc[:, i][mask] for i, mask in enumerate(observed_masks)])


# OBTAIN NORMALIZED MSE

# (target name, mean NMSE over folds, standard deviation over folds)
scores_with_std = []

for var_pos, variable_name in enumerate(variables):
    # Only numerical targets get an NMSE; categorical ones are scored with the Brier score
    if y_test_cv[0][var_pos].dtype.kind not in 'bifc':
        continue

    # One NMSE per fold, normalized by the training mean of that fold
    variable_scores = [
        normalized_mean_squared_error(
            y_test_cv[fold_index][var_pos],
            y_pred_cv[fold_index][var_pos],
            y_train_list[fold_index][variable_name],
        )
        for fold_index in range(len(y_test_cv))
    ]
    scores_with_std.append((variable_name, np.mean(variable_scores), np.std(variable_scores)))

# Despite their names, these two lists hold the NMSE means and standard deviations
num_normalized_brier = []
num_std_brier = []

# Print the mean and standard deviation of each numerical target
print("Scores for each outcome (chain - propagate predictions):")
for variable_name, avg_score, std_score in scores_with_std:
    print(f"{variable_name}: {avg_score:.2f} (± {std_score:.2f})")
    num_normalized_brier.append(avg_score)
    num_std_brier.append(std_score)

Scores for each outcome (chain - propagate predictions):
KFSS_M-2y: 0.71 (± 0.07)
KFSS_P-2y: 0.82 (± 0.09)
EDSS-2y: 0.54 (± 0.04)
T25FW-2y: 1.16 (± 0.12)
NHPT-2y: 1.16 (± 0.27)
P_R36-SF12-after: 0.90 (± 0.04)
M_R36-SF12-after: 1.15 (± 0.08)
SES_after: 1.07 (± 0.09)
SLEC_after: 1.17 (± 0.10)
KFSS_M-after_2y: 0.80 (± 0.03)
KFSS_P-after_2y: 1.14 (± 0.15)
EDSS-after_2y: 0.68 (± 0.08)


In [30]:
# Concatenate the normalized scores of all targets: normalized MSE for the numerical
# targets (stored in num_normalized_brier) followed by the normalized Brier scores
# of the categorical targets
combined_normalized_brier = np.concatenate((num_normalized_brier, cat_normalized_brier))
print(combined_normalized_brier)

# Unweighted mean over all targets; the printed label says "Brier", but the value
# mixes normalized MSE and normalized Brier scores
average_normalized_brier = np.mean(combined_normalized_brier)
print("Average relative Brier score:", average_normalized_brier)

[0.7144871  0.82008945 0.53679771 1.15815499 1.163118   0.8959963
 1.1547296  1.06726975 1.1652273  0.79620393 1.13779282 0.68285029
 1.04821766 1.74476722]
Average relative Brier score: 1.0061215798487246


# Propagate true values

In [31]:
# Per-fold results collected for the scoring cells that follow
y_pred_list = []
y_test_list = []
y_train_list = []
y_pred_prob_list = []
yi_test_dummies_list = []
yi_train_dummies_list = []

for fold in range(N_FOLDS):
    # Rows labelled `fold` are held out; every other row (including rows
    # without a fold label) is used for fitting
    features_in_test = features_cv['CV Fold'] == fold
    targets_in_test = targets_cv['CV Fold'] == fold
    Xi_train = features_cv[~features_in_test].drop(columns=["CV Fold"])
    Xi_test = features_cv[features_in_test].drop(columns=["CV Fold"])
    yi_train = targets_cv[~targets_in_test].drop(columns=["CV Fold"])
    yi_test = targets_cv[targets_in_test].drop(columns=["CV Fold"])

    # One-hot encode the categorical (object) targets of both the test and the
    # training split; the Brier-score cells work on these indicator columns
    test_categorical = yi_test.select_dtypes(include=['object'])
    train_categorical = yi_train.select_dtypes(include=['object'])
    yi_test_dummies = pd.get_dummies(test_categorical, columns=test_categorical.columns, dtype=int)
    yi_train_dummies = pd.get_dummies(train_categorical, columns=train_categorical.columns, dtype=int)

    # Chain comes from the project-local `chaining` module
    chain = Chain(
        model_reg=RandomForestRegressor(random_state=random_state),
        model_clf=RandomForestClassifier(random_state=random_state),
        propagate="true",  # chain variant of this section: propagate true values
    )
    chain.fit(Xi_train, yi_train, target_types=None)
    y_pred = chain.predict(Xi_test)
    y_pred_prob = chain.predict_proba(Xi_test)

    # Store everything the evaluation needs for this fold
    y_test_list.append(yi_test)
    y_train_list.append(yi_train)
    y_pred_list.append(y_pred)
    y_pred_prob_list.append(y_pred_prob)
    yi_test_dummies_list.append(yi_test_dummies)
    yi_train_dummies_list.append(yi_train_dummies)

    print("Done with evaluating on CV Fold {}".format(fold+1))

Done with evaluating on CV Fold 1
Done with evaluating on CV Fold 2
Done with evaluating on CV Fold 3
Done with evaluating on CV Fold 4
Done with evaluating on CV Fold 5


In [32]:
# Baseline model: for every test row, predict the share of training rows that
# carry each level of the categorical targets
yi_train_dummies_avg = []
for fold_idx, train_dummies in enumerate(yi_train_dummies_list):
    # Fraction of 1s per indicator column in the training split
    percentages = train_dummies.sum() / len(train_dummies)
    test_index = yi_test_dummies_list[fold_idx].index
    # One constant column per level, with one row per test instance
    yi_train_dummies_avg.append(
        pd.DataFrame({col: percentages[col] for col in train_dummies.columns}, index=test_index)
    )


# Re-arrange the predicted probabilities into one frame per fold.
# NOTE(review): the column labels are taken by position from the test-fold
# indicator columns; this presumably relies on predict_proba returning one
# array per categorical target, in target order, with one column per level in
# the same order as the indicator columns - confirm against `chaining.Chain`.
concatenated_dfs = []
for j, fold in enumerate(y_pred_prob_list):
    level_names = yi_test_dummies_list[j].columns
    row_index = yi_test_dummies_list[j].index
    dfs = []
    start = 0
    for array in fold:
        width = len(array[0])
        dfs.append(pd.DataFrame(array, columns=level_names[start:start + width], index=row_index))
        start += width
    concatenated_dfs.append(pd.concat(dfs, axis=1))

In [33]:
# Reset the result containers
scores_with_std = []
avg_brier_score = []
avg_baseline_score = []
variables_cat = yi_test_dummies_list[0].columns
cat_normalized_brier = []

# Raw Brier scores of the model and of the baseline, grouped by the text before
# the first '_' of each indicator column.
# NOTE(review): a categorical target whose own name contains '_' would be
# grouped under a truncated name.
brier_scores_dict = {}
baseline_scores_dict = {}
n_scored_folds = len(yi_test_dummies_list)

for level_name in variables_cat:
    prefix = level_name.split('_')[0]
    model_scores = brier_scores_dict.setdefault(prefix, [])
    reference_scores = baseline_scores_dict.setdefault(prefix, [])

    # One Brier score per fold for this level
    for fold_index in range(n_scored_folds):
        y_test = yi_test_dummies_list[fold_index][level_name]
        y_prob = concatenated_dfs[fold_index][level_name]
        y_prob_avg = yi_train_dummies_avg[fold_index][level_name]

        model_scores.append(brier_score_loss(y_test, y_prob))
        reference_scores.append(brier_score_loss(y_test, y_prob_avg))

# Per target: sum of the Brier scores over all of its levels and folds
avg_brier_score = [(prefix, np.sum(scores)) for prefix, scores in brier_scores_dict.items()]
avg_baseline_score = [(prefix, np.sum(scores)) for prefix, scores in baseline_scores_dict.items()]

# Normalized score = summed model Brier score / summed baseline Brier score
normalized_score_list = [
    (prefix, model_sum / baseline_sum)
    for (prefix, model_sum), (_prefix, baseline_sum) in zip(avg_brier_score, avg_baseline_score)
]

# Print the normalized score of each categorical target
print("Normalized Brier scores for each categorical variable:")
for prefix, avg_score in normalized_score_list:
    print(f"{prefix}: {avg_score:.2f} ")
    cat_normalized_brier.append(avg_score)

Normalized Brier scores for each categorical variable:
NRELAP: 1.01 
CESEV: 1.72 


In [34]:
# Per fold and per target, keep only the test rows whose true value is observed
# (a score cannot be computed against a missing value)
y_test_cv = []
y_pred_cv = []

for fold_truth, fold_pred in zip(y_test_list, y_pred_list):
    nvar = y_test_list[0].shape[1]
    observed_masks = [fold_truth.iloc[:, i].notna() for i in range(nvar)]
    y_test_cv.append([fold_truth.iloc[:, i][mask] for i, mask in enumerate(observed_masks)])
    y_pred_cv.append([fold_pred.iloc[:, i][mask] for i, mask in enumerate(observed_masks)])


# OBTAIN NORMALIZED MSE

# (target name, mean NMSE over folds, standard deviation over folds)
scores_with_std = []

for var_pos, variable_name in enumerate(variables):
    # Only numerical targets get an NMSE; categorical ones are scored with the Brier score
    if y_test_cv[0][var_pos].dtype.kind not in 'bifc':
        continue

    # One NMSE per fold, normalized by the training mean of that fold
    variable_scores = [
        normalized_mean_squared_error(
            y_test_cv[fold_index][var_pos],
            y_pred_cv[fold_index][var_pos],
            y_train_list[fold_index][variable_name],
        )
        for fold_index in range(len(y_test_cv))
    ]
    scores_with_std.append((variable_name, np.mean(variable_scores), np.std(variable_scores)))

# Despite their names, these two lists hold the NMSE means and standard deviations
num_normalized_brier = []
num_std_brier = []

# Print the mean and standard deviation of each numerical target
print("Scores for each outcome (chain - true values):")
for variable_name, avg_score, std_score in scores_with_std:
    print(f"{variable_name}: {avg_score:.2f} (± {std_score:.2f})")
    num_normalized_brier.append(avg_score)
    num_std_brier.append(std_score)

Scores for each outcome (chain - true values):
KFSS_M-2y: 0.71 (± 0.07)
KFSS_P-2y: 0.81 (± 0.08)
EDSS-2y: 0.50 (± 0.02)
T25FW-2y: 0.99 (± 0.07)
NHPT-2y: 0.93 (± 0.05)
P_R36-SF12-after: 0.83 (± 0.03)
M_R36-SF12-after: 1.02 (± 0.05)
SES_after: 0.99 (± 0.04)
SLEC_after: 1.05 (± 0.06)
KFSS_M-after_2y: 0.71 (± 0.03)
KFSS_P-after_2y: 1.00 (± 0.14)
EDSS-after_2y: 0.57 (± 0.04)


In [35]:
# Concatenate the normalized scores of all targets: normalized MSE for the numerical
# targets (stored in num_normalized_brier) followed by the normalized Brier scores
# of the categorical targets
combined_normalized_brier = np.concatenate((num_normalized_brier, cat_normalized_brier))
print(combined_normalized_brier)

# Unweighted mean over all targets; the printed label says "Brier", but the value
# mixes normalized MSE and normalized Brier scores
average_normalized_brier = np.mean(combined_normalized_brier)
print("Average relative Brier score:", average_normalized_brier)

[0.7144871  0.80575847 0.49564878 0.99310597 0.93164642 0.83115637
 1.01895165 0.99268779 1.04569312 0.71179869 1.00387099 0.57478256
 1.01221428 1.72017553]
Average relative Brier score: 0.9179984078386777
