In [1]:
# Note: Requires the R packages nlme and splines, which have to be installed manually.
# This code also uses the pyebm toolbox, which can be installed with `pip install pyebm`.

In [2]:
# Train model on data set D1-D2 and forecast for the D2 subjects.
# The resulting forecast_df_d2 is compared with eval_df in the evaluation cell.
import datetime
from pathlib import Path

import numpy as np  # needed for np.logical_and / np.unique below
import pandas as pd

from tadpole_algorithms.models.emc1 import train_and_predict
from tadpole_algorithms.preprocessing.split import split_test_train_tadpole

"""
Train model on ADNI data set D1 / D2
Predict for subjects in the data set D2
"""

# Load D1_D2 train and possible test data set
data_path_train_test = Path("/mnt/data/Papers/TADPOLEShare/EMC1/jupyter/data/TADPOLE_D1_D2.csv")
data_df_train_test = pd.read_csv(data_path_train_test, low_memory=False)

# Unique subject IDs (RID) having at least one row whose DXCHANGE code is in [4, 6].
# NOTE(review): presumably codes 4-6 mark a change of diagnosis -- confirm
# against the TADPOLE data dictionary.
# idx_progress and SubC are not used again in this cell; they are kept as
# notebook globals in case other cells rely on them.
idx_progress = np.logical_and(
    data_df_train_test['DXCHANGE'] >= 4,
    data_df_train_test['DXCHANGE'] <= 6,
)
SubC = np.unique(data_df_train_test.loc[idx_progress, 'RID'])  # sorted, de-duplicated
SubC = pd.Series(SubC)

# Load D4 evaluation data set
data_path_eval = Path("/mnt/data/Papers/TADPOLEShare/EMC1/jupyter/data/TADPOLE_D4_corr.csv")
data_df_eval = pd.read_csv(data_path_eval)

# Split into the frames used for training, forecasting and evaluation.
train_df, test_df, eval_df = split_test_train_tadpole(data_df_train_test, data_df_eval)

# Note: This step takes > 1 hour to generate predictions without bootstrapping (n_boot = 0).
# For computation of coverage probability and Weighted error score of ADAS and Ventricle prediction, choose n_boot > 1.
# Each iteration of bootstrapping takes approximately 1 hour of computation time.
n_boot = 0
IntermediateFolder = '/mnt/data/Papers/TADPOLEShare/EMC1/jupyter/data/EMC1_IntermediateData'
forecast_df_d2 = train_and_predict.main(train_df, test_df, IntermediateFolder, n_boot)


Preparing Data.
Step 1 / 6 Complete. Selecting Features.
Step 2 / 6 Complete. Training DEBM.
Step 3 / 6 Complete. Predicting feature values at future timepoints.
Step 4 / 6 Complete. Training SVM classifier.
Step 5 / 6 Complete. Predicting ADAS and Ventricle values.
Step 6 / 6 Complete. Computing confidence intervals with bootstrapping.
[0][1][2]

In [None]:
#Train model on data set D1-D3 and predict for D4
import pandas as pd
import datetime
from pathlib import Path
from tadpole_algorithms.models.emc1 import train_and_predict
from tadpole_algorithms.preprocessing.split import split_test_train_d3

# --- Input locations ---------------------------------------------------------
# D1_D2: training data and possible test data
data_path_train_test = Path("/mnt/data/Papers/TADPOLEShare/EMC1/jupyter/data/TADPOLE_D1_D2.csv")
# D3: possible test set
data_path_D3 = Path("/mnt/data/Papers/TADPOLEShare/EMC1/jupyter/data/TADPOLE_D3.csv")
# D4: evaluation data set
data_path_eval = Path("/mnt/data/Papers/TADPOLEShare/EMC1/jupyter/data/TADPOLE_D4_corr.csv")

# --- Read the three tables ---------------------------------------------------
data_df_train_test = pd.read_csv(data_path_train_test, low_memory=False)
data_df_D3 = pd.read_csv(data_path_D3)
data_df_eval = pd.read_csv(data_path_eval)

# --- Split into train / test / evaluation frames -----------------------------
train_df, test_df, eval_df = split_test_train_d3(
    data_df_train_test,
    data_df_D3,
    data_df_eval,
)

# --- Train and forecast ------------------------------------------------------
# Generating predictions takes ~ 1 hour without bootstrapping (n_boot = 0).
# Coverage probability and the weighted error score of the ADAS and Ventricle
# predictions need n_boot > 1.
n_boot = 0
IntermediateFolder = '/mnt/data/Papers/TADPOLEShare/EMC1/jupyter/data/EMC1_IntermediateDataD3'
forecast_df_d3 = train_and_predict.main(
    train_df,
    test_df,
    IntermediateFolder,
    n_boot,
)

In [3]:
# Score the D1-D2 forecasts (forecast_df_d2) against eval_df and print the
# resulting metrics.
from tadpole_algorithms.evaluation import evaluate_forecast, print_metrics

dictionary = evaluate_forecast(eval_df, forecast_df_d2)
print_metrics(dictionary)

[[74 12  0]
 [16 74  2]
 [ 2 14 16]]
mAUC (multiclass Area Under Curve): 0.903
bca (balanced classification accuracy): 0.798
adasMAE (ADAS13 Mean Absolute Error): 6.170
ventsMAE (Ventricles Mean Absolute Error), in % ICV: 0.416
adasWES (ADAS13 Weighted Error Score): 6.736
ventsWES (Ventricles Weighted Error Score ), in % ICV: 0.254
adasCPA (ADAS13 Coverage Probability Accuracy for 50% Confidence Interval: 0.463
ventsCPA (Ventricles Coverage Probability Accuracy for 50% Confidence Interval: 0.460


In [3]:
# Score the D1-D3 forecasts (forecast_df_d3) against eval_df and print the
# resulting metrics.
# NOTE(review): the weighted error scores presumably require bootstrapped
# forecasts (n_boot > 1 in the training cell) -- confirm.
from tadpole_algorithms.evaluation import evaluate_forecast, print_metrics

dictionary = evaluate_forecast(eval_df, forecast_df_d3)
print_metrics(dictionary)

[[77  9  0]
 [28 62  2]
 [ 3 13 16]]
mAUC (multiclass Area Under Curve): 0.897
bca (balanced classification accuracy): 0.770
adasMAE (ADAS13 Mean Absolute Error): 6.484
ventsMAE (Ventricles Mean Absolute Error), in % ICV: 0.835
adasWES (ADAS13 Weighted Error Score): nan
ventsWES (Ventricles Weighted Error Score ), in % ICV: nan
adasCPA (ADAS13 Coverage Probability Accuracy for 50% Confidence Interval: 0.500
ventsCPA (Ventricles Coverage Probability Accuracy for 50% Confidence Interval: 0.500
