# Benchmark SVM
This Python notebook trains, tests and evaluates the benchmark SVM model. In the first piece of code, testing is done on a longitudinal data set (D2), and in the second piece of code, testing is done on a cross-sectional data set (D3).
The data sets used for training and evaluation are the ADNI data sets D1_D2 and D4, respectively.


Train model on data set D1_D2 and test model on longitudinal data set D2

In [1]:
import pandas as pd
import datetime
from pathlib import Path
from tadpole_algorithms.models import BenchmarkSVM
from tadpole_algorithms.preprocessing.split import split_test_train_tadpole

"""
Train model on ADNI data set D1_D2
Test model on ADNI data set D2 rollover participants
"""

# Input files: D1_D2 supplies the training and candidate test rows,
# D4 supplies the evaluation rows.
data_path_train_test = Path("data/TADPOLE_D1_D2.csv")
data_path_eval = Path("data/TADPOLE_D4_corr.csv")

# Read both CSVs, in the same order as the paths above.
data_df_train_test, data_df_eval = (
    pd.read_csv(csv_path) for csv_path in (data_path_train_test, data_path_eval)
)

# Partition into train / test / evaluation frames, then replace missing
# values in the test frame with 0 before it is handed to the model.
train_df, test_df, eval_df = split_test_train_tadpole(
    data_df_train_test,
    data_df_eval,
)
test_df = test_df.fillna(0)

# Build the benchmark SVM and fit it on the training frame.
model = BenchmarkSVM()
model.train(train_df)

# Forecast for the D2 test frame; consumed by the D2 evaluation cell.
forecast_df_d2 = model.predict(test_df)


Train the model on data set D1_D2 and test the model on the cross-sectional data set D3

In [3]:
import pandas as pd
import datetime
from pathlib import Path
from tadpole_algorithms.models import BenchmarkSVM
from tadpole_algorithms.preprocessing.split import split_test_train_d3
from tadpole_algorithms.preprocessing.rewrite_df import rewrite_d3

"""
Train model on ADNI data set D1_D2
Test model on ADNI data set D3 rollover participants
"""

# Input files: D1_D2 for training, D3 for the candidate test rows,
# D4 for evaluation.
data_path_train = Path("data/TADPOLE_D1_D2.csv")
data_path_test = Path("data/TADPOLE_D3.csv")
data_path_eval = Path("data/TADPOLE_D4_corr.csv")

# Read the three CSVs, in the same order as the paths above.
data_df_train, data_df_test, data_df_eval = (
    pd.read_csv(csv_path)
    for csv_path in (data_path_train, data_path_test, data_path_eval)
)

# Partition into train / test / evaluation frames.
train_df, test_df, eval_df = split_test_train_d3(
    data_df_train,
    data_df_test,
    data_df_eval,
)

# Replace missing values in the test frame with 0, then pass the result
# through rewrite_d3 before prediction.
test_df = rewrite_d3(test_df.fillna(0))

# Build the benchmark SVM and fit it on the training frame.
model = BenchmarkSVM()
model.train(train_df)

# Forecast for the D3 test frame; consumed by the D3 evaluation cell.
forecast_df_d3 = model.predict(test_df)

Evaluate model tested on D2 on ADNI data set D4

In [2]:
from tadpole_algorithms.evaluation import evaluate_forecast
from tadpole_algorithms.evaluation import print_metrics

# Score the D2 forecast against the evaluation frame.
# NOTE(review): `eval_df` is assigned by both the D2 and the D3 split cells, so
# the frame used here is whichever of those cells ran most recently. On a
# top-to-bottom run that is the D3 split's frame -- confirm both splits yield
# the same evaluation frame, or run this cell directly after the D2 cell.
dictionary = evaluate_forecast(eval_df, forecast_df_d2)

# Print the metrics returned by evaluate_forecast
print_metrics(dictionary)

[[77  9  0]
 [29 61  2]
 [ 4 12 16]]
mAUC (multiclass Area Under Curve): 0.797
bca (balanced classification accuracy): 0.767
adasMAE (ADAS13 Mean Aboslute Error): 9.241
ventsMAE (Ventricles Mean Aboslute Error): 0.019
adasWES (ADAS13 Weighted Error Score): 9.241
ventsWES (Ventricles Weighted Error Score ): 0.019
adasCPA (ADAS13 Coverage Probability Accuracy for 50% Confidence Interval: 0.477
ventsCPA (Ventricles Coverage Probability Accuracy for 50% Confidence Interval: 0.500


Evaluate model tested on D3 on ADNI data set D4

In [4]:
from tadpole_algorithms.evaluation import evaluate_forecast
from tadpole_algorithms.evaluation import print_metrics

# Score the D3 forecast against the evaluation frame.
# NOTE(review): the name `dictionary` is also assigned by the D2 evaluation
# cell, so its value depends on which evaluation cell ran last.
dictionary = evaluate_forecast(eval_df, forecast_df_d3)

# Print the metrics returned by evaluate_forecast
print_metrics(dictionary)

[[78  8  0]
 [26 64  2]
 [ 4 12 16]]
mAUC (multiclass Area Under Curve): 0.801
bca (balanced classification accuracy): 0.780
adasMAE (ADAS13 Mean Aboslute Error): 9.427
ventsMAE (Ventricles Mean Aboslute Error): 0.018
adasWES (ADAS13 Weighted Error Score): 9.427
ventsWES (Ventricles Weighted Error Score ): 0.018
adasCPA (ADAS13 Coverage Probability Accuracy for 50% Confidence Interval: 0.482
ventsCPA (Ventricles Coverage Probability Accuracy for 50% Confidence Interval: 0.500
