In [47]:
# reload benchmark_ae   
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
from autoemulate.compare import AutoEmulate
from benchmark_ae.fetch_data import fetch_data
from autoemulate.utils import get_model_name

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [52]:
from pathlib import Path
# Relative location of the data directory.
path = Path('../data/')
# One "processed" directory per cardiac dataset, cardiac1 through cardiac12.
paths = [path / 'cardiac' / f'cardiac{n}' / 'processed' for n in range(1, 13)]

# run autoemulate on all cardiac datasets

In [104]:
# Show the current value of `path` as a plain string.
# NOTE(review): the recorded output of this cell is a cardiac12 directory, not
# '../data', so `path` had been rebound by some other cell when this ran --
# confirm the value on a fresh kernel.
path_str = f"{path}"
path_str

'../data/cardiac/cardiac12/processed'

In [128]:
def run_ae(path):
    """Run an AutoEmulate comparison on one dataset and return its CV scores.

    Parameters
    ----------
    path : path-like
        Passed straight to ``fetch_data``; its string form is also stored in
        the result so rows from different datasets can be told apart.

    Returns
    -------
    pandas.DataFrame
        ``em.scores_df`` with two extra columns appended: ``path`` (the
        dataset location as a string) and ``best_model`` (the name that
        ``get_model_name`` reports for ``em.best_model``).
    """
    X, y = fetch_data(path)
    em = AutoEmulate()
    em.setup(X, y, n_jobs=-1, use_grid_search=True, grid_search_iters=50, grid_search_type='random')
    em.compare()
    best_model_name = get_model_name(em.best_model)
    # assign() builds a new frame, so the emulator's own scores_df is not
    # modified as a side effect of tagging the rows.
    return em.scores_df.assign(path=str(path), best_model=best_model_name)

In [127]:
# Run the comparison on every dataset and stack the per-fold score tables.
# ignore_index=True gives the combined frame one unique RangeIndex instead of
# repeating each dataset's own 0..n-1 row labels.
results_df = pd.concat(
    [run_ae(dataset_path) for dataset_path in paths],
    ignore_index=True,
)
# write to csv
results_df.to_csv('results_cardiac_random_50.csv', index=False)

autoemulate - Performing grid search for SecondOrderPolynomial...
autoemulate - Best parameters for SecondOrderPolynomial: {}
autoemulate - Cross-validating SecondOrderPolynomial...
autoemulate - Parameters: {'degree': 2}
autoemulate - Performing grid search for RBF...
autoemulate - Best parameters for RBF: {'model__degree': 2, 'model__kernel': 'quintic', 'model__smoothing': 0.9800793254771584}
autoemulate - Cross-validating RBF...
autoemulate - Parameters: {'degree': 2, 'epsilon': 1.0, 'kernel': 'quintic', 'smoothing': 0.9800793254771584}
autoemulate - Performing grid search for RandomForest...


KeyboardInterrupt: 

In [107]:
results_df

Unnamed: 0,model,metric,fold,score,path,best_model
0,SecondOrderPolynomial,rsme,0,1.214096,../data/cardiac/cardiac1/processed,RBF
1,SecondOrderPolynomial,rsme,1,1.834830,../data/cardiac/cardiac1/processed,RBF
2,SecondOrderPolynomial,rsme,2,1.210832,../data/cardiac/cardiac1/processed,RBF
3,SecondOrderPolynomial,rsme,3,1.204286,../data/cardiac/cardiac1/processed,RBF
4,SecondOrderPolynomial,rsme,4,1.354231,../data/cardiac/cardiac1/processed,RBF
...,...,...,...,...,...,...
75,NeuralNetSk,r2,0,0.156044,../data/cardiac/cardiac2/processed,SecondOrderPolynomial
76,NeuralNetSk,r2,1,-0.005696,../data/cardiac/cardiac2/processed,SecondOrderPolynomial
77,NeuralNetSk,r2,2,-0.015550,../data/cardiac/cardiac2/processed,SecondOrderPolynomial
78,NeuralNetSk,r2,3,0.021491,../data/cardiac/cardiac2/processed,SecondOrderPolynomial


In [83]:
# Descriptive label for each dataset key ("cardiac1" ... "cardiac12").
# The tuple is ordered by dataset number; enumerate() supplies that number.
cardiac_descriptions = {
    f"cardiac{number}": label
    for number, label in enumerate(
        (
            "Atrial Ionic",
            "Atrial Isotonic Contraction",
            "Atrial Isometric Stretch 1.0",
            "Atrial Isometric Stretch 1.1",
            "CircAdap",
            "Fourchamber Model",
            "Passive Mechanics",
            "Tissue Electrophysiology",
            "Ventricular Ionic",
            "Ventricular Isotonic Contraction",
            "Ventricular Isometric Stretch 1.0",
            "Ventricular Isometric Stretch 1.1",
        ),
        start=1,
    )
}

In [123]:
# Mean and standard deviation of the fold scores for every
# (path, metric, model) combination, with the "rsme" rows left out.
(results_df
    # filter out metric == "rsme"
    .query('metric != "rsme"')
    .groupby(['path', 'metric', 'model'])
    .agg({'score': ['mean', 'std']})
    # sort by mean score for each path
    .sort_values(by=['path', ('score', 'mean')], ascending=False)
    # sort path from cardiac1 to cardiac12
    .sort_index(level=0, sort_remaining=False)
    .reset_index()
    # Extract the "cardiacN" token from the path into a new column.
    # Raw string: "\d" is not a valid escape in a normal string literal and
    # triggers an invalid-escape warning; the regex itself is unchanged.
    .assign(cardiac=lambda df: df['path'].str.extract(r'(cardiac\d+)')[0])
)

Unnamed: 0_level_0,path,metric,model,score,score,cardiac
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,mean,std,Unnamed: 6_level_1
0,../data/cardiac/cardiac1/processed,r2,RBF,0.994564,0.001094,cardiac1
1,../data/cardiac/cardiac1/processed,r2,GaussianProcessSk,0.994522,0.000366,cardiac1
2,../data/cardiac/cardiac1/processed,r2,SecondOrderPolynomial,0.994057,0.001043,cardiac1
3,../data/cardiac/cardiac1/processed,r2,SupportVectorMachines,0.952853,0.008369,cardiac1
4,../data/cardiac/cardiac1/processed,r2,GradientBoosting,0.920941,0.006775,cardiac1
5,../data/cardiac/cardiac1/processed,r2,XGBoost,0.891877,0.015081,cardiac1
6,../data/cardiac/cardiac1/processed,r2,RandomForest,0.655266,0.018354,cardiac1
7,../data/cardiac/cardiac1/processed,r2,NeuralNetSk,-156.020665,39.703207,cardiac1
8,../data/cardiac/cardiac2/processed,r2,SecondOrderPolynomial,0.834925,0.010954,cardiac2
9,../data/cardiac/cardiac2/processed,r2,RBF,0.81838,0.014362,cardiac2


{'RBF':                     model metric  fold       score
 0   SecondOrderPolynomial   rsme     0    1.658468
 1   SecondOrderPolynomial   rsme     1    1.550802
 2   SecondOrderPolynomial   rsme     2    1.276326
 3   SecondOrderPolynomial   rsme     3    1.059498
 4   SecondOrderPolynomial   rsme     4    1.224820
 ..                    ...    ...   ...         ...
 75            NeuralNetSk     r2     0 -110.597701
 76            NeuralNetSk     r2     1 -118.794065
 77            NeuralNetSk     r2     2 -127.591954
 78            NeuralNetSk     r2     3 -155.855949
 79            NeuralNetSk     r2     4 -184.621297
 
 [80 rows x 4 columns]}

In [33]:
# Fetch the inputs X and targets y of the cardiac1 dataset for a single run.
X, y = fetch_data(path / 'cardiac' / 'cardiac1' / 'processed')

In [34]:
# Cross-validate every candidate emulator on the (X, y) pair loaded earlier.
# NOTE(review): grid_search_iters / grid_search_type are given here without
# use_grid_search=True (run_ae passes it), and the recorded log shows only
# "Cross-validating" lines -- confirm whether the search is meant to be on.
em = AutoEmulate()
em.setup(
    X,
    y,
    n_jobs=-1,
    grid_search_iters=50,
    folds=5,
    grid_search_type='random',
)
em.compare()

autoemulate - Cross-validating SecondOrderPolynomial...
autoemulate - Parameters: {'degree': 2}
autoemulate - Cross-validating RBF...
autoemulate - Parameters: {'degree': 1, 'epsilon': 1.0, 'kernel': 'thin_plate_spline', 'smoothing': 0.0}
autoemulate - Cross-validating RandomForest...
autoemulate - Parameters: {'bootstrap': True, 'criterion': 'squared_error', 'max_depth': None, 'max_features': 1.0, 'max_samples': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100, 'oob_score': False, 'random_state': None}
autoemulate - Cross-validating GradientBoosting...
autoemulate - Parameters: {'estimator__ccp_alpha': 0.0, 'estimator__learning_rate': 0.1, 'estimator__loss': 'squared_error', 'estimator__max_depth': 3, 'estimator__max_features': None, 'estimator__min_samples_leaf': 1, 'estimator__min_samples_split': 2, 'estimator__n_estimators': 100, 'estimator__n_iter_no_change': None, 'estimator__random_state': None, 'estimator__subsample': 1.0, 'estimator': GradientBoosting()

In [48]:

# Display the cross-validation scores of the comparison run
# (columns shown in the recorded output: model, metric, fold, score).
em.scores_df


Unnamed: 0,model,metric,fold,score
0,SecondOrderPolynomial,rsme,0,1.148593
1,SecondOrderPolynomial,rsme,1,2.113252
2,SecondOrderPolynomial,rsme,2,1.335141
3,SecondOrderPolynomial,rsme,3,1.041811
4,SecondOrderPolynomial,rsme,4,1.229983
...,...,...,...,...
75,NeuralNetSk,r2,0,-125.558727
76,NeuralNetSk,r2,1,-113.039760
77,NeuralNetSk,r2,2,-108.484559
78,NeuralNetSk,r2,3,-148.870362
