In [4]:
import os

import numpy as np
import pandas as pd
from photonai.base import Hyperpipe, PipelineElement, Switch
from photonai.optimization import IntegerRange, FloatRange
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import ShuffleSplit, KFold, RepeatedStratifiedKFold

In [5]:
# Folder holding the clinical CSV files. Set the CLINICAL_DATA_DIR environment
# variable to run on another machine; the original absolute path stays the default.
# os.path.join(..., "") guarantees a trailing separator, because the file name is
# appended to this value by plain string concatenation when the data is loaded.
DATA_DIRECTORY = os.path.join(
    os.environ.get("CLINICAL_DATA_DIR", "/home/fehrdelt/data_ssd/data/clinical_data/Full/"),
    "",
)

In [3]:
from sklearn.metrics import fbeta_score, make_scorer
from sklearn.metrics import confusion_matrix

# F-beta scorer with beta=2, i.e. recall is weighted more heavily than precision.
# make_scorer wraps the metric into a scorer callable used as scorer(estimator, X, y).
ftwo_scorer = make_scorer(score_func=fbeta_score, beta=2)

def confusion_matrix_scorer(clf, X, y):
    """Return the four confusion-matrix cells of ``clf`` evaluated on ``(X, y)``.

    Uses the scorer-callable signature ``scorer(estimator, X, y)``: predictions
    are obtained from ``clf.predict(X)`` and compared against ``y``.

    Returns:
        dict with keys 'tn', 'fp', 'fn', 'tp', read from matrix cells
        [0, 0], [0, 1], [1, 0] and [1, 1] respectively. The indexing requires
        the confusion matrix to be at least 2x2.
    """
    matrix = confusion_matrix(y, clf.predict(X))
    # Map each result key to the (row, column) it is read from.
    cell_of = {'tn': (0, 0), 'fp': (0, 1), 'fn': (1, 0), 'tp': (1, 1)}
    return {key: matrix[position] for key, position in cell_of.items()}

def false_neg_scorer(clf, X, y):
    """Return confusion-matrix cell [1, 0] (the false-negative count) for ``clf`` on ``(X, y)``.

    Uses the scorer-callable signature ``scorer(estimator, X, y)``; predictions
    come from ``clf.predict(X)``. Requires a confusion matrix of at least 2x2.
    """
    predictions = clf.predict(X)
    return confusion_matrix(y, predictions)[1, 0]

def false_pos_scorer(clf, X, y):
    """Return confusion-matrix cell [0, 1] (the false-positive count) for ``clf`` on ``(X, y)``.

    Uses the scorer-callable signature ``scorer(estimator, X, y)``; predictions
    come from ``clf.predict(X)``. Requires a confusion matrix of at least 2x2.
    """
    row, column = 0, 1
    matrix = confusion_matrix(y, clf.predict(X))
    return matrix[row, column]

In [None]:
# Define the PHOTONAI hyperpipe: nested cross-validation around
# imputation -> class re-balancing -> one of three candidate classifiers.
# `project_folder` is where the results are saved.

# Seed for both CV splitters so fold assignment is reproducible on re-run.
CV_RANDOM_STATE = 42


# PHOTONAI evaluates a custom metric as metric(y_true, y_pred). sklearn scorer
# callables (signature scorer(estimator, X, y), e.g. the result of make_scorer) do
# not match that calling convention, so the custom metrics are written here as
# plain metric functions and registered under the same metric names as before.
def f2_metric(y_true, y_pred):
    """F-beta score with beta=2 (recall weighted more heavily than precision)."""
    return fbeta_score(y_true, y_pred, beta=2)


def false_neg_metric(y_true, y_pred):
    """False-negative count: cell [1, 0] of the confusion matrix."""
    return confusion_matrix(y_true, y_pred)[1, 0]


def false_pos_metric(y_true, y_pred):
    """False-positive count: cell [0, 1] of the confusion matrix."""
    return confusion_matrix(y_true, y_pred)[0, 1]


# Both CV levels use 5 splits x 3 repeats = 15 folds, with 30 sampled configurations.
hyperpipe = Hyperpipe('megaclassifpipeline',
                      project_folder='/home/fehrdelt/data_ssd/MedicalImaging_GIN/photonAI_results',
                      optimizer="random_grid_search",
                      optimizer_params={'n_configurations': 30},
                      metrics=[('F2', f2_metric),
                               ('False_neg_scorer', false_neg_metric),
                               ('False_pos_scorer', false_pos_metric),
                               'accuracy', 'precision', 'recall', 'f1_score', 'auc'],
                      best_config_metric="F2",
                      # RepeatedStratifiedKFold has no `shuffle` argument (passing it
                      # raises TypeError); it reshuffles on every repeat by itself and
                      # is seeded through `random_state`.
                      outer_cv=RepeatedStratifiedKFold(n_splits=5, n_repeats=3,
                                                       random_state=CV_RANDOM_STATE),
                      inner_cv=RepeatedStratifiedKFold(n_splits=5, n_repeats=3,
                                                       random_state=CV_RANDOM_STATE))

# Add transformer elements
# NOTE: sklearn only uses `fill_value` when strategy='constant'; it is inert with 'median'.
hyperpipe += PipelineElement("SimpleImputer", hyperparameters={},
                             test_disabled=False, missing_values=np.nan, strategy='median', fill_value=0)
# 'SMOTEENN' is the imbalanced-learn sampler name (the previous 'SMOTEEN' was a typo).
hyperpipe += PipelineElement("ImbalancedDataTransform",
                             hyperparameters={'method_name': ['RandomUnderSampler', 'SMOTEENN']},
                             test_disabled=False)

# Add estimator: the Switch lets the optimizer choose between the classifiers below.
estimator_switch = Switch('EstimatorSwitch')
estimator_switch += PipelineElement("RandomForestClassifier",
                                    hyperparameters={'n_estimators': IntegerRange(5, 20),
                                                     'min_samples_split': IntegerRange(2, 5),
                                                     'min_samples_leaf': IntegerRange(1, 3)})
# range_type="geomspace" yields 10 log-spaced learning rates from 0.001 to 1.
# With "logspace", start/stop are handed to numpy.logspace as exponents, which would
# search 10**0.001 .. 10**1 (about 1.002 to 10) instead.
estimator_switch += PipelineElement("HistGradientBoostingClassifier",
                                    hyperparameters={'learning_rate': FloatRange(range_type="geomspace",
                                                                                 start=0.001, stop=1, num=10),
                                                     'l2_regularization': FloatRange(start=0.0, stop=1.0, num=5)})
# `penalty` (not `penality`) is the LogisticRegression keyword.
estimator_switch += PipelineElement("LogisticRegression", hyperparameters={}, penalty='l2', C=1, max_iter=100)
hyperpipe += estimator_switch




In [None]:
# Load data: read CSV columns 2..31 only; within that selection every column but
# the last becomes the feature matrix X and the last one the target vector y.
csv_path = DATA_DIRECTORY + "combined_clinical_data_volumes_outcome_TTS_ANTS_hist_match.csv"
df = pd.read_csv(csv_path, usecols=range(2, 32))

feature_frame = df.iloc[:, :-1]
target_series = df.iloc[:, -1]
X = np.asarray(feature_frame)
y = np.asarray(target_series)

# Fit hyperpipe: runs the full hyperparameter search on (X, y).
hyperpipe.fit(X, y)