In this tutorial, we will analyze the Liver dataset from the [WORC Database](https://github.com/MStarmans91/WORCDatabase/tree/development).

More details on the dataset, as well as the original analysis performed by its authors, can be found here:

`Starmans, M. P. A. et al. (2021). The WORC* database: MRI and CT scans, segmentations, and clinical labels for 932 patients from six radiomics studies. Submitted, preprint available from https://doi.org/10.1101/2021.08.19.21262238`

`The experiments are described in the following paper: Starmans, M. P. A. et al. (2021). Reproducible radiomics through automated machine learning validated on twelve clinical applications. Submitted, preprint available from https://arxiv.org/abs/2108.08618.`

In [None]:
# In case you haven't already installed AutoRadiomics
!pip install autorad

In [None]:
from autorad.external.download_WORC import download_WORCDatabase
from autorad.config import config
from pathlib import Path
import pandas as pd

# Set where we will save our data and results
base_dir = Path(config.TEST_DATA_DIR)
data_dir = base_dir / "worc_data"
result_dir = base_dir / "worc_results"

%load_ext autoreload
%autoreload 2


download_WORCDatabase(
    dataset="Liver",
    data_folder=data_dir,
    n_subjects=30,
    )

In [3]:
!ls $data_dir

[1m[36mLiver-006[m[m  [1m[36mLiver-030[m[m  [1m[36mLiver-038[m[m  [1m[36mLiver-107[m[m  [1m[36mLiver-146[m[m  [1m[36mLiver-166[m[m  labels.csv
[1m[36mLiver-008[m[m  [1m[36mLiver-031[m[m  [1m[36mLiver-054[m[m  [1m[36mLiver-115[m[m  [1m[36mLiver-147[m[m  [1m[36mLiver-168[m[m
[1m[36mLiver-011[m[m  [1m[36mLiver-033[m[m  [1m[36mLiver-067[m[m  [1m[36mLiver-122[m[m  [1m[36mLiver-154[m[m  [1m[36mLiver-180[m[m
[1m[36mLiver-021[m[m  [1m[36mLiver-035[m[m  [1m[36mLiver-079[m[m  [1m[36mLiver-124[m[m  [1m[36mLiver-156[m[m  [1m[36mLiver-184[m[m
[1m[36mLiver-023[m[m  [1m[36mLiver-037[m[m  [1m[36mLiver-106[m[m  [1m[36mLiver-128[m[m  [1m[36mLiver-159[m[m  [1m[36mLiver-185[m[m


In [4]:
from autorad.data.utils import get_paths_with_separate_folder_per_case

# Collect the image and segmentation path of every case folder found in data_dir.
# With relative=True the reported paths are relative to data_dir
# (e.g. "Liver-035/image.nii.gz" in the table below).
paths_df = get_paths_with_separate_folder_per_case(data_dir, relative=True)
# Display the resulting table (columns: ID, image_path, segmentation_path).
paths_df

Unnamed: 0,ID,image_path,segmentation_path
0,Liver-035,Liver-035/image.nii.gz,Liver-035/segmentation.nii.gz
1,Liver-067,Liver-067/image.nii.gz,Liver-067/segmentation.nii.gz
2,Liver-033,Liver-033/image.nii.gz,Liver-033/segmentation.nii.gz
3,Liver-146,Liver-146/image.nii.gz,Liver-146/segmentation.nii.gz
4,Liver-184,Liver-184/image.nii.gz,Liver-184/segmentation.nii.gz
5,Liver-124,Liver-124/image.nii.gz,Liver-124/segmentation.nii.gz
6,Liver-115,Liver-115/image.nii.gz,Liver-115/segmentation.nii.gz
7,Liver-185,Liver-185/image.nii.gz,Liver-185/segmentation.nii.gz
8,Liver-147,Liver-147/image.nii.gz,Liver-147/segmentation.nii.gz
9,Liver-122,Liver-122/image.nii.gz,Liver-122/segmentation.nii.gz


In [5]:
from autorad.data.dataset import ImageDataset
from autorad.feature_extraction.extractor import FeatureExtractor

# Wrap the path table in an ImageDataset. "ID" names the column identifying each case;
# root_dir is presumably used to resolve the relative paths in paths_df (TODO confirm).
image_dataset = ImageDataset(
    paths_df,
    ID_colname="ID",
    root_dir=data_dir,
)
# Extract features for every case in the dataset; the result is a table with
# one row per case (see the output below).
extractor = FeatureExtractor(image_dataset)
feature_df = extractor.run()

30it [00:31,  1.06s/it]


Unnamed: 0,ID,image_path,segmentation_path,diagnostics_Versions_PyRadiomics,diagnostics_Versions_Numpy,diagnostics_Versions_SimpleITK,diagnostics_Versions_PyWavelet,diagnostics_Versions_Python,diagnostics_Configuration_Settings,diagnostics_Configuration_EnabledImageTypes,...,wavelet-LLL_gldm_LargeDependenceLowGrayLevelEmphasis,wavelet-LLL_gldm_LowGrayLevelEmphasis,wavelet-LLL_gldm_SmallDependenceEmphasis,wavelet-LLL_gldm_SmallDependenceHighGrayLevelEmphasis,wavelet-LLL_gldm_SmallDependenceLowGrayLevelEmphasis,wavelet-LLL_ngtdm_Busyness,wavelet-LLL_ngtdm_Coarseness,wavelet-LLL_ngtdm_Complexity,wavelet-LLL_ngtdm_Contrast,wavelet-LLL_ngtdm_Strength
0,Liver-035,/Users/p.woznicki/git/AutoRadiomics/tests/test...,/Users/p.woznicki/git/AutoRadiomics/tests/test...,v3.0.1,1.22.1,2.1.1.2,1.2.0,3.10.4,"{'minimumROIDimensions': 1, 'minimumROISize': ...","{'Original': {}, 'LoG': {'sigma': [3.0, 5.0]},...",...,0.490335565711314,0.0092392563097354,0.1049786317887887,17.108519817338614,0.0019064162301314,2.195837937351879,0.0010111456662735,426.9975349731685,0.0619020314532844,0.284974468309886
1,Liver-067,/Users/p.woznicki/git/AutoRadiomics/tests/test...,/Users/p.woznicki/git/AutoRadiomics/tests/test...,v3.0.1,1.22.1,2.1.1.2,1.2.0,3.10.4,"{'minimumROIDimensions': 1, 'minimumROISize': ...","{'Original': {}, 'LoG': {'sigma': [3.0, 5.0]},...",...,0.1726950112519562,0.0084378080208024,0.2387784244404366,83.97021323093149,0.0023728396471364,0.7591978668706331,0.001202498835943,4314.0961725974375,0.0530871088832545,3.480993082112052
2,Liver-033,/Users/p.woznicki/git/AutoRadiomics/tests/test...,/Users/p.woznicki/git/AutoRadiomics/tests/test...,v3.0.1,1.22.1,2.1.1.2,1.2.0,3.10.4,"{'minimumROIDimensions': 1, 'minimumROISize': ...","{'Original': {}, 'LoG': {'sigma': [3.0, 5.0]},...",...,0.0023819900248588,0.0012766642609044,0.5654204077815189,1949.3340316747724,0.0011009446497239,0.0266051312038575,0.0076363940620611,17684.10707042901,0.3745009155825436,22.73438458589251
3,Liver-146,/Users/p.woznicki/git/AutoRadiomics/tests/test...,/Users/p.woznicki/git/AutoRadiomics/tests/test...,v3.0.1,1.22.1,2.1.1.2,1.2.0,3.10.4,"{'minimumROIDimensions': 1, 'minimumROISize': ...","{'Original': {}, 'LoG': {'sigma': [3.0, 5.0]},...",...,0.5235105253912582,0.0144098607235655,0.1017286419590698,9.821812246039771,0.0024608166414348,1.9618138907602145,0.0021490398335429,178.82845259100122,0.0331384950258136,0.3598787334834973
4,Liver-184,/Users/p.woznicki/git/AutoRadiomics/tests/test...,/Users/p.woznicki/git/AutoRadiomics/tests/test...,v3.0.1,1.22.1,2.1.1.2,1.2.0,3.10.4,"{'minimumROIDimensions': 1, 'minimumROISize': ...","{'Original': {}, 'LoG': {'sigma': [3.0, 5.0]},...",...,0.3036898583747069,0.006330218697061,0.107550038020076,33.0708290664619,0.0014813588608183,0.8257035290466124,0.0016166914766237,829.755720150696,0.0456811806461254,1.1242349494954111


In [None]:
# Preview the first rows of the extracted feature table.
feature_df.head()

In [6]:
# Load the labels table stored as labels.csv inside data_dir.
label_df = pd.read_csv(data_dir / "labels.csv")
# Preview the first rows (columns shown below: patient_ID, diagnosis).
label_df.head()

Unnamed: 0,patient_ID,diagnosis
0,Liver-185,1
1,Liver-166,0
2,Liver-180,0
3,Liver-067,0
4,Liver-035,0


In [7]:
from autorad.data.dataset import FeatureDataset

# Attach the diagnosis label to each case's features. The left join keeps every
# extracted case; validate="many_to_one" makes pandas raise if labels.csv
# contains duplicate patient_IDs, which would otherwise silently duplicate cases.
merged_feature_df = feature_df.merge(
    label_df,
    left_on="ID",
    right_on="patient_ID",
    how="left",
    validate="many_to_one",
)
# A left join fills cases without a matching label with NaN; fail loudly here
# rather than passing missing targets on to training.
missing_labels = merged_feature_df.loc[merged_feature_df["diagnosis"].isna(), "ID"]
assert missing_labels.empty, f"No label found for: {missing_labels.tolist()}"

# Bundle features and target into a FeatureDataset: "diagnosis" is the column
# to predict and "ID" identifies each case.
feature_dataset = FeatureDataset(
    merged_feature_df,
    target="diagnosis",
    ID_colname="ID"
)

Split the data into training/validation/test sets:

In [8]:
# File in which the split is stored.
splits_path = result_dir / "splits.json"
# Split the cases using the "train_val_test" method and pass splits_path as the
# save location; the returned dict (shown below) lists the case IDs per subset.
feature_dataset.split(method="train_val_test", save_path=splits_path)

{'split_type': '60% train + 20% validation + 20% test',
 'train': ['Liver-122',
  'Liver-124',
  'Liver-185',
  'Liver-023',
  'Liver-156',
  'Liver-054',
  'Liver-079',
  'Liver-128',
  'Liver-037',
  'Liver-115',
  'Liver-033',
  'Liver-106',
  'Liver-147',
  'Liver-038',
  'Liver-035',
  'Liver-067',
  'Liver-031',
  'Liver-168'],
 'val': ['Liver-006',
  'Liver-184',
  'Liver-180',
  'Liver-030',
  'Liver-011',
  'Liver-021'],
 'test': ['Liver-159',
  'Liver-008',
  'Liver-146',
  'Liver-166',
  'Liver-154',
  'Liver-107']}

In [9]:
from autorad.training.trainer import Trainer
from autorad.models.classifier import MLClassifier

# Create the library's default list of classifiers to be compared during training.
models = MLClassifier.initialize_default_sklearn_models()
# Show which models were created.
print(models)

[Random Forest, Logistic Regression, SVM, XGBoost]


In [11]:
# Bundle the dataset, the candidate models and the output location into one
# training experiment named "Liver_detection".
trainer = Trainer(
    dataset=feature_dataset,
    models=models,
    result_dir=result_dir,
    experiment_name="Liver_detection",
)
# Run the automatic preprocessing step with oversampling disabled.
# NOTE: this call is very verbose — the output below contains per-fold
# cross-validation scores and a Boruta feature-selection log.
trainer.run_auto_preprocessing(oversampling=False)

Fitting 5 folds for each of 100 candidates, totalling 500 fits


  model = cd_fast.enet_coordinate_descent(


[CV 1/5] END ......................alpha=1e-05;, score=-1.531 total time=   0.4s


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[CV 2/5] END ......................alpha=1e-05;, score=-0.785 total time=   0.3s
[CV 3/5] END ......................alpha=1e-05;, score=-0.186 total time=   0.2s


  model = cd_fast.enet_coordinate_descent(


[CV 4/5] END ......................alpha=1e-05;, score=-0.137 total time=   0.2s


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[CV 5/5] END ......................alpha=1e-05;, score=-1.949 total time=   0.2s
[CV 1/5] END .....alpha=1.1497569953977357e-05;, score=-1.423 total time=   0.1s


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[CV 2/5] END .....alpha=1.1497569953977357e-05;, score=-0.780 total time=   0.2s
[CV 3/5] END .....alpha=1.1497569953977357e-05;, score=-0.183 total time=   0.1s


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[CV 4/5] END .....alpha=1.1497569953977357e-05;, score=-0.140 total time=   0.2s
[CV 5/5] END .....alpha=1.1497569953977357e-05;, score=-1.930 total time=   0.1s


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[CV 1/5] END .....alpha=1.3219411484660286e-05;, score=-1.324 total time=   0.2s
[CV 2/5] END .....alpha=1.3219411484660286e-05;, score=-0.765 total time=   0.1s


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[CV 3/5] END .....alpha=1.3219411484660286e-05;, score=-0.175 total time=   0.2s
[CV 4/5] END .....alpha=1.3219411484660286e-05;, score=-0.147 total time=   0.2s


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[CV 5/5] END .....alpha=1.3219411484660286e-05;, score=-1.920 total time=   0.2s
[CV 1/5] END .....alpha=1.5199110829529332e-05;, score=-1.235 total time=   0.1s


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[CV 2/5] END .....alpha=1.5199110829529332e-05;, score=-0.740 total time=   0.3s
[CV 3/5] END .....alpha=1.5199110829529332e-05;, score=-0.160 total time=   0.1s
[CV 4/5] END .....alpha=1.5199110829529332e-05;, score=-0.163 total time=   0.1s


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[CV 5/5] END .....alpha=1.5199110829529332e-05;, score=-1.928 total time=   0.1s
[CV 1/5] END ......alpha=1.747528400007683e-05;, score=-1.192 total time=   0.1s
[CV 2/5] END ......alpha=1.747528400007683e-05;, score=-0.710 total time=   0.1s


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[CV 3/5] END ......alpha=1.747528400007683e-05;, score=-0.155 total time=   0.1s
[CV 4/5] END ......alpha=1.747528400007683e-05;, score=-0.179 total time=   0.1s
[CV 5/5] END ......alpha=1.747528400007683e-05;, score=-1.887 total time=   0.1s


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[CV 1/5] END .....alpha=2.0092330025650458e-05;, score=-1.170 total time=   0.1s
[CV 2/5] END .....alpha=2.0092330025650458e-05;, score=-0.679 total time=   0.1s


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[CV 3/5] END .....alpha=2.0092330025650458e-05;, score=-0.151 total time=   0.1s
[CV 4/5] END .....alpha=2.0092330025650458e-05;, score=-0.196 total time=   0.2s


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[CV 5/5] END .....alpha=2.0092330025650458e-05;, score=-1.786 total time=   0.1s
[CV 1/5] END ......alpha=2.310129700083158e-05;, score=-1.156 total time=   0.1s
[CV 2/5] END ......alpha=2.310129700083158e-05;, score=-0.648 total time=   0.1s
[CV 3/5] END ......alpha=2.310129700083158e-05;, score=-0.147 total time=   0.1s


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[CV 4/5] END ......alpha=2.310129700083158e-05;, score=-0.190 total time=   0.0s
[CV 5/5] END ......alpha=2.310129700083158e-05;, score=-1.677 total time=   0.0s
[CV 1/5] END ......alpha=2.656087782946684e-05;, score=-1.074 total time=   0.0s
[CV 2/5] END ......alpha=2.656087782946684e-05;, score=-0.626 total time=   0.0s
[CV 3/5] END ......alpha=2.656087782946684e-05;, score=-0.140 total time=   0.0s


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[CV 4/5] END ......alpha=2.656087782946684e-05;, score=-0.164 total time=   0.0s
[CV 5/5] END ......alpha=2.656087782946684e-05;, score=-1.567 total time=   0.0s
[CV 1/5] END ......alpha=3.053855508833412e-05;, score=-1.030 total time=   0.0s
[CV 2/5] END ......alpha=3.053855508833412e-05;, score=-0.604 total time=   0.0s


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[CV 3/5] END ......alpha=3.053855508833412e-05;, score=-0.136 total time=   0.1s
[CV 4/5] END ......alpha=3.053855508833412e-05;, score=-0.145 total time=   0.1s
[CV 5/5] END ......alpha=3.053855508833412e-05;, score=-1.451 total time=   0.0s
[CV 1/5] END ......alpha=3.511191734215127e-05;, score=-0.988 total time=   0.0s
[CV 2/5] END ......alpha=3.511191734215127e-05;, score=-0.587 total time=   0.0s


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[CV 3/5] END ......alpha=3.511191734215127e-05;, score=-0.127 total time=   0.1s
[CV 4/5] END ......alpha=3.511191734215127e-05;, score=-0.124 total time=   0.1s
[CV 5/5] END ......alpha=3.511191734215127e-05;, score=-1.283 total time=   0.1s
[CV 1/5] END ......alpha=4.037017258596558e-05;, score=-0.904 total time=   0.0s


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[CV 2/5] END ......alpha=4.037017258596558e-05;, score=-0.569 total time=   0.1s
[CV 3/5] END ......alpha=4.037017258596558e-05;, score=-0.122 total time=   0.1s
[CV 4/5] END ......alpha=4.037017258596558e-05;, score=-0.103 total time=   0.0s
[CV 5/5] END ......alpha=4.037017258596558e-05;, score=-1.172 total time=   0.1s


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[CV 1/5] END ......alpha=4.641588833612782e-05;, score=-0.804 total time=   0.0s
[CV 2/5] END ......alpha=4.641588833612782e-05;, score=-0.553 total time=   0.1s
[CV 3/5] END ......alpha=4.641588833612782e-05;, score=-0.122 total time=   0.0s
[CV 4/5] END ......alpha=4.641588833612782e-05;, score=-0.082 total time=   0.0s
[CV 5/5] END ......alpha=4.641588833612782e-05;, score=-1.066 total time=   0.0s


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[CV 1/5] END .....alpha=5.3366992312063123e-05;, score=-0.722 total time=   0.1s
[CV 2/5] END .....alpha=5.3366992312063123e-05;, score=-0.537 total time=   0.0s
[CV 3/5] END .....alpha=5.3366992312063123e-05;, score=-0.123 total time=   0.0s
[CV 4/5] END .....alpha=5.3366992312063123e-05;, score=-0.057 total time=   0.0s
[CV 5/5] END .....alpha=5.3366992312063123e-05;, score=-0.968 total time=   0.0s


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[CV 1/5] END ......alpha=6.135907273413175e-05;, score=-0.675 total time=   0.0s
[CV 2/5] END ......alpha=6.135907273413175e-05;, score=-0.524 total time=   0.1s
[CV 3/5] END ......alpha=6.135907273413175e-05;, score=-0.128 total time=   0.0s
[CV 4/5] END ......alpha=6.135907273413175e-05;, score=-0.040 total time=   0.0s
[CV 5/5] END ......alpha=6.135907273413175e-05;, score=-0.860 total time=   0.0s


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[CV 1/5] END ......alpha=7.054802310718646e-05;, score=-0.678 total time=   0.2s
[CV 2/5] END ......alpha=7.054802310718646e-05;, score=-0.510 total time=   0.1s
[CV 3/5] END ......alpha=7.054802310718646e-05;, score=-0.122 total time=   0.1s
[CV 4/5] END ......alpha=7.054802310718646e-05;, score=-0.034 total time=   0.0s


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[CV 5/5] END ......alpha=7.054802310718646e-05;, score=-0.773 total time=   0.1s
[CV 1/5] END ......alpha=8.111308307896872e-05;, score=-0.713 total time=   0.0s
[CV 2/5] END ......alpha=8.111308307896872e-05;, score=-0.495 total time=   0.0s
[CV 3/5] END ......alpha=8.111308307896872e-05;, score=-0.124 total time=   0.0s
[CV 4/5] END ......alpha=8.111308307896872e-05;, score=-0.063 total time=   0.0s
[CV 5/5] END ......alpha=8.111308307896872e-05;, score=-0.712 total time=   0.0s


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[CV 1/5] END ......alpha=9.326033468832199e-05;, score=-0.751 total time=   0.1s
[CV 2/5] END ......alpha=9.326033468832199e-05;, score=-0.475 total time=   0.1s
[CV 3/5] END ......alpha=9.326033468832199e-05;, score=-0.132 total time=   0.0s
[CV 4/5] END ......alpha=9.326033468832199e-05;, score=-0.129 total time=   0.0s
[CV 5/5] END ......alpha=9.326033468832199e-05;, score=-0.674 total time=   0.0s


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[CV 1/5] END .....alpha=0.00010722672220103231;, score=-0.797 total time=   0.1s
[CV 2/5] END .....alpha=0.00010722672220103231;, score=-0.453 total time=   0.1s
[CV 3/5] END .....alpha=0.00010722672220103231;, score=-0.145 total time=   0.0s
[CV 4/5] END .....alpha=0.00010722672220103231;, score=-0.156 total time=   0.0s
[CV 5/5] END .....alpha=0.00010722672220103231;, score=-0.678 total time=   0.0s


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[CV 1/5] END ......alpha=0.0001232846739442066;, score=-0.825 total time=   0.0s
[CV 2/5] END ......alpha=0.0001232846739442066;, score=-0.436 total time=   0.0s
[CV 3/5] END ......alpha=0.0001232846739442066;, score=-0.156 total time=   0.1s
[CV 4/5] END ......alpha=0.0001232846739442066;, score=-0.173 total time=   0.1s


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[CV 5/5] END ......alpha=0.0001232846739442066;, score=-0.669 total time=   0.1s
[CV 1/5] END .....alpha=0.00014174741629268049;, score=-0.861 total time=   0.0s
[CV 2/5] END .....alpha=0.00014174741629268049;, score=-0.424 total time=   0.1s


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[CV 3/5] END .....alpha=0.00014174741629268049;, score=-0.163 total time=   0.3s
[CV 4/5] END .....alpha=0.00014174741629268049;, score=-0.180 total time=   0.0s
[CV 5/5] END .....alpha=0.00014174741629268049;, score=-0.667 total time=   0.0s
[CV 1/5] END .....alpha=0.00016297508346206434;, score=-0.909 total time=   0.0s
[CV 2/5] END .....alpha=0.00016297508346206434;, score=-0.413 total time=   0.0s
[CV 3/5] END .....alpha=0.00016297508346206434;, score=-0.174 total time=   0.0s


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[CV 4/5] END .....alpha=0.00016297508346206434;, score=-0.188 total time=   0.0s
[CV 5/5] END .....alpha=0.00016297508346206434;, score=-0.669 total time=   0.0s
[CV 1/5] END ......alpha=0.0001873817422860383;, score=-0.964 total time=   0.0s
[CV 2/5] END ......alpha=0.0001873817422860383;, score=-0.404 total time=   0.0s
[CV 3/5] END ......alpha=0.0001873817422860383;, score=-0.187 total time=   0.0s
[CV 4/5] END ......alpha=0.0001873817422860383;, score=-0.195 total time=   0.0s


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[CV 5/5] END ......alpha=0.0001873817422860383;, score=-0.677 total time=   0.1s
[CV 1/5] END .....alpha=0.00021544346900318845;, score=-0.993 total time=   0.0s
[CV 2/5] END .....alpha=0.00021544346900318845;, score=-0.395 total time=   0.0s
[CV 3/5] END .....alpha=0.00021544346900318845;, score=-0.204 total time=   0.0s
[CV 4/5] END .....alpha=0.00021544346900318845;, score=-0.209 total time=   0.0s
[CV 5/5] END .....alpha=0.00021544346900318845;, score=-0.685 total time=   0.0s


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[CV 1/5] END ......alpha=0.0002477076355991711;, score=-1.010 total time=   0.0s
[CV 2/5] END ......alpha=0.0002477076355991711;, score=-0.386 total time=   0.0s
[CV 3/5] END ......alpha=0.0002477076355991711;, score=-0.228 total time=   0.0s
[CV 4/5] END ......alpha=0.0002477076355991711;, score=-0.230 total time=   0.0s
[CV 5/5] END ......alpha=0.0002477076355991711;, score=-0.691 total time=   0.0s
[CV 1/5] END ......alpha=0.0002848035868435802;, score=-1.014 total time=   0.0s


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[CV 2/5] END ......alpha=0.0002848035868435802;, score=-0.377 total time=   0.0s
[CV 3/5] END ......alpha=0.0002848035868435802;, score=-0.245 total time=   0.0s
[CV 4/5] END ......alpha=0.0002848035868435802;, score=-0.265 total time=   0.0s
[CV 5/5] END ......alpha=0.0002848035868435802;, score=-0.700 total time=   0.0s
[CV 1/5] END .....alpha=0.00032745491628777284;, score=-0.984 total time=   0.0s


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[CV 2/5] END .....alpha=0.00032745491628777284;, score=-0.366 total time=   0.0s
[CV 3/5] END .....alpha=0.00032745491628777284;, score=-0.280 total time=   0.1s
[CV 4/5] END .....alpha=0.00032745491628777284;, score=-0.287 total time=   0.0s
[CV 5/5] END .....alpha=0.00032745491628777284;, score=-0.709 total time=   0.0s
[CV 1/5] END .....alpha=0.00037649358067924675;, score=-0.964 total time=   0.0s
[CV 2/5] END .....alpha=0.00037649358067924675;, score=-0.355 total time=   0.0s


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[CV 3/5] END .....alpha=0.00037649358067924675;, score=-0.335 total time=   0.0s
[CV 4/5] END .....alpha=0.00037649358067924675;, score=-0.302 total time=   0.1s
[CV 5/5] END .....alpha=0.00037649358067924675;, score=-0.717 total time=   0.0s
[CV 1/5] END .....alpha=0.00043287612810830614;, score=-0.944 total time=   0.0s
[CV 2/5] END .....alpha=0.00043287612810830614;, score=-0.352 total time=   0.0s


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[CV 3/5] END .....alpha=0.00043287612810830614;, score=-0.374 total time=   0.0s
[CV 4/5] END .....alpha=0.00043287612810830614;, score=-0.308 total time=   0.0s
[CV 5/5] END .....alpha=0.00043287612810830614;, score=-0.726 total time=   0.0s
[CV 1/5] END ......alpha=0.0004977023564332114;, score=-0.910 total time=   0.0s
[CV 2/5] END ......alpha=0.0004977023564332114;, score=-0.353 total time=   0.0s
[CV 3/5] END ......alpha=0.0004977023564332114;, score=-0.428 total time=   0.0s
[CV 4/5] END ......alpha=0.0004977023564332114;, score=-0.306 total time=   0.0s
[CV 5/5] END ......alpha=0.0004977023564332114;, score=-0.736 total time=   0.1s


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[CV 1/5] END ......alpha=0.0005722367659350221;, score=-0.904 total time=   0.2s
[CV 2/5] END ......alpha=0.0005722367659350221;, score=-0.354 total time=   0.1s
[CV 3/5] END ......alpha=0.0005722367659350221;, score=-0.427 total time=   0.1s
[CV 4/5] END ......alpha=0.0005722367659350221;, score=-0.304 total time=   0.0s
[CV 5/5] END ......alpha=0.0005722367659350221;, score=-0.737 total time=   0.1s
[CV 1/5] END ......alpha=0.0006579332246575682;, score=-0.913 total time=   0.0s
[CV 2/5] END ......alpha=0.0006579332246575682;, score=-0.353 total time=   0.0s
[CV 3/5] END ......alpha=0.0006579332246575682;, score=-0.407 total time=   0.0s
[CV 4/5] END ......alpha=0.0006579332246575682;, score=-0.303 total time=   0.0s
[CV 5/5] END ......alpha=0.0006579332246575682;, score=-0.737 total time=   0.0s


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[CV 1/5] END .......alpha=0.000756463327554629;, score=-0.918 total time=   0.0s
[CV 2/5] END .......alpha=0.000756463327554629;, score=-0.349 total time=   0.0s
[CV 3/5] END .......alpha=0.000756463327554629;, score=-0.387 total time=   0.1s
[CV 4/5] END .......alpha=0.000756463327554629;, score=-0.301 total time=   0.0s
[CV 5/5] END .......alpha=0.000756463327554629;, score=-0.737 total time=   0.0s
[CV 1/5] END ......alpha=0.0008697490026177834;, score=-0.912 total time=   0.0s


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[CV 2/5] END ......alpha=0.0008697490026177834;, score=-0.338 total time=   0.1s


  model = cd_fast.enet_coordinate_descent(


[CV 3/5] END ......alpha=0.0008697490026177834;, score=-0.364 total time=   0.1s
[CV 4/5] END ......alpha=0.0008697490026177834;, score=-0.299 total time=   0.1s
[CV 5/5] END ......alpha=0.0008697490026177834;, score=-0.741 total time=   0.0s
[CV 1/5] END ......................alpha=0.001;, score=-0.912 total time=   0.0s


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[CV 2/5] END ......................alpha=0.001;, score=-0.326 total time=   0.0s
[CV 3/5] END ......................alpha=0.001;, score=-0.357 total time=   0.1s
[CV 4/5] END ......................alpha=0.001;, score=-0.296 total time=   0.0s
[CV 5/5] END ......................alpha=0.001;, score=-0.750 total time=   0.0s
[CV 1/5] END ......alpha=0.0011497569953977356;, score=-0.908 total time=   0.0s
[CV 2/5] END ......alpha=0.0011497569953977356;, score=-0.312 total time=   0.0s


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[CV 3/5] END ......alpha=0.0011497569953977356;, score=-0.357 total time=   0.0s
[CV 4/5] END ......alpha=0.0011497569953977356;, score=-0.294 total time=   0.0s
[CV 5/5] END ......alpha=0.0011497569953977356;, score=-0.764 total time=   0.0s
[CV 1/5] END ......alpha=0.0013219411484660286;, score=-0.906 total time=   0.0s
[CV 2/5] END ......alpha=0.0013219411484660286;, score=-0.296 total time=   0.0s
[CV 3/5] END ......alpha=0.0013219411484660286;, score=-0.357 total time=   0.0s
[CV 4/5] END ......alpha=0.0013219411484660286;, score=-0.291 total time=   0.0s


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[CV 5/5] END ......alpha=0.0013219411484660286;, score=-0.769 total time=   0.0s
[CV 1/5] END ......alpha=0.0015199110829529332;, score=-0.904 total time=   0.0s
[CV 2/5] END ......alpha=0.0015199110829529332;, score=-0.280 total time=   0.1s


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[CV 3/5] END ......alpha=0.0015199110829529332;, score=-0.357 total time=   0.3s
[CV 4/5] END ......alpha=0.0015199110829529332;, score=-0.288 total time=   0.1s
[CV 5/5] END ......alpha=0.0015199110829529332;, score=-0.768 total time=   0.1s
[CV 1/5] END ......alpha=0.0017475284000076847;, score=-0.897 total time=   0.1s
[CV 2/5] END ......alpha=0.0017475284000076847;, score=-0.261 total time=   0.1s
[CV 3/5] END ......alpha=0.0017475284000076847;, score=-0.360 total time=   0.1s
[CV 4/5] END ......alpha=0.0017475284000076847;, score=-0.284 total time=   0.0s


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[CV 5/5] END ......alpha=0.0017475284000076847;, score=-0.766 total time=   0.1s
[CV 1/5] END .......alpha=0.002009233002565048;, score=-0.882 total time=   0.1s
[CV 2/5] END .......alpha=0.002009233002565048;, score=-0.241 total time=   0.1s
[CV 3/5] END .......alpha=0.002009233002565048;, score=-0.358 total time=   0.0s
[CV 4/5] END .......alpha=0.002009233002565048;, score=-0.279 total time=   0.0s


  model = cd_fast.enet_coordinate_descent(


[CV 5/5] END .......alpha=0.002009233002565048;, score=-0.752 total time=   0.2s
[CV 1/5] END ......alpha=0.0023101297000831605;, score=-0.873 total time=   0.0s
[CV 2/5] END ......alpha=0.0023101297000831605;, score=-0.238 total time=   0.0s
[CV 3/5] END ......alpha=0.0023101297000831605;, score=-0.356 total time=   0.1s
[CV 4/5] END ......alpha=0.0023101297000831605;, score=-0.274 total time=   0.0s
[CV 5/5] END ......alpha=0.0023101297000831605;, score=-0.734 total time=   0.2s
[CV 1/5] END ......alpha=0.0026560877829466868;, score=-0.863 total time=   0.0s
[CV 2/5] END ......alpha=0.0026560877829466868;, score=-0.241 total time=   0.1s
[CV 3/5] END ......alpha=0.0026560877829466868;, score=-0.353 total time=   0.1s
[CV 4/5] END ......alpha=0.0026560877829466868;, score=-0.268 total time=   0.0s
[CV 5/5] END ......alpha=0.0026560877829466868;, score=-0.717 total time=   0.1s
[CV 1/5] END ......alpha=0.0030538555088334154;, score=-0.851 total time=   0.0s
[CV 2/5] END ......alpha=0.0

Iteration: 	1 / 100
Confirmed: 	0
Tentative: 	1015
Rejected: 	0
Iteration: 	2 / 100
Confirmed: 	0
Tentative: 	1015
Rejected: 	0
Iteration: 	3 / 100
Confirmed: 	0
Tentative: 	1015
Rejected: 	0
Iteration: 	4 / 100
Confirmed: 	0
Tentative: 	1015
Rejected: 	0
Iteration: 	5 / 100
Confirmed: 	0
Tentative: 	1015
Rejected: 	0
Iteration: 	6 / 100
Confirmed: 	0
Tentative: 	1015
Rejected: 	0
Iteration: 	7 / 100
Confirmed: 	0
Tentative: 	1015
Rejected: 	0
Iteration: 	8 / 100
Confirmed: 	0
Tentative: 	0
Rejected: 	1015


BorutaPy finished running.

Iteration: 	9 / 100
Confirmed: 	0
Tentative: 	0
Rejected: 	1015


In [None]:
# Use Optuna as the optimizer, limited to 10 trials.
trainer.set_optimizer("optuna", n_trials=10)
# Start training. auto_preprocess=True presumably makes use of the preprocessing
# computed by run_auto_preprocessing() — TODO confirm against the Trainer API.
trainer.run(auto_preprocess=True)

  mlfc = MLflowCallback(
[32m[I 2022-06-01 11:31:27,879][0m A new study created in memory with name: Liver_detection[0m
[32m[I 2022-06-01 11:31:28,463][0m Trial 0 finished with value: 0.5 and parameters: {'feature_selection_method': 'anova', 'oversampling_method': None, 'model': 'XGBoost', 'xgb_lambda': 0.014570271595538578, 'xgb_alpha': 0.00021300368327705253, 'colsample_bytree': 0.5136940145553204, 'xgb_subsample': 0.47454241292069554, 'xgb_booster': 'gbtree', 'xgb_max_depth': 5, 'xgb_min_child_weight': 8, 'xgb_eta': 2.883592210117951e-07, 'xgb_gamma': 2.53287670148807e-07, 'xgb_grow_policy': 'lossguide'}. Best is trial 0 with value: 0.5.[0m
2022/06/01 11:31:28 INFO mlflow.tracking.fluent: Experiment with name 'Liver_detection' does not exist. Creating a new experiment.
[32m[I 2022-06-01 11:31:28,868][0m Trial 1 finished with value: 0.5 and parameters: {'feature_selection_method': 'lasso', 'oversampling_method': None, 'model': 'Logistic Regression', 'lr_penalty': 'l1', 'lr_C'

In [None]:
from autorad.utils import io
from autorad.evaluation.evaluator import SimpleEvaluator
from autorad.training.trainer import Inferrer

# Load the best hyperparameters from best_params.json in result_dir
# (presumably written by the training run) and hand them to an Inferrer.
best_params = io.load_json(result_dir / "best_params.json")
inferrer = Inferrer(
    params=best_params,
    result_dir=result_dir,
)
# Fit and evaluate on the feature dataset; results are stored under the name "test".
inferrer.fit_eval(
    feature_dataset,
    result_name="test",
)

# Read the stored predictions back from test.csv in result_dir.
results = pd.read_csv(result_dir / "test.csv")

# Build the ROC curve from the true labels and the predicted values.
evaluator = SimpleEvaluator(results["y_true"], results["y_pred"])
roc = evaluator.plot_roc_curve()
# Optional: to also save the figure to disk, call roc.write_image(result_dir / "roc.png").
roc

In [None]:
# Show the test-set results table (columns: y_true, y_pred).
results

Unnamed: 0,y_true,y_pred
0,1,0.001669
1,0,0.088617
2,0,0.340564
3,0,0.001854
