Minimal scikit-learn example code

In [3]:
import warnings
# Install a global "ignore" filter: every Python warning raised after this
# point is suppressed (presumably to keep the notebook output short).
# Be aware that this also hides warnings emitted by the libraries used below.
warnings.filterwarnings('ignore')

import slingpy as sp
from typing import AnyStr, Dict, List, Optional
from sklearn.linear_model import LogisticRegression


class MyApplication(sp.AbstractBaseApplication):
    """Example slingpy application: logistic regression on the Iris dataset.

    The class supplies the four hooks the base application is given here:
    which metrics to compute, how to load and split the data, which model to
    build, and how to train it. Attributes such as ``self.output_directory``,
    ``self.split_index_*``, ``self.num_splits_*`` and ``self.datasets`` are
    read but not assigned in this class; they are presumably provided by
    ``sp.AbstractBaseApplication`` (verify against the slingpy sources).
    """

    def __init__(
        self,
        output_directory: AnyStr = "",
        schedule_on_slurm: bool = False,
        split_index_outer: int = 0,
        split_index_inner: int = 0,
        num_splits_outer: int = 5,
        num_splits_inner: int = 5,
    ):
        """Pass every constructor argument through, unchanged, to the base class.

        Args:
            output_directory: Directory handed to the base application and
                later to the Iris loader.
            schedule_on_slurm: Forwarded as-is to the base application.
            split_index_outer: Index of the outer split to use.
            split_index_inner: Index of the inner split to use.
            num_splits_outer: Number of outer splits.
            num_splits_inner: Number of inner splits.
        """
        base_arguments = dict(
            output_directory=output_directory,
            schedule_on_slurm=schedule_on_slurm,
            split_index_outer=split_index_outer,
            split_index_inner=split_index_inner,
            num_splits_outer=num_splits_outer,
            num_splits_inner=num_splits_inner,
        )
        super().__init__(**base_arguments)

    def get_metrics(self, set_name: AnyStr) -> List[sp.AbstractMetric]:
        """Return the metrics to evaluate; ``set_name`` is not consulted.

        The same single-element list (area under the curve) is returned for
        every set name.
        """
        area_under_curve = sp.metrics.AreaUnderTheCurve()
        return [area_under_curve]

    def load_data(self) -> Dict[AnyStr, sp.AbstractDataSource]:
        """Load Iris and partition it into training, validation and test sets.

        Returns:
            A dict with the keys ``training_set_x``/``_y``,
            ``validation_set_x``/``_y`` and ``test_set_x``/``_y``, each mapped
            to the corresponding subset of the feature or label data source.
        """
        features, labels = sp.datasets.Iris.load_data(self.output_directory)
        splitter = sp.StratifiedSplit()

        # Outer split: performed on the complete label source.
        remainder_ids, test_ids = splitter.split(
            labels,
            split_index=self.split_index_outer,
            num_splits=self.num_splits_outer,
        )

        # Inner split: performed only on the labels left over by the outer split.
        # NOTE(review): the outer call unpacks as (rest, test) while this call
        # unpacks as (validation, training). If split() returns the held-out
        # fold second, training receives the smaller part - confirm that this
        # ordering is intended.
        remainder_labels = labels.subset(remainder_ids)
        validation_ids, training_ids = splitter.split(
            remainder_labels,
            split_index=self.split_index_inner,
            num_splits=self.num_splits_inner,
        )

        partitions = {
            "training_set_x": features.subset(training_ids),
            "training_set_y": labels.subset(training_ids),
            "validation_set_x": features.subset(validation_ids),
            "validation_set_y": labels.subset(validation_ids),
            "test_set_x": features.subset(test_ids),
            "test_set_y": labels.subset(test_ids),
        }
        return partitions

    def get_model(self) -> sp.AbstractBaseModel:
        """Build a scikit-learn ``LogisticRegression`` (default settings) wrapped for slingpy."""
        return sp.SklearnModel(LogisticRegression())

    def train_model(self, model: sp.AbstractBaseModel) -> Optional[sp.AbstractBaseModel]:
        """Fit ``model`` on the training features and labels, then return it."""
        training_features = self.datasets.training_set_x
        training_labels = self.datasets.training_set_y
        model.fit(training_features, training_labels)
        return model

Run the application (nested cross-validation on the specified dataset and model).

In [4]:
# Build the application with the default constructor arguments declared above
# and start it. As the last expression of the cell, the value returned by
# run() is what the notebook renders as the cell result.
MyApplication().run()

INFO [1624973192.4286132]: Args are: {'num_splits_inner': 5, 'num_splits_outer': 5, 'output_directory': '/var/folders/9g/qmm07zt55mq14257t30grlkh0000gp/T/tmpci_76a13', 'schedule_on_slurm': False, 'split_index_inner': 0, 'split_index_outer': 0}
INFO [1624973192.4288189]: Running version 0x000
INFO [1624973192.4288859]: Running at 2021-06-29 15:26:32.428874
INFO [1624973192.4289920]: There are 0 GPUs available.
INFO [1624973192.4350350]: Run with args: {'num_splits_inner': 5, 'num_splits_outer': 5, 'output_directory': '/var/folders/9g/qmm07zt55mq14257t30grlkh0000gp/T/tmpci_76a13/outer_0/inner_0', 'schedule_on_slurm': False, 'split_index_inner': 0, 'split_index_outer': 0}
INFO [1624973192.4943161]: Saving args to /var/folders/9g/qmm07zt55mq14257t30grlkh0000gp/T/tmpci_76a13/outer_0/inner_0/losses.pickle
INFO [1624973192.5047731]: Performance on test OrderedDict([('AreaUnderTheCurve', array(0.48399014))])
INFO [1624973192.5064030]: Run with args: {'num_splits_inner': 5, 'num_splits_outer': 

WARN [1624973192.4322660]: /var/folders/9g/qmm07zt55mq14257t30grlkh0000gp/T/tmpci_76a13 already existed. Its previous contents may be overwritten.


INFO [1624973192.6861598]: Saving args to /var/folders/9g/qmm07zt55mq14257t30grlkh0000gp/T/tmpci_76a13/outer_0/inner_2/losses.pickle
INFO [1624973192.6975648]: Performance on test OrderedDict([('AreaUnderTheCurve', array(0.48891876))])
INFO [1624973192.6992710]: Run with args: {'num_splits_inner': 5, 'num_splits_outer': 5, 'output_directory': '/var/folders/9g/qmm07zt55mq14257t30grlkh0000gp/T/tmpci_76a13/outer_0/inner_3', 'schedule_on_slurm': False, 'split_index_inner': 3, 'split_index_outer': 0}
INFO [1624973192.7885189]: Saving args to /var/folders/9g/qmm07zt55mq14257t30grlkh0000gp/T/tmpci_76a13/outer_0/inner_3/losses.pickle
INFO [1624973192.7981520]: Performance on test OrderedDict([('AreaUnderTheCurve', array(0.4886734))])
INFO [1624973192.7998579]: Run with args: {'num_splits_inner': 5, 'num_splits_outer': 5, 'output_directory': '/var/folders/9g/qmm07zt55mq14257t30grlkh0000gp/T/tmpci_76a13/outer_0/inner_4', 'schedule_on_slurm': False, 'split_index_inner': 4, 'split_index_outer': 0}

RunResult(validation_scores={'AreaUnderTheCurve': 0.5015138039946224, 'AreaUnderTheCurve_std': 0.003281435671768332, 'AreaUnderTheCurve_results': [0.5116386083052751, 0.5116386083052751, 0.4863265975286436, 0.4863265975286436, 0.5116386083052751], 'AreaUnderTheCurve_std_std': 0.0007162805001471689, 'AreaUnderTheCurve_std_results': [0.0026965950924129675, 0.0026965950924129675, 0.00415869654080138, 0.00415869654080138, 0.0026965950924129675], 'AreaUnderTheCurve_results_std': 0.012847115291627062, 'AreaUnderTheCurve_results_results': [[array(0.51140011), array(0.51460999), array(0.51281987), array(0.51271605), array(0.50664703)], [array(0.51140011), array(0.51460999), array(0.51281987), array(0.51271605), array(0.50664703)], [array(0.48399014), array(0.4907438), array(0.48891876), array(0.4886734), array(0.47930689)], [array(0.48399014), array(0.4907438), array(0.48891876), array(0.4886734), array(0.47930689)], [array(0.51140011), array(0.51460999), array(0.51281987), array(0.51271605), 