In [1]:
%matplotlib inline


# Interpretable models

The following example shows how to inspect the models which *auto-sklearn*
optimizes over and how to restrict them to an interpretable subset.


In [2]:
from pprint import pprint

import autosklearn.classification
import sklearn.datasets
import sklearn.metrics
import sklearn.model_selection



## Show available classification models

We will first list all classifiers Auto-sklearn chooses from. A similar
call is available for preprocessors (see below) and regression (not shown)
as well.



In [3]:
from autosklearn.pipeline.components.classification import ClassifierChoice

# Iterating over get_components() yields one name per available classifier.
for classifier_name in ClassifierChoice.get_components():
    print(classifier_name)

adaboost
bernoulli_nb
decision_tree
extra_trees
gaussian_nb
gradient_boosting
k_nearest_neighbors
lda
liblinear_svc
libsvm_svc
mlp
multinomial_nb
passive_aggressive
qda
random_forest
sgd


## Show available preprocessors



In [4]:
from autosklearn.pipeline.components.feature_preprocessing import (
    FeaturePreprocessorChoice,
)

# Same listing as for the classifiers, this time for feature preprocessors.
for preprocessor_name in FeaturePreprocessorChoice.get_components():
    print(preprocessor_name)

densifier
extra_trees_preproc_for_classification
extra_trees_preproc_for_regression
fast_ica
feature_agglomeration
kernel_pca
kitchen_sinks
liblinear_svc_preprocessor
no_preprocessing
nystroem_sampler
pca
polynomial
random_trees_embedding
select_percentile_classification
select_percentile_regression
select_rates_classification
select_rates_regression
truncatedSVD


In [23]:
from autosklearn.pipeline.components.data_preprocessing import (
    DataPreprocessorChoice,
)

# Print the name of every available data preprocessor, one per line.
for data_preprocessor_name in DataPreprocessorChoice.get_components():
    print(data_preprocessor_name)

feature_type


In [24]:
# Display the whole mapping (component name -> implementing class) instead of
# only the names.
DataPreprocessorChoice.get_components()

OrderedDict([('feature_type',
              autosklearn.pipeline.components.data_preprocessing.feature_type.FeatTypeSplit)])

## Data Loading



In [5]:
# Load the breast cancer dataset as a (features, target) pair.
X, y = sklearn.datasets.load_breast_cancer(return_X_y=True)

# Hold out a test split; the fixed random_state keeps the split the same on
# every run. train_test_split lives in the sklearn.model_selection submodule,
# which has to be imported explicitly (see the import cell).
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
    X, y, random_state=1
)

## Build and fit a classifier

We will now only use a subset of the given classifiers and preprocessors.
Furthermore, we will restrict the ensemble size to ``1`` to only use the
single best model in the end. However, we would like to note that the
choice of which models are deemed interpretable is very much up to the user
and can change from use case to use case.



In [7]:
# Components the search is restricted to: the subset of classifiers and
# feature preprocessors treated as interpretable in this example.
interpretable_components = {
    "classifier": ["decision_tree", "lda", "sgd"],
    "feature_preprocessor": [
        "no_preprocessing",
        "polynomial",
        "select_percentile_classification",
    ],
}

automl = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=120,
    per_run_time_limit=30,
    memory_limit=16384,
    tmp_folder="/tmp/autosklearn_interpretable_models_example_tmp",
    include=interpretable_components,
    # An ensemble of size 1 keeps only the single best model in the end.
    ensemble_kwargs={"ensemble_size": 1},
)

# fit() is the last expression, so the fitted estimator is shown as output.
automl.fit(X_train, y_train, dataset_name="breast_cancer")



AutoSklearnClassifier(ensemble_class=<class 'autosklearn.ensembles.ensemble_selection.EnsembleSelection'>,
                      ensemble_kwargs={'ensemble_size': 1},
                      include={'classifier': ['decision_tree', 'lda', 'sgd'],
                               'feature_preprocessor': ['no_preprocessing',
                                                        'polynomial',
                                                        'select_percentile_classification']},
                      memory_limit=16384, per_run_time_limit=30,
                      time_left_for_this_task=120,
                      tmp_folder='/tmp/autosklearn_interpretable_models_example_tmp')

## Print the final ensemble constructed by auto-sklearn



In [8]:
# show_models() returns a dict keyed by model id describing each ensemble member.
final_ensemble = automl.show_models()
pprint(final_ensemble, indent=4)

{   93: {   'balancing': Balancing(random_state=1, strategy='weighting'),
            'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x76c5ef0a2100>,
            'cost': 0.007092198581560294,
            'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x76c5ef0a2df0>,
            'ensemble_weight': 1.0,
            'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x76c5ef0a2e50>,
            'model_id': 93,
            'rank': 1,
            'sklearn_classifier': SGDClassifier(alpha=0.001390012900046998, average=True,
              eta0=0.00019884076487668, learning_rate='invscaling', loss='log',
              max_iter=16, power_t=0.5584992727456427, random_state=1,
              tol=0.01710369537847631, warm_start=True)}}


In [18]:
# NOTE(review): the output recorded for this cell is a method-wrapper repr,
# which does not look like a pretty-printed estimator -- the cell was probably
# edited after it last ran. Re-run it on a fresh kernel or remove it.
pprint(automl, indent=4)

<method-wrapper '__getattribute__' of AutoSklearnClassifier object at 0x76c5ef118700>


## Get the Score of the final ensemble



In [13]:
# Predict on the held-out split and report plain accuracy.
predictions = automl.predict(X_test)
test_accuracy = sklearn.metrics.accuracy_score(y_test, predictions)
print("Accuracy score:", test_accuracy)

Accuracy score: 0.9370629370629371


In [10]:
# NOTE(review): the output recorded for this cell is a TypeError about
# get_configuration_space() missing 'X' and 'y'. A published notebook should
# not contain a failing cell -- re-run to confirm, then fix or delete it.
automl

TypeError: get_configuration_space() missing 2 required positional arguments: 'X' and 'y'