In [1]:
# Render matplotlib figures inline, directly below the cell that draws them.
# NOTE(review): no cell visible in this notebook creates a figure — confirm
# whether this magic is still needed before removing it.
%matplotlib inline


# Classification

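The following example shows how to fit a simple classification model with
*auto-sklearn* on the scikit-learn breast cancer dataset, inspect the models
it found, and score the final ensemble on a held-out test split.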


In [9]:
from pprint import pprint

import sklearn.datasets
import sklearn.metrics
from sklearn import model_selection
import autosklearn.classification

## Data loading



In [3]:
# Load the breast cancer dataset as a (features, labels) pair.
X, y = sklearn.datasets.load_breast_cancer(return_X_y=True)

# Hold out part of the data for the final evaluation. The fixed random_state
# makes the split identical on every run.
X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, random_state=1)

## Build and fit a classifier



In [5]:
# Settings for the auto-sklearn search: an overall time budget, a limit for
# each individual run, a memory cap, and a fixed scratch folder.
# See the AutoSklearnClassifier API documentation for the units of each value.
automl_settings = {
    "time_left_for_this_task": 120,
    "per_run_time_limit": 30,
    "memory_limit": 16384,
    "tmp_folder": "/tmp/autosklearn_classification_example_tmp",
}

automl = autosklearn.classification.AutoSklearnClassifier(**automl_settings)

# Keep the fit call as the last expression so the fitted estimator is shown
# as the cell's output.
automl.fit(X_train, y_train, dataset_name="breast_cancer")

AutoSklearnClassifier(ensemble_class=<class 'autosklearn.ensembles.ensemble_selection.EnsembleSelection'>,
                      memory_limit=16384, per_run_time_limit=30,
                      time_left_for_this_task=120,
                      tmp_folder='/tmp/autosklearn_classification_example_tmp')

## View the models found by auto-sklearn



In [6]:
# Return the leaderboard as the cell's value instead of printing it, so the
# notebook renders the table with its rich display rather than plain text.
automl.leaderboard()

          rank  ensemble_weight                type      cost  duration
model_id                                                               
88           2             0.06         extra_trees  0.007092  1.413559
94           3             0.04         extra_trees  0.007092  0.892238
108          1             0.12         extra_trees  0.007092  1.415933
7            4             0.02         extra_trees  0.014184  0.449032
27           8             0.04         extra_trees  0.014184  0.687339
54           7             0.04         extra_trees  0.014184  0.898463
75           6             0.04          libsvm_svc  0.014184  0.354724
80           5             0.02         extra_trees  0.014184  0.486883
21          11             0.02         extra_trees  0.021277  0.403240
30          12             0.02         extra_trees  0.021277  5.049771
35          13             0.02                 lda  0.021277  0.273900
73          10             0.02            adaboost  0.021277  0

## Print the final ensemble constructed by auto-sklearn



In [7]:
# Fetch the description of the models in the final ensemble, then
# pretty-print it with a four-space indent so the nested entries stay readable.
final_ensemble = automl.show_models()
pprint(final_ensemble, indent=4)

{   3: {   'balancing': Balancing(random_state=1),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x75ade9a3b490>,
           'cost': 0.028368794326241176,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x75adf1e73f10>,
           'ensemble_weight': 0.06,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x75ade9a3bc40>,
           'model_id': 3,
           'rank': 1,
           'sklearn_classifier': MLPClassifier(activation='tanh', alpha=0.0001363185819149026, beta_1=0.999,
              beta_2=0.9, early_stopping=True,
              hidden_layer_sizes=(115, 115, 115),
              learning_rate_init=0.00018009776276177523, max_iter=32,
              n_iter_no_change=32, random_state=1, verbose=0, warm_start=True)},
    6: {   'balancing': Balancing(random_state=1, strategy='weighting'),


## Get the score of the final ensemble



In [8]:
# Predict labels for the held-out split with the fitted ensemble.
predictions = automl.predict(X_test)

# Compare the predictions with the true test labels and report the accuracy.
accuracy = sklearn.metrics.accuracy_score(y_test, predictions)
print("Accuracy score:", accuracy)

Accuracy score: 0.951048951048951
