# Get all the classifiers, store them in estimators['classifier']

In [1]:
from sklearn.utils import all_estimators

estimator_types = ['classifier', 'regressor', 'cluster', 'transformer']

# Ask scikit-learn for the (name, class) pairs of each estimator type.
# The previous version queried only 'classifier' once and reused that list for
# every type, so all four dictionary entries ended up holding classifiers.
estimator_lists = {
    estimator_type: all_estimators(type_filter=estimator_type)
    for estimator_type in estimator_types
}
# Kept under its original name: the (name, class) pairs for classifiers.
estimator_list = estimator_lists['classifier']

# estimator type -> default-constructed instances / their class names
# (the two lists for a given type are index-aligned).
estimators = {}
estimator_names = {}
# estimator type -> names of classes that could not be built with no arguments.
estimator_skipped = {}

for estimator_type in estimator_types:
    all_things = []
    all_names = []
    skipped = []
    for name, thing in estimator_lists[estimator_type]:
        try:
            instance = thing()
        except Exception:
            # Best-effort: a class whose constructor fails without arguments
            # is left out, but its name is recorded instead of being lost.
            skipped.append(name)
            continue
        all_things.append(instance)
        all_names.append(name)
    estimators[estimator_type] = all_things
    estimator_names[estimator_type] = all_names
    estimator_skipped[estimator_type] = skipped

# NOTE(review): this rebinds the imported `all_estimators` function, and zipping
# two dicts pairs their keys (each type with itself). Kept for compatibility;
# confirm whether anything still uses it.
all_estimators = list(zip(estimator_names, estimators))
# Materialised as a list so the pairs can be iterated more than once
# (a bare zip object is exhausted after a single pass).
all_classifiers = list(zip(estimator_names['classifier'], estimators['classifier']))

## Loop through all the classifiers
... and see which one performs best out of the box (default hyperparameters, no tuning).

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.ensemble import GradientBoostingClassifier

from sklearn.model_selection import train_test_split, KFold, GridSearchCV
from sklearn.pipeline import Pipeline
import timeit

from sklearn import datasets

In [3]:
# Scoring name(s) handed to GridSearchCV, one benchmark pass per entry.
# Alternatives tried earlier: "accuracy", "precision", "recall",
# or all four together: ["accuracy", "precision", "recall", "f1"].
score_metrics = ["f1"]

# Only the first 99 iris rows are used.
# NOTE(review): presumably this restricts the task to two classes so that the
# binary "f1" scorer applies -- confirm that 99 (rather than 100) is intended.
iris = datasets.load_iris()
X, y = iris.data[:99], iris.target[:99]

# 3-fold cross-validation inside the grid search, plus a fixed hold-out split.
kf = KFold(n_splits=3)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.33
)

In [4]:
# Smallest value anywhere in the feature matrix.
# NOTE(review): presumably a check that no feature is negative -- confirm intent.
print(np.min(X))

0.1


In [None]:
# One record per (metric, model) that was fitted and scored successfully.
score_accumulator = []
# Empty grid: the search evaluates a single candidate, i.e. each classifier
# with the hyperparameters it was constructed with.
grid_params = {}

# Rebuild the (name, instance) pairs from the dictionaries instead of iterating
# `all_classifiers` directly: that object is a one-shot zip iterator, so a second
# metric (or re-running this cell) would otherwise loop over nothing.
classifier_pairs = list(zip(estimator_names['classifier'], estimators['classifier']))

for score_metric in score_metrics:
    print(f"----------------- {score_metric} ------------------------")

    for name, clf in classifier_pairs:

        try:
            print(f"    ---- {name}   ------")
            pipe = Pipeline([('classifier', clf)])
            # Variant with feature scaling:
            # pipe = Pipeline([('scaler', StandardScaler()), ('classifier', clf)])

            grid = GridSearchCV(pipe, grid_params, cv=kf, scoring=score_metric)
            print(f"--- {name} ---")
            # timeit with number=1 returns the wall-clock seconds of one fit.
            timing = timeit.timeit(lambda: grid.fit(X_train, y_train), number=1)
            # Score on the hold-out split using the same metric as the search.
            score = grid.score(X_test, y_test)

            print(f"{score_metric} score: {score} ({timing} seconds)")
            print("--------------")

            score_accumulator.append({
                "metric": score_metric,  # tells rows apart when several metrics run
                "model": name,
                "timing": timing,
                "score": score,
            })

        except Exception as e:
            # Best-effort benchmark: skip models that fail on this data, but say
            # which one failed and why rather than discarding the error.
            print(f"nope. ({name}: {type(e).__name__}: {e})")


----------------- f1 ------------------------
    ---- AdaBoostClassifier   ------
--- AdaBoostClassifier ---
f1 score: 1.0 (0.02401256500000004 seconds)
--------------
    ---- BaggingClassifier   ------
--- BaggingClassifier ---
f1 score: 1.0 (0.07960495700000036 seconds)
--------------
    ---- BernoulliNB   ------
--- BernoulliNB ---
f1 score: 0.5652173913043478 (0.01135765799999966 seconds)
--------------
    ---- CalibratedClassifierCV   ------
--- CalibratedClassifierCV ---
f1 score: 1.0 (0.06341564899999996 seconds)
--------------
    ---- CategoricalNB   ------
--- CategoricalNB ---
f1 score: 1.0 (0.014651811999999875 seconds)
--------------
    ---- ComplementNB   ------
--- ComplementNB ---


Traceback (most recent call last):
  File "/usr/local/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 674, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/usr/local/lib/python3.8/site-packages/sklearn/metrics/_scorer.py", line 199, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true,
  File "/usr/local/lib/python3.8/site-packages/sklearn/metrics/_scorer.py", line 236, in _score
    y_pred = method_caller(estimator, "predict", X)
  File "/usr/local/lib/python3.8/site-packages/sklearn/metrics/_scorer.py", line 53, in _cached_call
    return getattr(estimator, method)(*args, **kwargs)
  File "/usr/local/lib/python3.8/site-packages/sklearn/utils/metaestimators.py", line 120, in <lambda>
    out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)
  File "/usr/local/lib/python3.8/site-packages/sklearn/pipeline.py", line 419, in predict
    return self.steps[-1][-1].predict(Xt, **predict_params)
  File "/usr/loca

f1 score: 1.0 (0.011051035000000375 seconds)
--------------
    ---- DecisionTreeClassifier   ------
--- DecisionTreeClassifier ---
f1 score: 1.0 (0.01584263599999991 seconds)
--------------
    ---- DummyClassifier   ------
--- DummyClassifier ---
f1 score: 0.5652173913043478 (0.0121676970000002 seconds)
--------------
    ---- ExtraTreeClassifier   ------
--- ExtraTreeClassifier ---
f1 score: 0.962962962962963 (0.012810232000000088 seconds)
--------------
    ---- ExtraTreesClassifier   ------
--- ExtraTreesClassifier ---
f1 score: 1.0 (0.5094670809999999 seconds)
--------------
    ---- GaussianNB   ------
--- GaussianNB ---
f1 score: 1.0 (0.01018718599999957 seconds)
--------------
    ---- GaussianProcessClassifier   ------
--- GaussianProcessClassifier ---
f1 score: 1.0 (0.024651720000000044 seconds)
--------------
    ---- GradientBoostingClassifier   ------
--- GradientBoostingClassifier ---
f1 score: 1.0 (0.17703587399999998 seconds)
--------------
    ---- HistGradientBoostin



f1 score: 1.0 (0.3448206549999995 seconds)
--------------
    ---- MultinomialNB   ------
--- MultinomialNB ---
f1 score: 1.0 (0.010572605999999318 seconds)
--------------
    ---- NearestCentroid   ------
--- NearestCentroid ---
f1 score: 1.0 (0.009170564999999797 seconds)
--------------
    ---- NuSVC   ------
--- NuSVC ---
f1 score: 1.0 (0.011419541999999616 seconds)
--------------
    ---- PassiveAggressiveClassifier   ------
--- PassiveAggressiveClassifier ---
f1 score: 1.0 (0.010786537999999624 seconds)
--------------
    ---- Perceptron   ------
--- Perceptron ---
f1 score: 1.0 (0.010582635999999646 seconds)
--------------
    ---- QuadraticDiscriminantAnalysis   ------
--- QuadraticDiscriminantAnalysis ---
f1 score: 1.0 (0.0077737829999993124 seconds)
--------------
    ---- RadiusNeighborsClassifier   ------
--- RadiusNeighborsClassifier ---
f1 score: 1.0 (0.009795710000000568 seconds)
--------------
    ---- RandomForestClassifier   ------
--- RandomForestClassifier ---


In [None]:
# Collect the benchmark records into a table, best score first.
scores_df = pd.DataFrame(score_accumulator)
# An empty accumulator yields a frame with no columns, and sort_values("score")
# would then raise KeyError; only sort when the column is actually present.
if "score" in scores_df.columns:
    scores_df = scores_df.sort_values("score", ascending=False)
scores_df