# Get all the classifiers, store them in estimators['classifier']

In [1]:
from sklearn.utils import all_estimators

estimator_types = ['classifier', 'regressor', 'cluster', 'transformer']
estimator_list = all_estimators(type_filter='classifier')

estimators = {}
estimator_names = {}

for estimator_type in estimator_types:
    all_things = []
    all_names = []
    for name, thing in estimator_list:
        try:
            print(f'{estimator_type}: {name}')
            all_things.append(thing())
            all_names.append(name)
        except Exception as e:
            pass
    estimators[estimator_type] = all_things
    estimator_names[estimator_type] = all_names

all_estimators = zip(estimator_names, estimators)
all_classifiers = zip(estimator_names['classifier'], estimators['classifier'])

classifier: AdaBoostClassifier
classifier: BaggingClassifier
classifier: BernoulliNB
classifier: CalibratedClassifierCV
classifier: CategoricalNB
classifier: ClassifierChain
classifier: ComplementNB
classifier: DecisionTreeClassifier
classifier: DummyClassifier
classifier: ExtraTreeClassifier
classifier: ExtraTreesClassifier
classifier: GaussianNB
classifier: GaussianProcessClassifier
classifier: GradientBoostingClassifier
classifier: HistGradientBoostingClassifier
classifier: KNeighborsClassifier
classifier: LabelPropagation
classifier: LabelSpreading
classifier: LinearDiscriminantAnalysis
classifier: LinearSVC
classifier: LogisticRegression
classifier: LogisticRegressionCV
classifier: MLPClassifier
classifier: MultiOutputClassifier
classifier: MultinomialNB
classifier: NearestCentroid
classifier: NuSVC
classifier: OneVsOneClassifier
classifier: OneVsRestClassifier
classifier: OutputCodeClassifier
classifier: PassiveAggressiveClassifier
classifier: Perceptron
classifier: QuadraticDisc

## Loop through all the classifiers
... and see which one performs the best out-of-the-box.

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.ensemble import GradientBoostingClassifier

from sklearn.model_selection import train_test_split, KFold, GridSearchCV
from sklearn.pipeline import Pipeline
import timeit

from sklearn import datasets

In [3]:
# score_metrics = ["accuracy", "precision", "recall", "f1"]
# score_metrics = ["accuracy"]
# score_metrics = ["precision"]
# score_metrics = ["recall"]
score_metrics = ["f1"]

iris = datasets.load_iris()
X = iris.data[:99]
y = iris.target[:99]

kf = KFold(3)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [4]:
print(X.min())

0.1


In [5]:
score_accumulator = []
grid_params = {
    }

for score_metric in score_metrics:
    print(f"----------------- {score_metric} ------------------------")

    for name, clf in all_classifiers:

        try:
            print(f"    ---- {name}   ------")
            pipe = Pipeline([('classifier', clf)])
    #         pipe = Pipeline([('scaler', StandardScaler()), ('classifier', clf)])

            grid = GridSearchCV(pipe, grid_params, cv=kf, scoring = score_metric)
            print(f"--- {name} ---")
            timing = timeit.timeit(lambda: grid.fit(X_train, y_train), number=1)
            score = grid.score(X_test, y_test)

            print(f"{score_metric} score: {score} ({timing} seconds)")
            print(f"--------------")

            score_accumulator.append({
            "model": name,
            "timing": timing,
            "score": score
            })

        except Exception as e:
            print("nope.")


----------------- f1 ------------------------
    ---- AdaBoostClassifier   ------
--- AdaBoostClassifier ---
f1 score: 1.0 (0.017194578999999877 seconds)
--------------
    ---- BaggingClassifier   ------
--- BaggingClassifier ---
f1 score: 1.0 (0.06903459899999964 seconds)
--------------
    ---- BernoulliNB   ------
--- BernoulliNB ---
f1 score: 0.5652173913043478 (0.010255981000000247 seconds)
--------------
    ---- CalibratedClassifierCV   ------
--- CalibratedClassifierCV ---
f1 score: 1.0 (0.0565723819999997 seconds)
--------------
    ---- CategoricalNB   ------
--- CategoricalNB ---
f1 score: 1.0 (0.015840367000000022 seconds)
--------------
    ---- ComplementNB   ------
--- ComplementNB ---
f1 score: 1.0 (0.008439105000000335 seconds)
--------------
    ---- DecisionTreeClassifier   ------
--- DecisionTreeClassifier ---
f1 score: 1.0 (0.007942900000000197 seconds)
--------------
    ---- DummyClassifier   ------
--- DummyClassifier ---
f1 score: 0.5652173913043478 (0.007269

Traceback (most recent call last):
  File "/usr/local/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 674, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/usr/local/lib/python3.8/site-packages/sklearn/metrics/_scorer.py", line 199, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true,
  File "/usr/local/lib/python3.8/site-packages/sklearn/metrics/_scorer.py", line 236, in _score
    y_pred = method_caller(estimator, "predict", X)
  File "/usr/local/lib/python3.8/site-packages/sklearn/metrics/_scorer.py", line 53, in _cached_call
    return getattr(estimator, method)(*args, **kwargs)
  File "/usr/local/lib/python3.8/site-packages/sklearn/utils/metaestimators.py", line 120, in <lambda>
    out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)
  File "/usr/local/lib/python3.8/site-packages/sklearn/pipeline.py", line 419, in predict
    return self.steps[-1][-1].predict(Xt, **predict_params)
  File "/usr/loca


--------------
    ---- ExtraTreeClassifier   ------
--- ExtraTreeClassifier ---
f1 score: 1.0 (0.010304198000000042 seconds)
--------------
    ---- ExtraTreesClassifier   ------
--- ExtraTreesClassifier ---
f1 score: 1.0 (0.372689528 seconds)
--------------
    ---- GaussianNB   ------
--- GaussianNB ---
f1 score: 1.0 (0.009865932999999938 seconds)
--------------
    ---- GaussianProcessClassifier   ------
--- GaussianProcessClassifier ---
f1 score: 1.0 (0.02048037700000016 seconds)
--------------
    ---- GradientBoostingClassifier   ------
--- GradientBoostingClassifier ---
f1 score: 1.0 (0.15568410700000035 seconds)
--------------
    ---- HistGradientBoostingClassifier   ------
--- HistGradientBoostingClassifier ---
f1 score: 1.0 (0.22443575700000018 seconds)
--------------
    ---- KNeighborsClassifier   ------
--- KNeighborsClassifier ---
f1 score: 1.0 (0.018446411999999857 seconds)
--------------
    ---- LabelPropagation   ------
--- LabelPropagation ---
f1 score: 1.0 (0.013



f1 score: 1.0 (0.34259216299999995 seconds)
--------------
    ---- MultinomialNB   ------
--- MultinomialNB ---
f1 score: 1.0 (0.00850634400000061 seconds)
--------------
    ---- NearestCentroid   ------
--- NearestCentroid ---
f1 score: 1.0 (0.00792919200000064 seconds)
--------------
    ---- NuSVC   ------
--- NuSVC ---
f1 score: 1.0 (0.010123293000000366 seconds)
--------------
    ---- PassiveAggressiveClassifier   ------
--- PassiveAggressiveClassifier ---
f1 score: 1.0 (0.010830008000000113 seconds)
--------------
    ---- Perceptron   ------
--- Perceptron ---
f1 score: 1.0 (0.010643224999999923 seconds)
--------------
    ---- QuadraticDiscriminantAnalysis   ------
--- QuadraticDiscriminantAnalysis ---
f1 score: 1.0 (0.008866554999999998 seconds)
--------------
    ---- RadiusNeighborsClassifier   ------
--- RadiusNeighborsClassifier ---
f1 score: 1.0 (0.014399303000000252 seconds)
--------------
    ---- RandomForestClassifier   ------
--- RandomForestClassifier ---
f1 scor

In [6]:
scores_df = pd.DataFrame(score_accumulator).sort_values("score", ascending=False)
scores_df

Unnamed: 0,model,timing,score
0,AdaBoostClassifier,0.017195,1.0
25,PassiveAggressiveClassifier,0.01083,1.0
19,LogisticRegression,0.037831,1.0
20,LogisticRegressionCV,0.775584,1.0
21,MLPClassifier,0.342592,1.0
22,MultinomialNB,0.008506,1.0
23,NearestCentroid,0.007929,1.0
24,NuSVC,0.010123,1.0
26,Perceptron,0.010643,1.0
1,BaggingClassifier,0.069035,1.0
