# Get all the classifiers, store them in estimators['classifier']

In [1]:
from sklearn.utils import all_estimators

estimator_types = ['classifier', 'regressor', 'cluster', 'transformer']
estimator_list = all_estimators(type_filter='classifier')

estimators = {}
estimator_names = {}

for estimator_type in estimator_types:
    all_things = []
    all_names = []
    for name, thing in estimator_list:
        try:
#             print(f'{estimator_type}: {name}')
            all_things.append(thing())
            all_names.append(name)
        except Exception as e:
            pass
    estimators[estimator_type] = all_things
    estimator_names[estimator_type] = all_names

all_estimators = zip(estimator_names, estimators)
all_classifiers = zip(estimator_names['classifier'], estimators['classifier'])

## Loop through all the classifiers
... and see which one performs the best out-of-the-box.

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.ensemble import GradientBoostingClassifier

from sklearn.model_selection import train_test_split, KFold, GridSearchCV
from sklearn.pipeline import Pipeline
import timeit

from sklearn import datasets

In [3]:
# score_metrics = ["accuracy", "precision", "recall", "f1"]
# score_metrics = ["accuracy"]
# score_metrics = ["precision"]
# score_metrics = ["recall"]
score_metrics = ["f1"]

iris = datasets.load_iris()
X = iris.data[:99]
y = iris.target[:99]

kf = KFold(4)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [4]:
print(X.min())

0.1


In [5]:
score_accumulator = []
grid_params = {
    }

for score_metric in score_metrics:
    print(f"----------------- {score_metric} ------------------------")

    for name, clf in all_classifiers:

        pipe = Pipeline([('classifier', clf)])
#         pipe = Pipeline([('scaler', StandardScaler()), ('classifier', clf)])

        grid = GridSearchCV(pipe, grid_params, cv=kf, scoring = score_metric)
        print(f"--- {name} ---")
        timing = timeit.timeit(lambda: grid.fit(X_train, y_train), number=1)
        score = grid.score(X_test, y_test)

        print(f"{score_metric} score: {score} ({timing} seconds)")
        print(f"--------------")

        score_accumulator.append({
        "model": name,
        "timing": timing,
        "score": score
        })



----------------- f1 ------------------------
--- AdaBoostClassifier ---
f1 score: 1.0 (0.02023991700000005 seconds)
--------------
--- BaggingClassifier ---
f1 score: 1.0 (0.09073035500000004 seconds)
--------------
--- BernoulliNB ---
f1 score: 0.5652173913043478 (0.012599164000000052 seconds)
--------------
--- CalibratedClassifierCV ---
f1 score: 1.0 (0.07764597200000001 seconds)
--------------
--- CategoricalNB ---
f1 score: 1.0 (0.017619360999999945 seconds)
--------------
--- ComplementNB ---
f1 score: 1.0 (0.012959298999999813 seconds)
--------------
--- DecisionTreeClassifier ---
f1 score: 1.0 (0.010813645999999899 seconds)
--------------
--- DummyClassifier ---
f1 score: 0.5652173913043478 (0.009155783999999834 seconds)
--------------
--- ExtraTreeClassifier ---
f1 score: 0.9600000000000001 (0.010690851999999751 seconds)
--------------
--- ExtraTreesClassifier ---


Traceback (most recent call last):
  File "/usr/local/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 674, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/usr/local/lib/python3.8/site-packages/sklearn/metrics/_scorer.py", line 199, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true,
  File "/usr/local/lib/python3.8/site-packages/sklearn/metrics/_scorer.py", line 236, in _score
    y_pred = method_caller(estimator, "predict", X)
  File "/usr/local/lib/python3.8/site-packages/sklearn/metrics/_scorer.py", line 53, in _cached_call
    return getattr(estimator, method)(*args, **kwargs)
  File "/usr/local/lib/python3.8/site-packages/sklearn/utils/metaestimators.py", line 120, in <lambda>
    out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)
  File "/usr/local/lib/python3.8/site-packages/sklearn/pipeline.py", line 419, in predict
    return self.steps[-1][-1].predict(Xt, **predict_params)
  File "/usr/loca

f1 score: 1.0 (0.44605130199999987 seconds)
--------------
--- GaussianNB ---
f1 score: 1.0 (0.010450266000000319 seconds)
--------------
--- GaussianProcessClassifier ---
f1 score: 1.0 (0.022754953999999827 seconds)
--------------
--- GradientBoostingClassifier ---
f1 score: 1.0 (0.16746193399999987 seconds)
--------------
--- HistGradientBoostingClassifier ---
f1 score: 1.0 (0.22097763799999992 seconds)
--------------
--- KNeighborsClassifier ---
f1 score: 1.0 (0.01761752999999988 seconds)
--------------
--- LabelPropagation ---
f1 score: 1.0 (0.018901641999999885 seconds)
--------------
--- LabelSpreading ---
f1 score: 1.0 (0.01980711200000007 seconds)
--------------
--- LinearDiscriminantAnalysis ---
f1 score: 1.0 (0.01810846899999996 seconds)
--------------
--- LinearSVC ---
f1 score: 1.0 (0.01667222600000029 seconds)
--------------
--- LogisticRegression ---
f1 score: 1.0 (0.047677553999999844 seconds)
--------------
--- LogisticRegressionCV ---
f1 score: 1.0 (0.8773988949999998 



f1 score: 1.0 (0.45749087999999993 seconds)
--------------
--- MultinomialNB ---
f1 score: 1.0 (0.011883586999999807 seconds)
--------------
--- NearestCentroid ---
f1 score: 1.0 (0.00977063200000039 seconds)
--------------
--- NuSVC ---
f1 score: 1.0 (0.009903202000000277 seconds)
--------------
--- PassiveAggressiveClassifier ---
f1 score: 1.0 (0.010721954000000089 seconds)
--------------
--- Perceptron ---
f1 score: 1.0 (0.010685465999999977 seconds)
--------------
--- QuadraticDiscriminantAnalysis ---
f1 score: 1.0 (0.009301932000000512 seconds)
--------------
--- RadiusNeighborsClassifier ---
f1 score: 1.0 (0.01200869200000021 seconds)
--------------
--- RandomForestClassifier ---
f1 score: 1.0 (0.5895423089999996 seconds)
--------------
--- RidgeClassifier ---
f1 score: 1.0 (0.016240863999999355 seconds)
--------------
--- RidgeClassifierCV ---
f1 score: 1.0 (0.014427841999999913 seconds)
--------------
--- SGDClassifier ---
f1 score: 1.0 (0.01077330700000001 seconds)
-----------

In [6]:
scores_df = pd.DataFrame(score_accumulator).sort_values("score", ascending=False)
scores_df

Unnamed: 0,model,timing,score
0,AdaBoostClassifier,0.02024,1.0
25,PassiveAggressiveClassifier,0.010722,1.0
19,LogisticRegression,0.047678,1.0
20,LogisticRegressionCV,0.877399,1.0
21,MLPClassifier,0.457491,1.0
22,MultinomialNB,0.011884,1.0
23,NearestCentroid,0.009771,1.0
24,NuSVC,0.009903,1.0
26,Perceptron,0.010685,1.0
1,BaggingClassifier,0.09073,1.0
