## Loop through all the classifiers

- "Nearest Neighbors"
- "Linear SVM"
- "RBF SVM"
- "Gaussian Process"
- "Decision Tree"
- "Random Forest"
- "Multi Layer Perceptron"
- "AdaBoost"
- "Naive Bayes"
- "QDA"
- "XGB Gradient Boosted Forest"

... and see which one performs the best out-of-the-box.

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_moons, make_circles, make_classification
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.ensemble import GradientBoostingClassifier

from sklearn.model_selection import train_test_split, KFold, GridSearchCV
from sklearn.pipeline import Pipeline
import timeit

from sklearn import datasets

In [17]:
# todo: this causes problems with anything but accuracy atm, need to fix that
# score_metrics = ["accuracy", "precision", "recall", "f1"]
score_metrics = ["accuracy"]


iris = datasets.load_iris()
X = iris.data[:, :2]  # we only take the first two features.
y = iris.target

kf = KFold(10)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [18]:
names = ["Nearest Neighbors", 
         "Linear SVM", 
         "RBF SVM", 
         "Gaussian Process",
         "Decision Tree", 
         "Random Forest", 
         "Multi Layer Perceptron", 
         "AdaBoost",
         "Naive Bayes", 
         "QDA", 
         "XGB Gradient Boosted Forest",
        ]

classifiers = [
    KNeighborsClassifier(),
    SVC(kernel="linear"),
    SVC(),
    GaussianProcessClassifier(),
    DecisionTreeClassifier(),
    RandomForestClassifier(),
    MLPClassifier(),
    AdaBoostClassifier(),
    GaussianNB(),
    QuadraticDiscriminantAnalysis(),
    GradientBoostingClassifier()
    ]


score_accumulator = []


for score_metric in score_metrics:
    for name, clf in zip(names, classifiers):

        pipe = Pipeline([('scaler', StandardScaler()), ('classifier', clf)])
        grid_params = {
        }
        grid = GridSearchCV(pipe, grid_params, cv=kf, scoring = score_metric)
        print(f"--- {name} ---")
        timing = timeit.timeit(lambda: grid.fit(X_train, y_train), number=1)
        score = grid.score(X_test, y_test)

        print(f"{score_metric} score: {score} ({timing} seconds)")
        print(f"--------------")

        score_accumulator.append({
        "model": name,
        "timing": timing,
        "score": score
        })

scores_df = pd.DataFrame(score_accumulator).sort_values("score", ascending=False)
scores_df

--- Nearest Neighbors ---
accuracy score: 0.8 (0.032930601000089155 seconds)
--------------
--- Linear SVM ---
accuracy score: 0.78 (0.024263922000045568 seconds)
--------------
--- RBF SVM ---
accuracy score: 0.72 (0.02471750799986694 seconds)
--------------
--- Gaussian Process ---
accuracy score: 0.78 (0.20593385200004377 seconds)
--------------
--- Decision Tree ---
accuracy score: 0.7 (0.02484309800001938 seconds)
--------------
--- Random Forest ---
accuracy score: 0.74 (1.4687409109999408 seconds)
--------------
--- Multi Layer Perceptron ---




accuracy score: 0.78 (1.2157313920001798 seconds)
--------------
--- AdaBoost ---
accuracy score: 0.74 (0.6823049919998994 seconds)
--------------
--- Naive Bayes ---
accuracy score: 0.8 (0.021531905000074403 seconds)
--------------
--- QDA ---
accuracy score: 0.78 (0.021133478999900035 seconds)
--------------
--- XGB Gradient Boosted Forest ---
accuracy score: 0.8 (1.824371759000087 seconds)
--------------


Unnamed: 0,model,timing,score
0,Nearest Neighbors,0.032931,0.8
8,Naive Bayes,0.021532,0.8
10,XGB Gradient Boosted Forest,1.824372,0.8
1,Linear SVM,0.024264,0.78
3,Gaussian Process,0.205934,0.78
6,Multi Layer Perceptron,1.215731,0.78
9,QDA,0.021133,0.78
5,Random Forest,1.468741,0.74
7,AdaBoost,0.682305,0.74
2,RBF SVM,0.024718,0.72
