## Loop through all the classifiers

- "Nearest Neighbors"
- "Linear SVM"
- "RBF SVM"
- "Gaussian Process"
- "Decision Tree"
- "Random Forest"
- "Multi Layer Perceptron"
- "AdaBoost"
- "Naive Bayes"
- "QDA"
- "XGB Gradient Boosted Forest"

... and see which one performs the best out-of-the-box.

In [22]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.ensemble import GradientBoostingClassifier

from sklearn.model_selection import train_test_split, KFold, GridSearchCV
from sklearn.pipeline import Pipeline
import timeit

from sklearn import datasets

In [23]:
# todo: this causes problems with anything but accuracy atm, need to fix that
# score_metrics = ["accuracy", "precision", "recall", "f1"]
score_metrics = ["accuracy"]


iris = datasets.load_iris()
X = iris.data
y = iris.target

kf = KFold(10)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [24]:
names = ["Nearest Neighbors", 
         "Linear SVM", 
         "RBF SVM", 
         "Gaussian Process",
         "Decision Tree", 
         "Random Forest", 
         "Multi Layer Perceptron", 
         "AdaBoost",
         "Naive Bayes", 
         "QDA", 
         "XGB Gradient Boosted Forest",
        ]

classifiers = [
    KNeighborsClassifier(),
    SVC(kernel="linear"),
    SVC(),
    GaussianProcessClassifier(),
    DecisionTreeClassifier(),
    RandomForestClassifier(),
    MLPClassifier(),
    AdaBoostClassifier(),
    GaussianNB(),
    QuadraticDiscriminantAnalysis(),
    GradientBoostingClassifier()
    ]


score_accumulator = []


for score_metric in score_metrics:
    for name, clf in zip(names, classifiers):

        pipe = Pipeline([('scaler', StandardScaler()), ('classifier', clf)])
        grid_params = {
        }
        grid = GridSearchCV(pipe, grid_params, cv=kf, scoring = score_metric)
        print(f"--- {name} ---")
        timing = timeit.timeit(lambda: grid.fit(X_train, y_train), number=1)
        score = grid.score(X_test, y_test)

        print(f"{score_metric} score: {score} ({timing} seconds)")
        print(f"--------------")

        score_accumulator.append({
        "model": name,
        "timing": timing,
        "score": score
        })

scores_df = pd.DataFrame(score_accumulator).sort_values("score", ascending=False)
scores_df

--- Nearest Neighbors ---
accuracy score: 0.98 (0.05032541799937462 seconds)
--------------
--- Linear SVM ---
accuracy score: 0.96 (0.030566257000828045 seconds)
--------------
--- RBF SVM ---
accuracy score: 0.98 (0.024052583999946364 seconds)
--------------
--- Gaussian Process ---
accuracy score: 0.98 (0.19153495299997303 seconds)
--------------
--- Decision Tree ---
accuracy score: 0.98 (0.027462089000437118 seconds)
--------------
--- Random Forest ---
accuracy score: 0.98 (1.3724493670006268 seconds)
--------------
--- Multi Layer Perceptron ---




accuracy score: 0.98 (1.3713422620003257 seconds)
--------------
--- AdaBoost ---
accuracy score: 0.92 (0.7898158429998148 seconds)
--------------
--- Naive Bayes ---
accuracy score: 0.96 (0.022599611000259756 seconds)
--------------
--- QDA ---
accuracy score: 0.98 (0.02026868199936871 seconds)
--------------
--- XGB Gradient Boosted Forest ---
accuracy score: 0.98 (1.7636449569999968 seconds)
--------------


Unnamed: 0,model,timing,score
0,Nearest Neighbors,0.050325,0.98
2,RBF SVM,0.024053,0.98
3,Gaussian Process,0.191535,0.98
4,Decision Tree,0.027462,0.98
5,Random Forest,1.372449,0.98
6,Multi Layer Perceptron,1.371342,0.98
9,QDA,0.020269,0.98
10,XGB Gradient Boosted Forest,1.763645,0.98
1,Linear SVM,0.030566,0.96
8,Naive Bayes,0.0226,0.96
