In [24]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.metrics import accuracy_score, make_scorer, confusion_matrix
from sklearn.model_selection import (GridSearchCV, cross_validate,
                                     train_test_split)
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from get_data import ceramiche_DB
from tqdm import tqdm

In [38]:
classifiers = {
    "Nearest Neighbors": (
                    KNeighborsClassifier(),
                    {'n_neighbors': list(range(1,11)), 'weights': ['uniform', 'distance']}
                ),
    "SVC": (
                    SVC(),
                    [{
                        'kernel': ['linear', 'rbf', 'sigmoid'],
                        'C': [x/10 for x in range(1, 11)],
                    },
                    {
                        'kernel': ['poly'],
                        'degree': list(range(2,6)),
                        'C': [x/10 for x in range(1, 11)],
                    }]
                ),
    "Decision Tree": (
                    DecisionTreeClassifier(),
                    {
                        'criterion': ['gini', 'entropy'], 
                        'min_samples_split':list(range(2,11)),
                        'min_samples_leaf': list(range(1,6)),
                    }
                ),
    "Random Forest": (
                    RandomForestClassifier(n_jobs=-1),
                    {
                        'n_estimators': list(range(1, 101, 10)), 
                        'criterion': ['gini', 'entropy'], 
                        'min_samples_split':list(range(2, 11)),
                        'min_samples_leaf': list(range(1, 6)),
                        'oob_score': [True, False],
                    }
                ),
    "Naive Bayes": (
                    GaussianNB(),
                    {}
                ),


    # "RBF SVM": SVC(gamma=2, C=1),
    # "Gaussian Process": GaussianProcessClassifier(1.0 * RBF(1.0)),
    # "Neural Net": MLPClassifier(alpha=1, max_iter=1000),
    # "AdaBoost": AdaBoostClassifier(),
    # "QDA": QuadraticDiscriminantAnalysis(),
}

In [39]:
def sensibility(y_true, y_predict):
    c_matrix = confusion_matrix(y_true, y_predict)
    return c_matrix[1][1]/sum(c_matrix[1])


def specificity(y_true, y_predict):
    c_matrix = confusion_matrix(y_true, y_predict)
    return c_matrix[0][0]/sum(c_matrix[0])


scores = {
    'sensibility' : make_scorer(sensibility, greater_is_better=True),
    'specificity' : make_scorer(specificity, greater_is_better=True),
    'accuracy' : make_scorer(accuracy_score, greater_is_better=True)
}

In [40]:
X, y = ceramiche_DB()
X_train, X_test, y_train, y_test = train_test_split(X, y)

In [41]:
result = {}

import warnings
with warnings.catch_warnings():
    warnings.simplefilter("ignore")

    for name, (clf, params) in tqdm(classifiers.items()):
        grid = GridSearchCV(clf, params, scoring='accuracy', cv = 5, n_jobs = -1)
        s = cross_validate(grid, X, y, cv=5, scoring = scores)
        result[name] = s


100%|██████████| 5/5 [43:07<00:00, 517.43s/it]


In [42]:
rows = ['sensibility', 'specificity', 'accuracy']
columns = ['mean', 'std']

row_format ="{:>20}" * (len(columns) + 1)
col_format ="{:>20}" * (len(columns) + 1)

for n, r in result.items():
    print(n.upper())

    data = [(r["test_"+x].mean(), r["test_"+x].std()) for x in rows]

    print(col_format.format("", *columns))
    for name, row in zip(rows, data):
        print(row_format.format(name, *row))

    print()
    print()


NEAREST NEIGHBORS
                                    mean                 std
         sensibility  0.9522058823529411 0.06874115230894264
         specificity  0.9333333333333333 0.13333333333333336
            accuracy  0.9477272727272726 0.04958505506652598


SVC
                                    mean                 std
         sensibility  0.9404411764705882 0.09114088489615395
         specificity  0.8666666666666666 0.16329931618554522
            accuracy  0.9212121212121213 0.05538080873483959


DECISION TREE
                                    mean                 std
         sensibility  0.9286764705882353 0.11399667633107935
         specificity  0.9333333333333333 0.13333333333333336
            accuracy   0.931060606060606 0.07742112903661345


RANDOM FOREST
                                    mean                 std
         sensibility  0.9882352941176471 0.02352941176470589
         specificity  0.9333333333333333 0.13333333333333336
            accuracy  0.97348