# Leitura de features de imagens.

Feature: HOG (a ser extraído usando o notebook $Extract\_Hog.ipynb$).

Classificadores a serem testados: $KNN$ (k: 9), $SVM$ (kernel: linear), $SVM$ (kernel: RBF), $Decision Tree$ (profundidade máxima: 10, critério: entropia), $Random Forest$ (profundidade máxima: 10, número de estimadores: 100) e $Adaboost$.

Métricas a serem computadas: accuracy, precision, recall, confusion matrix, F1, AUC da ROC.

Dica: para concatenar as features das samples, é recomendado colocar os feature vectors individuais em listas e depois usar a função np.asarray() para transformar as listas em ndarrays.

Links úteis:

https://docs.scipy.org/doc/numpy/reference/generated/numpy.asarray.html

https://docs.scipy.org/doc/numpy/reference/generated/numpy.save.html

https://docs.scipy.org/doc/numpy/reference/generated/numpy.load.html

http://scikit-learn.org/stable/modules/classes.html

http://scikit-learn.org/stable/modules/model_evaluation.html

In [2]:
# Code source: Gaël Varoquaux
#              Andreas Müller
# Modified for documentation by Jaques Grobler
# License: BSD 3 clause

import os
import numpy as np
import matplotlib.pyplot as plt
import itertools

from matplotlib.colors import ListedColormap
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_moons, make_circles, load_iris
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn import metrics

%matplotlib inline

#############################################################################################################
# Adapted from: http://scikit-learn.org/stable/auto_examples/classification/plot_classifier_comparison.html #
#               http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html     #
#############################################################################################################

def load_mnist_hog(npy_dir):
    
    # TO DO: Load MNIST files in directory npy_dir and return ndarrays with X (data) and y (labels).
    #        The shapes for these arrays must be X.shape = (n_samples, n_features) and y.shape = (n_samples).
    pass

def plot_confusion_matrix(cm, classes,
                          normalize=True,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):

    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    print('Confusion matrix, without normalization')
    print(cm)
    
    cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
    
    print("Normalized confusion matrix")
    print(cm)
    
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.4f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    
    plt.show()

h = .02  # Step size in the mesh for plotting.

# Classifier names.
names = ["KNN", "Linear SVM", "RBF SVM", "Decision Tree",
         "Random Forest", "AdaBoost"]

# Presetting classifiers.
classifiers = [
    KNeighborsClassifier(9),
    SVC(kernel="linear", C=0.1),
    SVC(gamma=2, C=1),
    DecisionTreeClassifier(max_depth=5),
    RandomForestClassifier(max_depth=7, n_estimators=100),
    AdaBoostClassifier()
]

# Setting dataset.
npy_dir = '../../../data/mnist_feats/'

X_train, y_train = load_mnist_hog(os.path.join(npy_dir, 'train'))
X_test, y_test = load_mnist_hog(os.path.join(npy_dir, 'test'))

# Printing dataset shapes.
print('')
print('####################################')
print('####################################')
print('X_train', X_train.shape)
print('X_test', X_test.shape)
print('y_train', y_train.shape)
print('y_test', y_test.shape)

# Iterate over classifiers.
for name, clf in zip(names, classifiers):

    print('------------------------------------')
    print('    ', 'Classifier', name)
    
    # Fitting classifier to train data.
    clf.fit(X_train, y_train)
    
    # Obtaining class prediction for training data.
    y_pred_train = clf.predict(X_train)
    
    # Obtaining class prediction for unseen data.
    y_pred_test = clf.predict(X_test)
    
    # Computing error metrics in the training data.
    acc_train = metrics.accuracy_score(y_train, y_pred_train)
    pre_train = metrics.precision_score(y_train, y_pred_train)
    rec_train = metrics.recall_score(y_train, y_pred_train)
    f1_train  = metrics.f1_score(y_train, y_pred_train)
    roc_train = metrics.roc_auc_score(y_train, y_pred_train)
    cnf_train = metrics.confusion_matrix(y_train, y_pred_train)
    
    # Computing error metrics in the unseen data.
    acc_test = metrics.accuracy_score(y_test, y_pred_test)
    pre_test = metrics.precision_score(y_test, y_pred_test)
    rec_test = metrics.recall_score(y_test, y_pred_test)
    f1_test  = metrics.f1_score(y_test, y_pred_test)
    roc_test = metrics.roc_auc_score(y_test, y_pred_test)
    cnf_test = metrics.confusion_matrix(y_test, y_pred_test)
        
    # Printing error metrics.
    print('        ', 'Accuracy Train/Test', acc_train, acc_test)
    print('        ', 'Precision Train/Test', pre_train, pre_test)
    print('        ', 'Recall Train/Test', rec_train, rec_test)
    print('        ', 'F1 Train/Test', f1_train, f1_test)
    print('        ', 'ROC Train/Test', roc_train, roc_test)
    
    plot_confusion_matrix(cnf_train, ['class 0', 'class 1'], title='Confusion Matrix Train ' + name)
    plot_confusion_matrix(cnf_test, ['class 0', 'class 1'], title='Confusion Matrix Test ' + name)

TypeError: 'NoneType' object is not iterable