In [None]:
### Basic Libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy.io import loadmat

### Classifiers
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier


#### Datasets, Metrics and Preprocessing

from matplotlib.colors import ListedColormap
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score, cross_val_predict, cross_validate
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn import datasets
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import f1_score, precision_score, recall_score, roc_auc_score

from sklearn.utils.multiclass import unique_labels

In [None]:
def plot_confusion(y,ypred,Method):
    """
    Plot the confusion matrix of the predictions 'ypred' against the
    true labels 'y'; used by the classify function.

    The cell colour encodes the row-normalised value (the fraction of
    each true class assigned to each predicted class), while the number
    written in each cell is the raw count.

    Parameters
    ----------
    y : array-like
        True labels.
    ypred : array-like
        Predicted labels, same length as 'y'.
    Method : object
        The classifier; only the part of str(Method) before the first
        '(' is used, to build the figure title.

    Returns
    -------
    ax : matplotlib Axes holding the plot
    conf : ndarray with the raw (un-normalised) confusion counts
    """
    conf = confusion_matrix(y,ypred)

    # Normalise each row by its total. A label that only ever appears in
    # 'ypred' has an all-zero row; plain division would give 0/0 = NaN
    # (plus a RuntimeWarning), so such rows are left at 0 instead.
    row_totals = conf.sum(axis=1, keepdims=True)
    conf_std = np.divide(conf, row_totals,
                         out=np.zeros(conf.shape, dtype=float),
                         where=row_totals != 0)

    classes = unique_labels(y,ypred)
    cmap = plt.cm.Blues
    title = 'Confusion Matrix for '+ str(Method).split('(',1)[0]
    fmt = 'd'
    # Cells whose normalised value exceeds this get white text so the
    # count stays readable on the dark end of the colormap.
    thresh = 0.5

    fig, ax = plt.subplots()
    im = ax.imshow(conf_std, interpolation = 'nearest', cmap = cmap)
    ax.figure.colorbar(im,ax=ax)
    ax.set(xticks = np.arange(conf.shape[1]),
           yticks = np.arange(conf.shape[0]),
           xticklabels = classes, yticklabels = classes,
           title = title,
           ylabel = 'True', xlabel = 'Predicted')

    # Annotate every cell with its raw count.
    for i in range(conf.shape[0]):
        for j in range(conf.shape[1]):
            ax.text(j, i, format(conf[i, j], fmt),
                    ha="center", va="center",
                    color="white" if conf_std[i, j] > thresh else "black")
    fig.tight_layout()
    return ax, conf

In [None]:
def del_nan(X,y):
    """
    Remove every sample (row) that has a missing value in 'X' or in 'y'.

    Parameters
    ----------
    X : anything accepted by pd.DataFrame
        Feature matrix, one row per sample.
    y : array-like
        Labels, one per row of 'X'. It is attached as a column named
        'y', so a pandas Series is aligned on its index and a feature
        column already labelled 'y' would be replaced by the labels.

    Returns
    -------
    X : pd.DataFrame
        The rows without missing values (original index labels kept).
    y : pd.Series
        The matching labels, named 'y'.
    nb_del : int
        Number of rows that were removed.
    """
    # Work on a private copy so that attaching the label column can
    # never leak into a DataFrame passed in by the caller.
    df = pd.DataFrame(X, copy=True)
    df['y'] = y

    # Rows with a NaN in any feature or in the label; computed once and
    # reused for both the count and the filtering.
    has_nan = df.isnull().any(axis=1)

    # report the number of deletions
    nb_del = int(has_nan.sum())

    df = df[~has_nan]
    X = df.drop('y', axis=1)
    y = df['y']
    return X, y, nb_del

In [None]:
def classify(X,y,Method,n_repeats=10,n_splits=10,random_state=0):
    """
    This function does the classification on the data 'X'
    and labels 'y' with the classification 'Method' of
    your choice. It prints the confusion matrix with its plot.

    The classification is based on stratified k-fold cross validation
    (10 folds by default) repeated 'n_repeats' times (10 by default).
    The folds are reshuffled with a different seed on every repeat, so
    each repeat really evaluates a different partition of the data.

    Parameters
    ----------
    X : array-like
        Feature matrix; rows containing NaN are removed first.
    y : array-like
        Class labels, one per row of 'X'.
    Method : estimator
        Unfitted scikit-learn classifier; it is cloned and fitted once
        per fold by cross_validate.
    n_repeats : int, default 10
        Number of times the whole cross validation is repeated.
    n_splits : int, default 10
        Number of folds in each repeat.
    random_state : int or None, default 0
        Base seed; repeat number r shuffles with random_state + r.
        None gives a fresh, non-reproducible shuffle on every repeat.

    Returns
    -------
    conf : ndarray
        Confusion matrix accumulated over all repeats (raw counts).
    test_scores : ndarray of length n_repeats * n_splits
        Per-fold test score (the estimator's default score).
    fit_time : ndarray of length n_repeats * n_splits
        Per-fold fitting time reported by cross_validate.

    Raises
    ------
    ValueError
        If 'n_repeats' is smaller than 1.
    """
    if n_repeats < 1:
        raise ValueError('n_repeats must be at least 1')

    clf = Method

    # Delete Nans first
    X, y, nb_del = del_nan(X,y)
    y_true = np.asarray(y)

    # Per-repeat results are collected in lists and concatenated once at
    # the end; this keeps the label dtype (seeding with an empty float
    # array would turn integer labels into floats).
    ypred_runs, score_runs, time_runs = [], [], []

    for rep in range(n_repeats):
        # A different seed per repeat gives different folds each time
        # while keeping the whole experiment reproducible.
        seed = None if random_state is None else random_state + rep
        folds = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=seed)
        # Materialise the splits so the scores and the predictions below
        # are guaranteed to refer to exactly the same folds.
        splits = list(folds.split(X, y_true))

        # One cross validation pass: the fitted estimators are kept so
        # the out-of-fold predictions come from the very same models
        # whose test scores and fit times are reported.
        cv_results = cross_validate(clf, X, y, cv=splits, return_estimator=True)

        ypred_new = np.empty(y_true.shape, dtype=y_true.dtype)
        for fitted, (_, test_idx) in zip(cv_results['estimator'], splits):
            ypred_new[test_idx] = fitted.predict(X.iloc[test_idx])

        ypred_runs.append(ypred_new)
        score_runs.append(cv_results['test_score'])
        time_runs.append(cv_results['fit_time'])

    ypred = np.concatenate(ypred_runs)
    # Repeat the y values to construct the full confusion matrix
    y_10x = np.tile(y_true, n_repeats)
    test_scores = np.concatenate(score_runs)
    fit_time = np.concatenate(time_runs)

    _, conf = plot_confusion(y_10x, ypred, clf)
    q25, q75 = np.percentile(test_scores, [25, 75])
    print('The model score is %0.3f (+/- %0.3f)'% (np.mean(test_scores),np.std(test_scores)))
    print('The IQR is %0.3f between [%0.2f,%0.2f]'% (q75 - q25, q25, q75))
    print('There is %d deleted rows'% nb_del)
    print(classification_report(y_10x, ypred))
    return conf, test_scores, fit_time