## Evaluation of ML models

### 1. Load the libraries

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from adspy_shared_utilities import plot_class_regions_for_classifier
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

### 2. Load the digits dataset

In [None]:
# Load the scikit-learn digits dataset and split it into features / labels.
dataset = load_digits()
X = dataset.data
y = dataset.target

# Print one "<class> <sample count>" line per target class.
class_counts = np.bincount(dataset.target)
for class_name, class_count in zip(dataset.target_names, class_counts):
    print(class_name, class_count)

# Collapse the multi-class labels into a binary problem:
# digit 1 stays 1 (positive class), every other digit becomes 0 (negative class).
y_binary_imbalanced = np.where(y == 1, y, 0)
print("original labels: ", y[1:30])
print("new labels: ", y_binary_imbalanced[1:30])

### 3. SVM Classifier & confusion matrix

In [None]:
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix

# Split the binary (digit 1 vs. rest) labels into train / test sets;
# random_state is fixed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y_binary_imbalanced, random_state=0)

# Fit an RBF-kernel SVM and evaluate its hard predictions on the held-out split.
svm = SVC(kernel='rbf', C=1).fit(X_train, y_train)
svm_pred = svm.predict(X_test)

# Rows of the confusion matrix are true classes, columns are predicted classes.
confusion_svm = confusion_matrix(y_test, svm_pred)
# The label names the kernel that is actually used above (rbf, not linear).
print("SVM (rbf, C=1): \n", confusion_svm)

### 4. Classification Report

In [None]:
from sklearn.metrics import classification_report

# Text summary of per-class precision, recall, F1 and support,
# computed from y_test against the svm_pred predictions.
svm_report = classification_report(y_test, svm_pred)
print("Classification report: \n", svm_report)

### 5. Cross validation score

In [None]:
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score

# Reload the digits data and build a boolean target: True where the digit is 1.
dataset = load_digits()
X = dataset.data
y = dataset.target == 1

clf = SVC(kernel='linear', C=1)

# 5-fold cross-validation of the same classifier under three metrics.
# With no `scoring` argument the estimator's own score method is used.
accuracy_per_fold = cross_val_score(clf, X, y, cv=5)
print('Cross-validation (accuracy)', accuracy_per_fold)

auc_per_fold = cross_val_score(clf, X, y, cv=5, scoring='roc_auc')
print('Cross-validation (AUC)', auc_per_fold)

recall_per_fold = cross_val_score(clf, X, y, cv=5, scoring='recall')
print('Cross-validation (recall)', recall_per_fold)

### 6. GridSearchCV Evaluation Metrics

#### 6a. Load the data

In [None]:
from sklearn.metrics import precision_score, recall_score, roc_auc_score
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

# Binary task: is the digit a 1 (True) or anything else (False)?
dataset = load_digits()
X = dataset.data
y = dataset.target == 1
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Base estimator and the gamma candidates shared by the grid searches below.
clf = SVC(kernel='rbf')
grid_values = dict(gamma=[0.01, 0.1, 1, 10])

#### 6b. Check the accuracy based on provided GAMMA values

In [None]:
# No `scoring` is given, so the grid search ranks gamma values by the
# estimator's default score (accuracy for a classifier).
grid_clf_acc = GridSearchCV(estimator=clf, param_grid=grid_values).fit(X_train, y_train)

# Continuous decision-function scores of the selected model on the test split.
y_decision_fn_scores_acc = grid_clf_acc.decision_function(X_test)

print('Grid best parameter (max. accuracy): ', grid_clf_acc.best_params_)
print('Grid best score (accuracy): ', grid_clf_acc.best_score_)

#### 6c. Check the Precision and Recall

In [None]:
# Select gamma by cross-validated precision instead of the default accuracy.
grid_clf_precision = GridSearchCV(clf, param_grid = grid_values, scoring = 'precision')
grid_clf_precision.fit(X_train, y_train)

# Continuous decision-function scores on the test split. These are signed
# distances, not class labels, so they cannot be passed to precision_score.
y_decision_fn_scores_pre = grid_clf_precision.decision_function(X_test)

# precision_score requires hard class labels; passing the continuous scores
# raises ValueError (mix of binary and continuous targets). Use the
# predictions of the refit best estimator instead.
y_pred_pre = grid_clf_precision.predict(X_test)

print('Test set Precision: ', precision_score(y_test, y_pred_pre))
print('Grid best parameter (max. Precision): ', grid_clf_precision.best_params_)
print('Grid best score (Precision): ', grid_clf_precision.best_score_)



#### 6d. Decision Tree and Metrics

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.tree import DecisionTreeClassifier

# Fit a depth-2 decision tree and predict the held-out split.
dt = DecisionTreeClassifier(max_depth=2)
dt.fit(X_train, y_train)
tree_predicted = dt.predict(X_test)

confusion = confusion_matrix(y_test, tree_predicted)
print('Decision tree classifier (max_depth = 2)\n', confusion)

# Metric definitions in terms of confusion-matrix counts:
#   Accuracy  = (TP + TN) / (TP + TN + FP + FN)
#   Precision = TP / (TP + FP)
#   Recall    = TP / (TP + FN)   (also called sensitivity or true positive rate)
#   F1        = 2 * Precision * Recall / (Precision + Recall)
for template, metric in (
    ('Accuracy: {:.2f}', accuracy_score),
    ('Precision: {:.2f}', precision_score),
    ('Recall: {:.2f}', recall_score),
    ('F1: {:.2f}', f1_score),
):
    print(template.format(metric(y_test, tree_predicted)))

### 7. Simple GridSearch Example Test

In [None]:
from sklearn.datasets import load_iris
from sklearn.svm import SVC

iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, random_state=0)
print("Size of training set: %d size of test set: %d" % (len(X_train), len(X_test)))

# Hand-rolled grid search over every (gamma, C) pair, gamma varying slowest.
# NOTE: each candidate is scored on the test split itself, so the reported
# best score is also the number used to pick the parameters.
candidate_values = [0.01, 0.1, 1, 10]
candidate_params = [(g, c) for g in candidate_values for c in candidate_values]

best_score = 0
for gamma, C in candidate_params:
    svm = SVC(gamma=gamma, C=C)
    svm.fit(X_train, y_train)
    score = svm.score(X_test, y_test)
    # Strict comparison: on ties the earliest combination is kept.
    if score > best_score:
        best_score = score
        best_parameters = {'C': C, 'gamma': gamma}

print("best score: ", best_score)
print("best parameters: ", best_parameters)