### Evaluation methods for classification

In [20]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np

# load data
from sklearn.datasets import load_digits
from sklearn.cross_validation import train_test_split
from sklearn.svm import LinearSVC

# create model
digits = load_digits()
X, y = digits.data, digits.target
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

classifier = LinearSVC(random_state=0).fit(X_train, y_train)
y_test_pred = classifier.predict(X_test)

print("X.shape: " + str(X.shape))
print("Accuracy: %f" % classifier.score(X_test, y_test))

X.shape: (1797, 64)
Accuracy: 0.953333


### Binary Tasks - Precision and Recall

In [9]:
# Binary target: `y % 2` is 1 where the label is odd and 0 where it is even,
# so despite its name `y_even` marks the odd digits as class 1.
y_even = y % 2
X_train, X_test, y_train, y_test = train_test_split(X, y_even, random_state=42)

# Seed the solver so that re-running the notebook reproduces the same fit
# and therefore the same confusion matrix / classification report.
classifier = LinearSVC(random_state=0).fit(X_train, y_train)
y_test_pred = classifier.predict(X_test)

In [10]:
# `confusion_matrix` is not imported by any of the cells shown before this one,
# so import it here; otherwise this cell raises NameError on a fresh kernel.
from sklearn.metrics import confusion_matrix

# Rows are the true labels, columns the predicted labels, in sorted label order.
confusion_matrix(y_test, y_test_pred)

array([[213,   6],
       [ 31, 200]])

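Binary confusion matrix (rows are the actual class, columns the predicted class, positive class listed first):

<table>
<tr><td></td><td>Predicted positive</td><td>Predicted negative</td></tr>
<tr><td>Actual positive</td><td>True Positive (TP)</td><td>False Negative (FN)</td></tr>
<tr><td>Actual negative</td><td>False Positive (FP)</td><td>True Negative (TN)</td></tr>
</table>

Note: scikit-learn's `confusion_matrix` sorts the labels, so for labels 0 and 1 the first row and column belong to class 0. If class 1 is taken as the positive class, the array above reads `[[TN, FP], [FN, TP]]`.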

$$ \text{precision} = \frac{TP}{FP + TP} $$

$$ \text{recall} = \frac{TP}{FN + TP} $$

$$ \text{accuracy} = \frac{TP + TN}{FP + FN + TP + TN} $$

$$ f_1 = 2 \frac{\text{precision} \cdot \text{recall}}{\text{precision} + \text{recall}} $$


In [7]:
from sklearn.metrics import classification_report

# Per-class precision, recall, F1 and support for the held-out predictions.
report_text = classification_report(y_test, y_test_pred)
print(report_text)

             precision    recall  f1-score   support

          0       0.82      0.98      0.89       219
          1       0.97      0.79      0.87       231

avg / total       0.90      0.88      0.88       450

