# Confusion Matrix, precision, recall

In [1]:
import numpy as np
from sklearn import datasets

In [2]:
# Load the bundled digits dataset; keep the features as-is and take a private
# copy of the labels so they can be rewritten without touching digits.target.
digits = datasets.load_digits()
X, y = digits.data, digits.target.copy()

In [3]:
# Collapse the ten digit classes into a binary problem, in place:
# digit 9 becomes the positive class (1), every other digit becomes 0.
# The mask is built from digits.target (not y), so re-running is harmless.
y[:] = np.where(digits.target == 9, 1, 0)

In [4]:
# Hold out a test split for evaluation; random_state is pinned so the
# train/test partition is the same on every run.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 666)

In [5]:
# Fit a logistic-regression classifier, constructed with no explicit
# hyperparameters, on the training split. The fitted estimator returned by
# fit() is the cell's displayed output.
from sklearn.linear_model import LogisticRegression
log_reg = LogisticRegression()
log_reg.fit(X_train, y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [6]:
# Score of the fitted classifier on the held-out test split (for scikit-learn
# classifiers this is mean accuracy). The labels are heavily imbalanced
# (only digit 9 is positive), so this number alone says little.
log_reg.score(X_test, y_test)

0.9755555555555555

In [7]:
# Class predictions for the test split; every metric cell below compares
# these against y_test.
log_reg_predict = log_reg.predict(X_test)

In [8]:
def TN(y_true, y_predict):
    """Count true negatives: samples whose true label is 0 and predicted label is 0.

    Parameters
    ----------
    y_true : array-like
        Ground-truth binary labels (0 or 1).
    y_predict : array-like
        Predicted binary labels, same length as ``y_true``.

    Returns
    -------
    NumPy integer
        Number of positions where both ``y_true`` and ``y_predict`` equal 0.

    Raises
    ------
    AssertionError
        If the two inputs differ in length.
    """
    assert len(y_true) == len(y_predict)
    # A plain Python list compared to a scalar yields a single bool
    # (``[0, 1] == 0`` is ``False``), which would silently give a count of 0.
    # Coerce to ndarrays so the comparisons are element-wise.
    y_true = np.asarray(y_true)
    y_predict = np.asarray(y_predict)
    return np.sum((y_true == 0) & (y_predict == 0))

def TP(y_true, y_predict):
    """Count true positives: samples whose true label is 1 and predicted label is 1.

    Parameters
    ----------
    y_true : array-like
        Ground-truth binary labels (0 or 1).
    y_predict : array-like
        Predicted binary labels, same length as ``y_true``.

    Returns
    -------
    NumPy integer
        Number of positions where both ``y_true`` and ``y_predict`` equal 1.

    Raises
    ------
    AssertionError
        If the two inputs differ in length.
    """
    assert len(y_true) == len(y_predict)
    # A plain Python list compared to a scalar yields a single bool
    # (``[0, 1] == 1`` is ``False``), which would silently give a count of 0.
    # Coerce to ndarrays so the comparisons are element-wise.
    y_true = np.asarray(y_true)
    y_predict = np.asarray(y_predict)
    return np.sum((y_true == 1) & (y_predict == 1))

def FN(y_true, y_predict):
    """Count false negatives: samples whose true label is 1 but predicted label is 0.

    Parameters
    ----------
    y_true : array-like
        Ground-truth binary labels (0 or 1).
    y_predict : array-like
        Predicted binary labels, same length as ``y_true``.

    Returns
    -------
    NumPy integer
        Number of positions where ``y_true`` is 1 and ``y_predict`` is 0.

    Raises
    ------
    AssertionError
        If the two inputs differ in length.
    """
    assert len(y_true) == len(y_predict)
    # A plain Python list compared to a scalar yields a single bool
    # (``[0, 1] == 1`` is ``False``), which would silently give a count of 0.
    # Coerce to ndarrays so the comparisons are element-wise.
    y_true = np.asarray(y_true)
    y_predict = np.asarray(y_predict)
    return np.sum((y_true == 1) & (y_predict == 0))

def FP(y_true, y_predict):
    """Count false positives: samples whose true label is 0 but predicted label is 1.

    Parameters
    ----------
    y_true : array-like
        Ground-truth binary labels (0 or 1).
    y_predict : array-like
        Predicted binary labels, same length as ``y_true``.

    Returns
    -------
    NumPy integer
        Number of positions where ``y_true`` is 0 and ``y_predict`` is 1.

    Raises
    ------
    AssertionError
        If the two inputs differ in length.
    """
    assert len(y_true) == len(y_predict)
    # A plain Python list compared to a scalar yields a single bool
    # (``[0, 1] == 0`` is ``False``), which would silently give a count of 0.
    # Coerce to ndarrays so the comparisons are element-wise.
    y_true = np.asarray(y_true)
    y_predict = np.asarray(y_predict)
    return np.sum((y_true == 0) & (y_predict == 1))


In [10]:
# Report the four confusion-matrix cells for the test split, one per line.
print(
    'TN = {}'.format(TN(y_test, log_reg_predict)),
    'TP = {}'.format(TP(y_test, log_reg_predict)),
    'FN = {}'.format(FN(y_test, log_reg_predict)),
    'FP = {}'.format(FP(y_test, log_reg_predict)),
    sep='\n',
)

TN = 403
TP = 36
FN = 9
FP = 2


In [11]:
def confusion_matrix(y_true, y_predict):
    """Build the 2x2 confusion matrix for binary labels.

    Rows are the actual class (0 then 1) and columns the predicted class
    (0 then 1), i.e. ``[[TN, FP], [FN, TP]]``.

    Returns
    -------
    numpy.ndarray
        The four counts arranged as a 2x2 array.
    """
    actual_negative_row = [TN(y_true, y_predict), FP(y_true, y_predict)]
    actual_positive_row = [FN(y_true, y_predict), TP(y_true, y_predict)]
    return np.array([actual_negative_row, actual_positive_row])

In [12]:
# Confusion matrix of the test-split predictions, laid out as
# [[TN, FP], [FN, TP]] by the function defined in the previous cell.
confusion_matrix(y_test, log_reg_predict)

array([[403,   2],
       [  9,  36]])

In [15]:
def precision_score(y_true, y_predict):
    """Precision of the positive class (label 1): TP / (TP + FP).

    Parameters
    ----------
    y_true : array-like
        Ground-truth binary labels (0 or 1).
    y_predict : array-like
        Predicted binary labels, same length as ``y_true``.

    Returns
    -------
    float
        The precision, or 0.0 when no sample was predicted positive
        (TP + FP == 0).

    Raises
    ------
    AssertionError
        Propagated from the TP/FP helpers when the inputs differ in length.
    """
    tp = TP(y_true, y_predict)
    fp = FP(y_true, y_predict)
    predicted_positive = tp + fp
    # The counts are NumPy integers, and dividing those by zero yields nan
    # with a RuntimeWarning instead of raising ZeroDivisionError. Test the
    # denominator explicitly so the documented 0.0 fallback really applies,
    # and so unrelated errors are no longer swallowed by a blanket except.
    if predicted_positive == 0:
        return 0.0
    return tp / predicted_positive
    
def recall_score(y_true, y_predict):
    """Recall of the positive class (label 1): TP / (TP + FN).

    Parameters
    ----------
    y_true : array-like
        Ground-truth binary labels (0 or 1).
    y_predict : array-like
        Predicted binary labels, same length as ``y_true``.

    Returns
    -------
    float
        The recall, or 0.0 when there are no actual positives
        (TP + FN == 0).

    Raises
    ------
    AssertionError
        Propagated from the TP/FN helpers when the inputs differ in length.
    """
    tp = TP(y_true, y_predict)
    fn = FN(y_true, y_predict)
    actual_positive = tp + fn
    # The counts are NumPy integers, and dividing those by zero yields nan
    # with a RuntimeWarning instead of raising ZeroDivisionError. Test the
    # denominator explicitly so the documented 0.0 fallback really applies,
    # and so unrelated errors are no longer swallowed by a blanket except.
    if actual_positive == 0:
        return 0.0
    return tp / actual_positive

In [16]:
# Precision on the test split, computed with the hand-written
# precision_score defined in the previous cell.
precision_score(y_test, log_reg_predict)

0.9473684210526315

In [17]:
# Recall on the test split, computed with the hand-written recall_score
# defined earlier in this notebook.
recall_score(y_test, log_reg_predict)

0.8

# scikit-learn中的精准率和召回率

In [19]:
# NOTE: this import rebinds the name `confusion_matrix`. From this cell on,
# the hand-written function defined earlier in the notebook is shadowed by
# scikit-learn's implementation (re-running earlier cells will use it too).
from sklearn.metrics import confusion_matrix
confusion_matrix(y_test, log_reg_predict)

array([[403,   2],
       [  9,  36]], dtype=int64)

In [20]:
# NOTE: this import rebinds the name `precision_score`, shadowing the
# hand-written function defined earlier in the notebook with scikit-learn's.
from sklearn.metrics import precision_score
precision_score(y_test, log_reg_predict)

0.9473684210526315

In [21]:
# NOTE: this import rebinds the name `recall_score`, shadowing the
# hand-written function defined earlier in the notebook with scikit-learn's.
from sklearn.metrics import recall_score
recall_score(y_test, log_reg_predict)

0.8

In [22]:
# NOTE: rebinds `precision_score` once more, this time to the playML
# implementation; it replaces the scikit-learn function imported in an
# earlier cell for everything that runs after this point.
from playML.metrics import precision_score
precision_score(y_test, log_reg_predict)

0.9473684210526315

In [23]:
# NOTE: rebinds `recall_score` once more, this time to the playML
# implementation; it replaces the scikit-learn function imported in an
# earlier cell for everything that runs after this point.
from playML.metrics import recall_score
recall_score(y_test, log_reg_predict)

0.8