## 实现混淆矩阵，精准率和召回率

In [91]:
import numpy as np
from sklearn import datasets

In [92]:
digits = datasets.load_digits()
X = digits.data
y = digits.target.copy()

y[digits.target==0] = 1
y[digits.target!=0] = 0

In [93]:
X.shape

(1797, 64)

In [94]:
y.shape

(1797,)

In [95]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=666)

In [96]:
from sklearn.linear_model import LogisticRegression

log_reg = LogisticRegression()
log_reg.fit(X_train, y_train)
log_reg.score(X_test, y_test)

0.99777777777777776

In [98]:
def TN(y_true, y_predict):
    assert len(y_true) == len(y_predict)
    return np.sum((y_true) == 0 & (y_predict == 0))

y_log_predict = log_reg.predict(X_test)
TN(y_test, y_log_predict)

404

In [99]:
def FP(y_true, y_predict):
    assert len(y_true) == len(y_predict)
    return np.sum((y_true == 0) & (y_predict == 1))

FP(y_test, y_log_predict)

1

In [100]:
def FN(y_true, y_predict):
    assert len(y_true) == len(y_predict)
    return np.sum((y_true == 1) & (y_predict == 0))

FN(y_test, y_log_predict)

0

In [101]:
def TP(y_true, y_predict):
    assert len(y_true) == len(y_predict)
    return np.sum((y_true == 1) & (y_predict == 1))

TP(y_test, y_log_predict)

46

In [105]:
def confusion_matrix(y_true, y_predict):
    return np.array([
        [TN(y_true, y_predict), FP(y_true, y_predict)],
        [FN(y_true, y_predict), TP(y_true, y_predict)]
    ])

confusion_matrix(y_test, y_log_predict)

array([[404,   1],
       [  0,  46]])

In [106]:
def precision_score(y_true, y_predict):
    assert len(y_true) == len(y_predict)
    tp = TP(y_true, y_predict)
    fp = FP(y_true, y_predict)
    try:
        return tp / (tp + fp)
    except:
        return 0.0
    
precision(y_test, y_log_predict)

0.97872340425531912

In [107]:
def recall_score(y_true, y_predict):
    assert len(y_true) == len(y_predict)
    tp = TP(y_true, y_predict)
    fn = FN(y_true, y_predict)
    try:
        return tp / (tp + fn)
    except:
        return 0.0
    
recall(y_test, y_log_predict)

1.0

### scikit-learn中的混淆矩阵，精准率和召回率

In [108]:
from sklearn.metrics import confusion_matrix

confusion_matrix(y_test, y_log_predict)

array([[403,   1],
       [  0,  46]])

In [109]:
from sklearn.metrics import precision_score

precision_score(y_test, y_log_predict)

0.97872340425531912

In [110]:
from sklearn.metrics import recall_score

recall_score(y_test, y_log_predict)

1.0