### 实现混淆矩阵，精准率和召回率

In [3]:
import numpy as np
from sklearn import datasets

In [6]:
digits = datasets.load_digits()
X = digits.data

# Simulate a skewed (imbalanced) binary problem: digit 9 becomes the positive
# class (label 1) and every other digit becomes the negative class (label 0).
# astype() allocates a new array, so digits.target itself is never modified.
y = (digits.target == 9).astype(digits.target.dtype)

In [7]:
from sklearn.model_selection import train_test_split

# Fixed random_state keeps the split (and every metric below) reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=666,
)

In [9]:
from sklearn.linear_model import LogisticRegression

# fit() returns the estimator itself, so construction and training can be chained.
log_reg = LogisticRegression().fit(X_train, y_train)

# Score on the held-out split. On skewed labels this single number can look
# good on its own, which is why the confusion matrix is examined below.
log_reg.score(X_test, y_test)



0.9755555555555555

In [10]:
# Predicted 0/1 labels for the test split; reused by every metric cell below.
y_log_predict = log_reg.predict(X_test)

In [12]:
# The confusion matrix is referred to as `matrix` below.
# tn = matrix[0][0]: samples that are not 9 and are predicted as not 9.
def TN(y_true, y_predict):
    """Count true negatives: true label 0 and predicted label 0.

    Args:
        y_true: array-like of ground-truth binary labels (0 or 1).
        y_predict: array-like of predicted binary labels, aligned with y_true.

    Returns:
        The number of positions where y_true and y_predict are both 0.
    """
    # Coerce to ndarrays so the comparisons are element-wise. With plain
    # Python lists, `y_true == 0` is a single bool and the count is silently 0.
    y_true = np.asarray(y_true)
    y_predict = np.asarray(y_predict)
    return np.sum((y_true == 0) & (y_predict == 0))

# True negatives on the test split.
TN(y_test, y_log_predict)

403

In [18]:
# fp = matrix[0][1]: samples that are not 9 but are predicted as 9.
def FP(y_true, y_predict):
    """Count false positives: true label 0 but predicted label 1.

    Args:
        y_true: array-like of ground-truth binary labels (0 or 1).
        y_predict: array-like of predicted binary labels, aligned with y_true.

    Returns:
        The number of positions where y_true is 0 and y_predict is 1.
    """
    # Coerce to ndarrays so the comparisons are element-wise. With plain
    # Python lists, `y_true == 0` is a single bool and the count is silently 0.
    y_true = np.asarray(y_true)
    y_predict = np.asarray(y_predict)
    return np.sum((y_true == 0) & (y_predict == 1))

# False positives on the test split.
FP(y_test, y_log_predict)

2

In [19]:
# fn = matrix[1][0]: samples that are 9 but are predicted as not 9.
def FN(y_true, y_predict):
    """Count false negatives: true label 1 but predicted label 0.

    Args:
        y_true: array-like of ground-truth binary labels (0 or 1).
        y_predict: array-like of predicted binary labels, aligned with y_true.

    Returns:
        The number of positions where y_true is 1 and y_predict is 0.
    """
    # Coerce to ndarrays so the comparisons are element-wise. With plain
    # Python lists, `y_true == 1` is a single bool and the count is silently 0.
    y_true = np.asarray(y_true)
    y_predict = np.asarray(y_predict)
    return np.sum((y_true == 1) & (y_predict == 0))

# False negatives on the test split.
FN(y_test, y_log_predict)

9

In [20]:
# tp = matrix[1][1]: samples that are 9 and are predicted as 9.
def TP(y_true, y_predict):
    """Count true positives: true label 1 and predicted label 1.

    Args:
        y_true: array-like of ground-truth binary labels (0 or 1).
        y_predict: array-like of predicted binary labels, aligned with y_true.

    Returns:
        The number of positions where y_true and y_predict are both 1.
    """
    # Coerce to ndarrays so the comparisons are element-wise. With plain
    # Python lists, `y_true == 1` is a single bool and the count is silently 0.
    y_true = np.asarray(y_true)
    y_predict = np.asarray(y_predict)
    return np.sum((y_true == 1) & (y_predict == 1))

# True positives on the test split.
TP(y_test, y_log_predict)

36

In [24]:
def confusion_matrix(y_true, y_predict):
    """Assemble the 2x2 binary confusion matrix.

    Rows are indexed by the true label and columns by the predicted label,
    giving the layout [[TN, FP], [FN, TP]].

    Args:
        y_true: ground-truth binary labels (0 or 1).
        y_predict: predicted binary labels, aligned with y_true.

    Returns:
        A 2x2 numpy array of counts.
    """
    actual_negative_row = [TN(y_true, y_predict), FP(y_true, y_predict)]
    actual_positive_row = [FN(y_true, y_predict), TP(y_true, y_predict)]
    return np.array([actual_negative_row, actual_positive_row])

# Hand-written confusion matrix for the test split, laid out as [[TN, FP], [FN, TP]].
confusion_matrix(y_test, y_log_predict)

array([[403,   2],
       [  9,  36]])

In [25]:
def precision_score(y_true, y_predict):
    """Compute precision = TP / (TP + FP) for the positive class (label 1).

    Args:
        y_true: ground-truth binary labels (0 or 1).
        y_predict: predicted binary labels, aligned with y_true.

    Returns:
        The fraction of predicted positives that are truly positive, or 0.0
        when nothing was predicted positive (TP + FP == 0).
    """
    tp = TP(y_true, y_predict)
    fp = FP(y_true, y_predict)
    predicted_positive = tp + fp
    # Guard explicitly: dividing NumPy integers by zero yields nan with a
    # RuntimeWarning instead of raising, so a try/except cannot catch it.
    if predicted_positive == 0:
        return 0.0
    return tp / predicted_positive
    
# Precision of the logistic-regression predictions on the test split.
precision_score(y_test, y_log_predict)

0.9473684210526315

In [26]:
def recall_score(y_true, y_predict):
    """Compute recall = TP / (TP + FN) for the positive class (label 1).

    Args:
        y_true: ground-truth binary labels (0 or 1).
        y_predict: predicted binary labels, aligned with y_true.

    Returns:
        The fraction of truly positive samples that were predicted positive,
        or 0.0 when there are no positive samples (TP + FN == 0).
    """
    tp = TP(y_true, y_predict)
    fn = FN(y_true, y_predict)
    actual_positive = tp + fn
    # Guard explicitly: dividing NumPy integers by zero yields nan with a
    # RuntimeWarning instead of raising, so a try/except cannot catch it.
    if actual_positive == 0:
        return 0.0
    return tp / actual_positive

# Recall of the logistic-regression predictions on the test split.
recall_score(y_test, y_log_predict)

0.8

### scikit-learn中的混淆矩阵，精准率和召回率

In [27]:
# NOTE: this import rebinds `confusion_matrix`, replacing the hand-written
# function defined earlier in this notebook with scikit-learn's implementation.
from sklearn.metrics import confusion_matrix

confusion_matrix(y_test, y_log_predict)

array([[403,   2],
       [  9,  36]])

In [28]:
# NOTE: this import rebinds `precision_score` and `recall_score`, replacing the
# hand-written functions defined earlier in this notebook.
from sklearn.metrics import precision_score, recall_score

precision_score(y_test, y_log_predict)

0.9473684210526315

In [29]:
# Recall via the scikit-learn function imported in the previous cell.
recall_score(y_test, y_log_predict)

0.8