# 实现混淆矩阵，精准率和召回率

In [1]:
import numpy as np
from sklearn import datasets

In [2]:
digits = datasets.load_digits()
X = digits.data

# Collapse the ten digit classes into a binary target: 1 where the digit is 9,
# 0 everywhere else. Casting the boolean mask back to the original label dtype
# yields a fresh array, so digits.target itself is left untouched.
y = (digits.target == 9).astype(digits.target.dtype)

In [3]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the samples (test_size=0.2) for evaluation; the fixed
# random_state keeps the split identical across re-runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=666)

In [4]:
from sklearn.linear_model import LogisticRegression
# Fit a logistic-regression classifier with its default settings on the training split.
log_reg = LogisticRegression()
log_reg.fit(X_train, y_train)
# Last expression of the cell, so the score on the held-out split is displayed.
# The labels are heavily skewed towards class 0, which is why the cells below
# also compute the confusion matrix, precision and recall.
log_reg.score(X_test, y_test)

0.96111111111111114

In [5]:
# Predicted labels for the held-out test samples; every metric below compares
# these against y_test.
y_predict = log_reg.predict(X_test)

In [6]:
def TN(y_test, y_predict):
    """Count true negatives: samples labelled 0 that were also predicted 0.

    Args:
        y_test: Array-like of ground-truth binary labels (0 or 1).
        y_predict: Array-like of predicted binary labels, aligned with ``y_test``.

    Returns:
        NumPy integer count of positions where both labels are 0.
    """
    # Coerce to ndarrays so plain Python lists are compared element-wise;
    # a bare ``list == 0`` is a single False and would silently count 0.
    y_test, y_predict = np.asarray(y_test), np.asarray(y_predict)
    return np.sum((y_test == 0) & (y_predict == 0))
def TP(y_test, y_predict):
    """Count true positives: samples labelled 1 that were also predicted 1.

    Args:
        y_test: Array-like of ground-truth binary labels (0 or 1).
        y_predict: Array-like of predicted binary labels, aligned with ``y_test``.

    Returns:
        NumPy integer count of positions where both labels are 1.
    """
    # Coerce to ndarrays so plain Python lists are compared element-wise;
    # a bare ``list == 1`` is a single False and would silently count 0.
    y_test, y_predict = np.asarray(y_test), np.asarray(y_predict)
    return np.sum((y_test == 1) & (y_predict == 1))
def FN(y_test, y_predict):
    """Count false negatives: samples labelled 1 that were predicted 0.

    Args:
        y_test: Array-like of ground-truth binary labels (0 or 1).
        y_predict: Array-like of predicted binary labels, aligned with ``y_test``.

    Returns:
        NumPy integer count of positions where the truth is 1 and the prediction is 0.
    """
    # Coerce to ndarrays so plain Python lists are compared element-wise;
    # a bare ``list == 1`` is a single False and would silently count 0.
    y_test, y_predict = np.asarray(y_test), np.asarray(y_predict)
    return np.sum((y_test == 1) & (y_predict == 0))
def FP(y_test, y_predict):
    """Count false positives: samples labelled 0 that were predicted 1.

    Args:
        y_test: Array-like of ground-truth binary labels (0 or 1).
        y_predict: Array-like of predicted binary labels, aligned with ``y_test``.

    Returns:
        NumPy integer count of positions where the truth is 0 and the prediction is 1.
    """
    # Coerce to ndarrays so plain Python lists are compared element-wise;
    # a bare ``list == 0`` is a single False and would silently count 0.
    y_test, y_predict = np.asarray(y_test), np.asarray(y_predict)
    return np.sum((y_test == 0) & (y_predict == 1))

In [7]:
# Tally each of the four confusion-matrix outcomes on the test split,
# in row-major order of the matrix: [[tn, fp], [fn, tp]].
tn = TN(y_test, y_predict)
fp = FP(y_test, y_predict)
fn = FN(y_test, y_predict)
tp = TP(y_test, y_predict)

In [8]:
tn, fp, fn, tp

(318, 6, 8, 28)

In [9]:
def confusion_matrix(y_test, y_predict):
    """Build the 2x2 confusion matrix for binary labels.

    Rows correspond to the true label and columns to the predicted label,
    both ordered 0 then 1, giving the layout ``[[TN, FP], [FN, TP]]``.

    Args:
        y_test: Ground-truth binary labels.
        y_predict: Predicted binary labels, aligned with ``y_test``.

    Returns:
        A 2x2 NumPy array of counts.
    """
    actual_negative_row = [TN(y_test, y_predict), FP(y_test, y_predict)]
    actual_positive_row = [FN(y_test, y_predict), TP(y_test, y_predict)]
    return np.array([actual_negative_row, actual_positive_row])

In [10]:
confusion_matrix(y_test, y_predict)

array([[318,   6],
       [  8,  28]])

#### 精准率与召回率

In [17]:
def precision_score(y_test, y_predict):
    """Precision of the positive class: TP / (TP + FP).

    Args:
        y_test: Ground-truth binary labels.
        y_predict: Predicted binary labels, aligned with ``y_test``.

    Returns:
        The fraction of predicted positives that are truly positive, or
        ``0.0`` when nothing was predicted positive.
    """
    tp = TP(y_test, y_predict)
    fp = FP(y_test, y_predict)
    predicted_positives = tp + fp
    # The counts are NumPy integers, so 0 / 0 would yield nan with a
    # RuntimeWarning instead of raising; a try/except never sees it.
    # Check the denominator explicitly to get the intended 0.0 fallback.
    if predicted_positives == 0:
        return 0.0
    return tp / predicted_positives
def recall_score(y_test, y_predict):
    """Recall of the positive class: TP / (TP + FN).

    Args:
        y_test: Ground-truth binary labels.
        y_predict: Predicted binary labels, aligned with ``y_test``.

    Returns:
        The fraction of truly positive samples that were predicted positive,
        or ``0.0`` when there are no positive samples.
    """
    tp = TP(y_test, y_predict)
    fn = FN(y_test, y_predict)
    actual_positives = tp + fn
    # The counts are NumPy integers, so 0 / 0 would yield nan with a
    # RuntimeWarning instead of raising; a try/except never sees it.
    # Check the denominator explicitly to get the intended 0.0 fallback.
    if actual_positives == 0:
        return 0.0
    return tp / actual_positives

In [15]:
precision_score(y_test, y_predict)

0.82352941176470584

In [18]:
recall_score(y_test, y_predict)

0.77777777777777779

#### scikit-learn中的混淆矩阵，精准率和召回率

In [19]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score

In [20]:
confusion_matrix(y_test, y_predict)

array([[318,   6],
       [  8,  28]])

In [21]:
precision_score(y_test, y_predict)

0.82352941176470584

In [22]:
recall_score(y_test, y_predict)

0.77777777777777779