# Precision-Recall Trade-off

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets

digits = datasets.load_digits()
X = digits.data

# Binary target: 1 where the original label is 9, 0 everywhere else.
# astype() returns a fresh array, so digits.target itself is left untouched,
# and reusing its dtype keeps y the same integer type as the original labels.
y = (digits.target == 9).astype(digits.target.dtype)

In [2]:
from sklearn.model_selection import train_test_split

# Hold out part of the data for evaluation; random_state is pinned so the
# split (and every score computed below) is the same on each run.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=666)

In [3]:
from sklearn.linear_model import LogisticRegression

# fit() hands back the fitted estimator, so construction and training chain.
log_reg = LogisticRegression().fit(X_train, y_train)

# Hard 0/1 predictions on the held-out samples.
y_predict = log_reg.predict(X_test)

In [4]:
from sklearn.metrics import f1_score

# F1 of the default predictions; keywords make the argument order explicit.
f1_score(y_true=y_test, y_pred=y_predict)

0.8674698795180723

In [5]:
from sklearn.metrics import  confusion_matrix

# Rows are the true class, columns the predicted class.
confusion_matrix(y_true=y_test, y_pred=y_predict)

array([[403,   2],
       [  9,  36]])

In [6]:
from sklearn.metrics import precision_score

# Share of predicted positives that really are positive.
precision_score(y_true=y_test, y_pred=y_predict)

0.9473684210526315

In [7]:
from sklearn.metrics import recall_score

# Share of actual positives that the model found.
recall_score(y_true=y_test, y_pred=y_predict)

0.8

In [8]:
# Raw decision score for every test sample (the whole array is displayed).
log_reg.decision_function(X=X_test)

array([-22.05695252, -33.02935768, -16.21330315, -80.37911802,
       -48.25122108, -24.54001722, -44.39165048, -25.04289608,
        -0.97825371, -19.71741442, -66.2513629 , -51.09597081,
       -31.49344381, -46.05332229, -38.67871282, -29.80467899,
       -37.58845784, -82.57566105, -37.81900216, -11.01161403,
        -9.17436552, -85.13001044, -16.71613172, -46.23720572,
        -5.32987792, -47.91759651, -11.66726435, -39.19597314,
       -25.25289264, -14.36642792, -16.99780033, -28.91901285,
       -34.33937219, -29.47599892,  -7.8580823 ,  -3.82091079,
       -24.08158273, -22.16359297, -33.61214815, -23.14018703,
       -26.91799781, -62.38933888, -38.85685534, -66.77255914,
       -20.14478204, -17.47881566, -18.06795442, -22.22221109,
       -29.62299198, -19.73169041,   1.49556288,   8.32084318,
       -36.29303091, -42.50727688, -25.90456248, -34.98955476,
        -8.42006278, -50.04722655, -51.48203383,  19.88962716,
        -8.91884565, -31.99339571, -11.66095233,  -0.47

In [9]:
# Only the first ten scores, for a side-by-side look with the predictions.
log_reg.decision_function(X=X_test)[:10]

array([-22.05695252, -33.02935768, -16.21330315, -80.37911802,
       -48.25122108, -24.54001722, -44.39165048, -25.04289608,
        -0.97825371, -19.71741442])

In [10]:
# Predicted labels for the same ten samples whose scores were shown above.
log_reg.predict(X=X_test)[:10]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [11]:
# Keep the scores around so other cut-offs can be tried without refitting.
decision_scores = log_reg.decision_function(X=X_test)

In [12]:
# Lower end of the score range: how low a candidate threshold can usefully go.
decision_scores.min()

-85.68604588355326

In [13]:
# Upper end of the score range: how high a candidate threshold can usefully go.
decision_scores.max()

19.889627156334733

In [14]:
# Stricter cut-off: a sample is labelled positive only if its score is >= 5.
y_predict_2 = (decision_scores >= 5).astype('int')

In [15]:
# Confusion matrix for the threshold-5 predictions (rows true, columns predicted).
confusion_matrix(y_true=y_test, y_pred=y_predict_2)

array([[404,   1],
       [ 21,  24]])

In [16]:
# Precision with the threshold raised to 5.
precision_score(y_true=y_test, y_pred=y_predict_2)

0.96

In [17]:
# Recall with the threshold raised to 5.
recall_score(y_true=y_test, y_pred=y_predict_2)

0.5333333333333333

In [18]:
# Looser cut-off: any sample scoring at least -5 is labelled positive.
y_predict_3 = (decision_scores >= -5).astype('int')

In [19]:
# Confusion matrix for the threshold -5 predictions (rows true, columns predicted).
confusion_matrix(y_true=y_test, y_pred=y_predict_3)

array([[390,  15],
       [  5,  40]])

In [20]:
# Precision with the threshold lowered to -5.
precision_score(y_true=y_test, y_pred=y_predict_3)

0.7272727272727273

In [21]:
# Recall with the threshold lowered to -5.
recall_score(y_true=y_test, y_pred=y_predict_3)

0.8888888888888888