In [None]:
%load_ext autoreload
%autoreload 2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Notebook-wide matplotlib defaults: large text and thick lines for the
# figures, and tightly-cropped 300-dpi output for every saved image.
plt.rcParams.update({
    'font.size': 28,
    'figure.figsize': [10, 8],
    'lines.linewidth': 2.5,
    'savefig.bbox': 'tight',
    'savefig.dpi': 300,
})

In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
import numpy as np
from sklearn.utils import shuffle

# Breast-cancer dataset, split into train and test sets; the split is
# stratified on the target and seeded for reproducibility.
data = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(
    data.data,
    data.target,
    stratify=data.target,
    random_state=0,
)

# Standardise the features, then fit a logistic regression on the training
# split. `fit` hands back the fitted pipeline, so it can be bound directly.
lr = make_pipeline(
    StandardScaler(),
    LogisticRegression(random_state=0),
).fit(X_train, y_train)

# Predicted class labels for the held-out test split.
y_pred = lr.predict(X_test)

In [None]:
# Display the labels the fitted pipeline predicted for the test split.
y_pred

In [None]:
# Tabulate true labels against predicted labels for the test split.
confusion_matrix(y_test, y_pred)

In [None]:
# Plot the confusion matrix of the fitted pipeline on the test split ...
ConfusionMatrixDisplay.from_estimator(lr, X_test, y_test)
# ... and export the current figure with a transparent background.
plt.savefig("images/confusion_matrix_normal.png", transparent=True)

In [None]:
# Imbalanced ground truth: 100 samples, of which the first 10 are positive.
y_true = (np.arange(100) < 10).astype(int)

# Scenario 1: always predict the negative class (all 10 positives missed).
y_pred_1 = np.zeros_like(y_true)

# Scenario 2: every positive found, plus 10 negatives flagged as positive.
y_pred_2 = (np.arange(100) < 20).astype(int)

# Scenario 3: flip the labels of samples 5..14, i.e. 5 positives missed and
# 5 negatives flagged as positive.
y_pred_3 = y_true.copy()
y_pred_3[5:15] ^= 1

In [None]:
from sklearn.metrics import ConfusionMatrixDisplay
from matplotlib.colors import Normalize

# One panel per synthetic prediction vector (y_pred_1 .. y_pred_3), all scored
# against the same y_true, so the three confusion matrices can be compared.
fig, axes = plt.subplots(1, 3, figsize=(16, 6))
# The loop variable is deliberately not called `y_pred`: that name holds the
# model predictions from an earlier cell and must not be silently overwritten.
for ax, scenario_pred in zip(axes, [y_pred_1, y_pred_2, y_pred_3]):
    disp = ConfusionMatrixDisplay(
        confusion_matrix(y_true, scenario_pred),
        display_labels=['N', 'P'],
    )
    # colorbar=False: do not create a colorbar at all (instead of creating one
    # and removing it afterwards).
    disp.plot(ax=ax, cmap='gray_r', colorbar=False)
    # Same 0..100 colour scale in every panel so shades are comparable.
    disp.im_.set_norm(Normalize(vmin=0, vmax=100))
fig.tight_layout()
fig.savefig("images/confusion_matrix_90_neg.png", transparent=True)

In [None]:
def make_2x2_matrix(labels, fontsize=24):
    """Draw a schematic 2x2 confusion-matrix grid with one text per cell.

    Opens a new 10x6 figure, separates the four cells with dotted lines and
    labels the x axis 'predicted negative' / 'predicted positive' and the
    y axis 'actual negative' / 'actual positive'.

    Parameters
    ----------
    labels : sequence of str
        Cell texts. The first four entries are used, in this order:
        (predicted negative, actual negative),
        (predicted negative, actual positive),
        (predicted positive, actual negative),
        (predicted positive, actual positive).
    fontsize : int, default=24
        Font size of the cell texts.

    Returns
    -------
    tuple
        The four objects returned by ``plt.text``, in the order of ``labels``.
    """
    plt.figure(figsize=(10, 6))

    # Dotted cross through (0.5, 0.5) splits the canvas into four cells.
    plt.vlines([.5], ymin=-.5, ymax=1.5, linestyle=':')
    plt.hlines([.5], xmin=-.5, xmax=1.5, linestyle=':')

    plt.xticks([0, 1], ['predicted negative', 'predicted positive'])
    # y = 1 is the "actual negative" row, y = 0 the "actual positive" row.
    plt.yticks([1, 0], ['actual negative', 'actual positive'])

    text_style = dict(
        horizontalalignment='center',
        verticalalignment='center',
        fontsize=fontsize,
    )
    # (x, y) centre of each cell, listed in the same order as `labels`.
    cell_centres = ((0, 1), (0, 0), (1, 1), (1, 0))
    return tuple(
        plt.text(x, y, labels[position], **text_style)
        for position, (x, y) in enumerate(cell_centres)
    )
# Schematic with the four basic outcomes (TN, FN, FP, TP), saved to disk.
make_2x2_matrix(['True\nNegative', 'False\nNegative', 'False\nPositive', 'True\nPositive'], fontsize=28)
plt.savefig("images/confusion_matrix.png")

In [None]:
# Names of the four cells when the matrix is normalized by the true condition.
res = make_2x2_matrix(
    [
        'True Negative Rate\nSpecificity',
        'False Negative Rate\nMiss rate',
        'False Positive Rate\nFall-out',
        'True Positive Rate\nRecall\nSensitivity',
    ],
    fontsize=16,
)
# Emphasise the last two cells (false positive rate and true positive rate).
for cell_text in res[2:]:
    cell_text.set_fontweight('bold')
plt.title("Normalized by true condition")
plt.savefig("images/confusion_matrix_norm_true.png")

In [None]:
# Names of the four cells when the matrix is normalized by the predicted
# condition.
res = make_2x2_matrix(
    [
        'Negative Predictive Value',
        'False Omission Rate',
        'False Discovery Rate',
        'Precision\nPositive Predictive Value',
    ],
    fontsize=16,
)
# Emphasise the last cell (precision).
res[-1].set_fontweight('bold')
plt.title("Normalized by predicted condition")
plt.savefig("images/confusion_matrix_norm_pred.png")

In [None]:
# Tiny binary example: 8 samples, 2 actual positives and 6 actual negatives.
# NOTE: this rebinds `y_true` and `y_pred`, replacing the values those names
# held in earlier cells.
y_true = [1, 0, 0, 0, 0, 0, 0, 1]
# 5 samples are predicted positive; only the first one is actually positive.
y_pred = [1, 1, 1, 1, 0, 0, 1, 0]

In [None]:
from sklearn.metrics import recall_score

In [None]:
# Recall with average='weighted'; the following cell works out the same
# quantity by hand from the per-class recalls and class counts.
recall_score(y_true, y_pred, average='weighted')

In [None]:
# Weighted-average recall by hand: each class's recall weighted by the number
# of true samples of that class, divided by the total sample count (8).
#   class 0: 6 samples, 2 of them predicted as 0  -> recall 1/3
#   class 1: 2 samples, 1 of them predicted as 1  -> recall 1/2
weighted_recall_by_hand = (6 * (1/3) + 2 * (1/2)) / 8
weighted_recall_by_hand

In [None]:
# Recall with average='macro'; the following cell works out the same quantity
# by hand as the plain mean of the per-class recalls.
recall_score(y_true, y_pred, average='macro')

In [None]:
# Unweighted mean of the two per-class recalls for the toy labels:
# 1/2 for class 1 (1 of 2 found) and 1/3 for class 0 (2 of 6 found).
(1/2 + 1/3)/2

In [None]:
# Recall with default arguments.
# NOTE(review): presumably scores label 1 as the positive class — compare
# with the explicit pos_label=0 call in the next cell.
recall_score(y_true, y_pred)

In [None]:
# Same metric, but with label 0 designated as the positive class.
recall_score(y_true, y_pred, pos_label=0)

In [None]:
from sklearn.metrics import classification_report

In [None]:
# Print the classification report for the toy labels; print() is used so the
# returned text is shown with its line breaks.
print(classification_report(y_true, y_pred))

In [None]:
from sklearn.datasets import load_digits
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# Digits dataset as a feature matrix X and a target vector y.
digits = load_digits()
X, y = digits.data, digits.target

# Seeded split, stratified on the target.
# NOTE: this rebinds X_train / X_test / y_train / y_test, which held the
# breast-cancer split in earlier cells.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, random_state=42)

# Random forest fitted on the training split (seeded for reproducibility).
rf = RandomForestClassifier(random_state=42).fit(X_train, y_train)

In [None]:
# Illustrative line built from hand-written literal values (nothing here is
# computed from a fitted model).
fig, ax = plt.subplots()
ax.plot([0, 1, 2, 3, 4], [0.1, 0.24, 0.5, 0.6, 0.8])
ax.set_ylabel("Partial Dependence")
# Exported with a transparent background.
fig.savefig("images/partial_dependence_example.png", transparent=True)

In [None]:
from sklearn.metrics import PrecisionRecallDisplay
from sklearn.metrics import RocCurveDisplay
from sklearn.inspection import PartialDependenceDisplay

In [None]:
# NOTE(review): bare attribute access — this only displays the method's repr
# and calls nothing; looks like leftover API exploration.
PartialDependenceDisplay.from_estimator

In [None]:
from sklearn.calibration import CalibrationDisplay

In [None]:
# NOTE(review): bare attribute access — this only displays the method's repr
# and calls nothing; looks like leftover API exploration.
RocCurveDisplay.from_estimator

In [None]:
import seaborn as sns

In [None]:
# Apply seaborn's theme with its default arguments.
# NOTE(review): this runs after every figure above has already been drawn and
# saved, so it can only affect plots created later — confirm the placement is
# intentional.
sns.set_theme()