# Confusion Matrix

A confusion matrix compares a classifier's predicted labels with the actual labels by counting the true positives, true negatives, false positives, and false negatives.

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd

Generate some data

In [None]:
from sklearn.datasets import make_blobs

# Draw a reproducible two-class synthetic dataset: X holds the sample
# coordinates and y the class label of each sample.
X, y = make_blobs(
    n_samples=1000,
    centers=2,
    cluster_std=3,
    random_state=42,
)

# Preview the first ten labels and the first ten samples.
print(f"Labels: {y[:10]}", f"Data: {X[:10]}", sep="\n")

In [None]:
# Visualize both classes: one point per sample, colored by its label in y.
# The explicit Axes interface is used so the figure can carry a title and axis
# labels (it should be readable on its own), and plt.show() ends the cell so
# the scatter call's return value is not echoed as text output.
fig, ax = plt.subplots()
ax.scatter(X[:, 0], X[:, 1], c=y)
ax.set(title="Generated samples by class", xlabel="Feature 0", ylabel="Feature 1")
plt.show()

Split our data into training and testing data

In [None]:
from sklearn.model_selection import train_test_split

# Hold out part of the samples for evaluation. No test_size is passed, so
# train_test_split falls back to its default proportion; random_state=1 pins
# the shuffle so the same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

Create a logistic regression model

In [None]:
from sklearn.linear_model import LogisticRegression
# Build an unfitted logistic-regression classifier; no arguments are passed,
# so every hyperparameter keeps the library default.
classifier = LogisticRegression()
# Bare expression on the last line so the notebook displays the estimator.
classifier

Fit (train) our model by using the training data

In [None]:
# Train on the training split only; X_test / y_test are not used here so they
# remain available as held-out data for the evaluation cells.
classifier.fit(X_train, y_train)

Validate the model by using the test data

In [None]:
# Score the fitted classifier on each split, then report both numbers.
# Comparing the two shows how well the model carries over to unseen data.
train_score = classifier.score(X_train, y_train)
test_score = classifier.score(X_test, y_test)

print(f"Training Data Score: {train_score}")
print(f"Testing Data Score: {test_score}")

Create a confusion matrix

In [None]:
from sklearn.metrics import confusion_matrix, classification_report

# Pair the held-out labels with the model's predictions for the same samples.
y_true, y_pred = y_test, classifier.predict(X_test)

# Tabulate actual vs. predicted labels.
cm = confusion_matrix(y_true=y_true, y_pred=y_pred)

# Bare expression so the notebook shows the matrix as the cell output.
cm

In [None]:
from sklearn import metrics
# Wrap the already-computed matrix in a display object; display_labels
# supplies the tick text for the two classes.
cm_display = metrics.ConfusionMatrixDisplay(
    confusion_matrix=cm,
    display_labels=[False, True],
)

# Draw the matrix and render the figure.
cm_display.plot()
plt.show()

The accuracy of the model on the test data is (TP + TN) / (TP + FP + TN + FN), i.e. the number of correct predictions divided by the total number of predictions.

In [None]:
# Flatten the binary confusion matrix into its four cells.
tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()

# Accuracy = correct predictions / all predictions.
# NOTE(review): the original author recorded (111 + 128) / (111 + 5 + 128 + 6)
# for this run -- confirm against the actual cell output.
correct_predictions = tp + tn
total_predictions = tp + fp + tn + fn
accuracy = correct_predictions / total_predictions

print(f"Accuracy: {accuracy}")

Generate a classification report for accuracy, precision, recall, and F1.

In [None]:
# Build the text report for the held-out predictions, then print it so the
# table's line breaks and alignment are rendered rather than shown as a repr.
report = classification_report(y_true, y_pred)
print(report)