In [1]:
import numpy as np
import pandas as pd
import sklearn.metrics as sk
import matplotlib.pyplot as plt

In [2]:
# Decision thresholds 0.20, 0.25, ..., 0.65 (the stop value 0.70 is excluded).
# The grid is built from an integer range and scaled afterwards: np.arange with a
# fractional step accumulates floating-point error, so some elements can end up
# one ulp away from the intended decimal, which makes `score >= threshold`
# unreliable for scores that sit exactly on a threshold.
thresholds = np.arange(start=20, stop=70, step=5) / 100
thresholds

array([0.2 , 0.25, 0.3 , 0.35, 0.4 , 0.45, 0.5 , 0.55, 0.6 , 0.65])

In [None]:
def precision_recall_curve(y_true, pred_scores, thresholds, pos_label="positive", neg_label="negative"):
    """Compute precision and recall at each decision threshold.

    For every threshold, each score in ``pred_scores`` is converted to a class
    label (``pos_label`` when ``score >= threshold``, otherwise ``neg_label``)
    and the resulting labels are scored against ``y_true``.

    Parameters
    ----------
    y_true : sequence
        Ground-truth labels, aligned with ``pred_scores``.
    pred_scores : sequence of float
        Predicted score for each sample.
    thresholds : iterable of float
        Thresholds to evaluate, in the order the results should be returned.
    pos_label : optional
        Label assigned to samples at or above the threshold; also the class
        that precision and recall are reported for. Defaults to "positive".
    neg_label : optional
        Label assigned to samples below the threshold. Defaults to "negative".

    Returns
    -------
    (list, list)
        ``precisions`` and ``recalls``, one entry per threshold, in the same
        order as ``thresholds``.
    """
    precisions = []
    recalls = []

    for threshold in thresholds:
        # Binarise the scores at the current threshold.
        y_pred = [pos_label if score >= threshold else neg_label for score in pred_scores]

        precisions.append(sk.precision_score(y_true=y_true, y_pred=y_pred, pos_label=pos_label))
        recalls.append(sk.recall_score(y_true=y_true, y_pred=y_pred, pos_label=pos_label))

    return precisions, recalls

# Sweep all thresholds and collect one precision and one recall value per threshold.
# NOTE(review): y_true and pred_scores are not assigned in any cell visible here;
# they must be defined before this point for a Restart & Run All to succeed.
precisions, recalls = precision_recall_curve(
    y_true=y_true,
    pred_scores=pred_scores,
    thresholds=thresholds,
)

In [None]:
# Draw precision (y) against recall (x), one point per evaluated threshold.
fig, ax = plt.subplots()
ax.plot(recalls, precisions, linewidth=4, color="red")
ax.set_xlabel("Recall", fontsize=12, fontweight='bold')
ax.set_ylabel("Precision", fontsize=12, fontweight='bold')
ax.set_title("Precision-Recall Curve", fontsize=15, fontweight="bold")
plt.show()

Note that as the recall increases, the precision decreases. The reason is that lowering the threshold labels more samples as positive: more of the true positives are found (higher recall), but a larger share of the positive predictions are false positives (lower precision).

In [None]:
# F1 score (harmonic mean of precision and recall) at every threshold.
precision_arr = np.asarray(precisions, dtype=float)
recall_arr = np.asarray(recalls, dtype=float)
pr_sum = precision_arr + recall_arr

# Where precision + recall == 0 the ratio is 0/0; report F1 = 0 there instead of
# letting NumPy emit a RuntimeWarning and store NaN.
f1 = 2 * np.divide(
    precision_arr * recall_arr,
    pr_sum,
    out=np.zeros_like(pr_sum),
    where=pr_sum > 0,
)

In [None]:
# Precision-recall curve with the best-F1 operating point highlighted.
# The marker index is derived from `f1` rather than hard-coded, so it stays
# correct when the data or the threshold grid changes. nanargmax skips entries
# where F1 is undefined (NaN).
# NOTE(review): the previous version always marked index 5; presumably that was
# the best-F1 threshold for the author's data - confirm.
best_idx = int(np.nanargmax(f1))

plt.plot(recalls, precisions, linewidth=4, color="red", zorder=0)
plt.scatter(recalls[best_idx], precisions[best_idx], zorder=1, linewidth=6)

plt.xlabel("Recall", fontsize=12, fontweight='bold')
plt.ylabel("Precision", fontsize=12, fontweight='bold')
plt.title("Precision-Recall Curve", fontsize=15, fontweight="bold")
plt.show()

In [None]:
# Average precision (AP): sum over consecutive thresholds of the change in recall
# multiplied by the precision at the first threshold of each pair.

# Close the curve with the end point (recall = 0, precision = 1).
# The inputs are first cut back to one entry per threshold so that an end point
# left behind by an earlier run of this cell is dropped; this makes the cell safe
# to re-run (calling list.append here fails once the names are bound to arrays).
n_points = len(thresholds)
precisions = np.append(np.asarray(precisions, dtype=float)[:n_points], 1.0)
recalls = np.append(np.asarray(recalls, dtype=float)[:n_points], 0.0)

AP = np.sum((recalls[:-1] - recalls[1:]) * precisions[:-1])
print(AP)

### AVERAGE PRECISION

1. Generate the prediction scores using the model.
2. Convert the prediction scores to class labels.
3. Calculate the confusion matrix.
4. Calculate the precision and recall metrics.
5. Create the precision-recall curve.
6. Measure the average precision.