In [None]:
import numpy as np
import pandas as pd
from pandarallel import pandarallel
from sklearn.metrics import precision_recall_curve, PrecisionRecallDisplay, average_precision_score

## Precision-recall curve

In [None]:
def get_precision_recall_curve(model, dataset, positive_label, num_classes):
    """Compute the one-vs-rest precision-recall curve for ``positive_label``.

    Parameters
    ----------
    model
        Object exposing ``predict(document, k=...)`` that returns an indexable
        pair: predicted labels at position 0 and their scores at position 1.
        (Presumably a fastText-style classifier -- confirm against the caller.)
    dataset : pandas.DataFrame
        Frame with a ``'document'`` column (model input) and a ``'label'``
        column (ground truth). It is not modified.
    positive_label
        Label treated as the positive class; every other label is negative.
    num_classes : int
        Forwarded as ``k`` to ``model.predict`` so that the score of
        ``positive_label`` is among the returned predictions.

    Returns
    -------
    The result of ``sklearn.metrics.precision_recall_curve`` for the binary
    targets and the per-document score of ``positive_label``.
    """
    # Binarise with a single comparison on a fresh Series. The previous
    # two-step in-place relabelling wrote into the caller's frame and mapped
    # every row to 1 when positive_label == 0.
    labels = (dataset['label'] == positive_label).astype('int')

    def positive_score(document):
        # One scalar score per document: the score attached to positive_label,
        # or 0.0 when the model did not return that label at all.
        prediction = model.predict(document, k=num_classes)
        return next(
            (
                float(score)
                for label, score in zip(prediction[0], prediction[1])
                if label == positive_label
            ),
            0.0,
        )

    documents = dataset['document']
    # progress_apply only exists once tqdm's pandas integration has been
    # registered; fall back to a plain apply so the function still runs.
    apply_with_progress = getattr(documents, 'progress_apply', documents.apply)
    preds = apply_with_progress(positive_score).astype(float)

    return precision_recall_curve(labels, preds)

## F-score

In [None]:
def get_fbeta_score(precision, recall, beta=1):
    """Return the F-beta score for the given precision and recall.

    Computes ``(1 + beta**2) * P * R / (beta**2 * P + R)``.

    Parameters
    ----------
    precision, recall : float or array-like
        Scalars or arrays that broadcast against each other.
    beta : float, default 1
        Weight of recall relative to precision.

    Returns
    -------
    float or numpy.ndarray
        A plain float for scalar inputs, otherwise an array of scores.
        Where the denominator is zero (precision and recall both 0) the
        score is defined as 0 instead of producing NaN or raising
        ``ZeroDivisionError``.
    """
    precision = np.asarray(precision, dtype=float)
    recall = np.asarray(recall, dtype=float)

    beta_squared = beta ** 2
    numerator = (1 + beta_squared) * precision * recall
    denominator = (beta_squared * precision) + recall

    # Silence the 0/0 warning; those positions are overwritten just below.
    with np.errstate(divide='ignore', invalid='ignore'):
        scores = numerator / denominator
    scores = np.where(denominator == 0, 0.0, scores)

    # Keep scalar-in / scalar-out behaviour for callers passing plain numbers.
    return float(scores) if scores.ndim == 0 else scores

## Optimal threshold (based on F-score) and its precision-recall

In [None]:
def get_optimal_threshold(thresholds, fbeta_scores, precisions, recalls):
    """Pick the threshold with the highest F-beta score.

    Parameters
    ----------
    thresholds, precisions, recalls
        Positionally aligned sequences. ``precisions`` and ``recalls`` may
        carry one trailing element more than ``thresholds`` (as returned by
        ``sklearn.metrics.precision_recall_curve``); that trailing point has
        no threshold and is never selected.
    fbeta_scores : array-like
        F-beta score for each precision/recall point.

    Returns
    -------
    tuple
        ``(optimal_threshold, optimal_precision, optimal_recall, best_fbeta)``.
    """
    scores = np.asarray(fbeta_scores, dtype=float)

    # Only points that have a matching threshold are candidates, so the
    # chosen index can never run past the end of ``thresholds``.
    candidates = scores[:len(thresholds)]
    # NaN would otherwise win argmax; rank it below every real score.
    best_index = int(np.where(np.isnan(candidates), -np.inf, candidates).argmax())

    return (
        thresholds[best_index],
        precisions[best_index],
        recalls[best_index],
        scores[best_index],
    )