# Binary classification risk control - Theoretical tests - Target API

In [41]:
import numpy as np
import itertools

from mapie.binary_risk_control_target_api import BinaryClassificationRisk, BinaryClassificationController

ModuleNotFoundError: No module named 'mapie'

In [42]:
class RandomClassifier:
    """Toy binary classifier that scores samples at random.

    The input features are never inspected: only ``len(X)`` is used to decide
    how many scores to draw. Scores come from a ``RandomState`` seeded once at
    construction, so successive calls consume successive draws from the same
    stream.

    Parameters
    ----------
    seed : int, default=42
        Seed of the internal ``np.random.RandomState``.
    threshold : float, default=0.5
        Positive-class score at or above which ``predict`` returns 1.
    """

    def __init__(self, seed=42, threshold=0.5):
        self.threshold = threshold
        self.random_state = np.random.RandomState(seed)

    def predict_proba(self, X):
        """Return an array of shape ``(len(X), 2)``: column 0 is ``1 - score``, column 1 is the score."""
        n_samples = len(X)
        # Uniform draws in [0, 1), rounded to two decimals.
        positive_scores = self.random_state.rand(n_samples).round(2)
        negative_scores = 1 - positive_scores
        return np.column_stack((negative_scores, positive_scores))

    def predict(self, X):
        """Return 0/1 integer labels obtained by thresholding the positive-class score."""
        positive_scores = self.predict_proba(X)[:, 1]
        is_positive = positive_scores >= self.threshold
        return is_positive.astype(int)

In [43]:
# Per-sample occurrence functions: each returns 1 or 0 when the sample counts
# towards the metric, and None when the sample is excluded from it.


def _precision_occurrence(y_true, y_pred):
    """Correctness of a prediction, defined only when the prediction is non-zero."""
    if y_pred == 0:
        return None
    return int(y_pred == y_true)


def _recall_occurrence(y_true, y_pred):
    """Correctness of a prediction, defined only when the true label is non-zero."""
    if y_true == 0:
        return None
    return int(y_pred == y_true)


def _accuracy_occurrence(y_true, y_pred):
    """Correctness of a prediction, defined for every sample."""
    return int(y_pred == y_true)


precision = BinaryClassificationRisk(
    occurrence=_precision_occurrence,
    higher_is_better=True,
    binary=True,
)

# NOTE(review): presumably the complementary risk (1 - precision) — confirm
# against the BinaryClassificationRisk.transform_to_opposite implementation.
false_discovery_rate = precision.transform_to_opposite()

recall = BinaryClassificationRisk(
    occurrence=_recall_occurrence,
    higher_is_better=True,
    binary=True,
)

# NOTE(review): presumably the complementary risk (1 - recall) — confirm as above.
false_negative_rate = recall.transform_to_opposite()

accuracy = BinaryClassificationRisk(
    occurrence=_accuracy_occurrence,
    higher_is_better=True,
    binary=True,
)

NameError: name 'BinaryClassificationRisk' is not defined

In [44]:
# --- Experiment grid -------------------------------------------------------
# Every list below is one axis of the Cartesian product of tested settings.

# Calibration data
p = 0.5  # proportion of positives in the calibration set
N_values = [1, 100]  # size of the calibration set

# Risk-control settings
metrics = ['recall', 'precision']
target_levels = [0.2, 0.8]
confidence_levels = [0.1, 0.9]

# Candidate parameter sets: a 100-point grid from 0 to 0.99, or the single value 0.5
predict_params_sets = [
    np.linspace(start=0, stop=0.99, num=100),
    [0.5],
]

# Number of calibration repetitions per setting
n_repeats = 100

In [45]:
# Full Cartesian product of the experiment grid, in the order:
# (N, metric, target level, predict params, confidence level).
combinations = list(
    itertools.product(
        N_values,
        metrics,
        target_levels,
        predict_params_sets,
        confidence_levels,
    )
)

# Only the first combination is used below; `i` is its 1-based display index.
# TODO: iterate over the whole grid, e.g.
#     for i, combination in enumerate(combinations, 1):
i = 1
combination = combinations[0]

(N, metric, target_level, predict_params, confidence_level) = combination

# One line per setting, same text and order as separate print calls.
print(
    f"Combination {i}:",
    f"N = {N}",
    f"Metric = {metric}",
    f"Target level = {target_level}",
    f"Predict params = {predict_params}",
    f"Confidence Level = {confidence_level}",
    sep="\n",
)

Combination 1:
N = 1
Metric = recall
Target level = 0.2
Predict params = [0.   0.01 0.02 0.03 0.04 0.05 0.06 0.07 0.08 0.09 0.1  0.11 0.12 0.13
 0.14 0.15 0.16 0.17 0.18 0.19 0.2  0.21 0.22 0.23 0.24 0.25 0.26 0.27
 0.28 0.29 0.3  0.31 0.32 0.33 0.34 0.35 0.36 0.37 0.38 0.39 0.4  0.41
 0.42 0.43 0.44 0.45 0.46 0.47 0.48 0.49 0.5  0.51 0.52 0.53 0.54 0.55
 0.56 0.57 0.58 0.59 0.6  0.61 0.62 0.63 0.64 0.65 0.66 0.67 0.68 0.69
 0.7  0.71 0.72 0.73 0.74 0.75 0.76 0.77 0.78 0.79 0.8  0.81 0.82 0.83
 0.84 0.85 0.86 0.87 0.88 0.89 0.9  0.91 0.92 0.93 0.94 0.95 0.96 0.97
 0.98 0.99]
Confidence Level = 0.1


In [46]:
# Build a calibration set of N samples with a fraction p of positive labels.
# The features are just the sample indices 1..N.
n_positives = int(p * N)  # truncates towards zero, e.g. N=1 and p=0.5 give 0 positives
n_negatives = N - n_positives

X_calibrate = [*range(1, N + 1)]
y_calibrate = [1] * n_positives + [0] * n_negatives

# Shuffle the labels in place with a fixed global NumPy seed for reproducibility.
np.random.seed(42)
np.random.shuffle(y_calibrate)

In [47]:
# Repeat the calibration n_repeats times with a random classifier and report
# whether the fraction of "actually valid" runs reaches the confidence level.
clf = RandomClassifier()

# Pick the risk object and the reference value used by the validity check.
if metric == 'precision':
    risk = precision
    theoretical_value = p
elif metric == 'recall':
    risk = recall
    theoretical_value = 1 - clf.threshold
else:
    # Fail immediately instead of hitting a NameError on `risk` further down.
    raise ValueError(f"Unknown metric: {metric!r}")

all_valid_parameters = []

for _ in range(n_repeats):
    # A fresh controller per repetition; `clf` is shared, so each calibration
    # consumes new draws from the classifier's random stream.
    controller = BinaryClassificationController(
        predict_function=clf.predict_proba,
        risk=risk,
        target_level=target_level,
        confidence_level=confidence_level,
        best_predict_param_choice="auto",
    )
    controller.calibrate(X_calibrate, y_calibrate)

    valid_parameters = controller.valid_thresholds
    all_valid_parameters.append(valid_parameters)

if metric == 'precision':
    # NOTE(review): this condition does not depend on the thresholds returned
    # by the controller, and theoretical_value == p here, so every repetition
    # is counted as valid. Kept as-is; confirm the intended check.
    nb_actual_valid = sum(1 for _ in all_valid_parameters if p >= theoretical_value)
else:  # metric == 'recall'
    # Each entry is the whole collection of thresholds returned by one
    # calibration. A repetition counts as valid only when every returned
    # threshold is within the bound; comparing the collection directly in an
    # `if` is ambiguous for arrays and unsupported for lists. A repetition
    # that returned no threshold is counted as valid.
    threshold_bound = 1 - theoretical_value
    nb_actual_valid = sum(
        1
        for thresholds in all_valid_parameters
        if np.all(np.asarray(thresholds) <= threshold_bound)
    )

if nb_actual_valid/len(all_valid_parameters) >= confidence_level:
    print("Risk controlled")
else:
    print("Risk not controlled")

NameError: name 'recall' is not defined