# Binary classification risk control - Theoretical tests

In [1]:
!pip freeze

absl-py==1.4.0
accelerate==1.9.0
aiofiles==24.1.0
aiohappyeyeballs==2.6.1
aiohttp==3.11.15
aiosignal==1.4.0
alabaster==1.0.0
albucore==0.0.24
albumentations==2.0.8
ale-py==0.11.2
altair==5.5.0
annotated-types==0.7.0
antlr4-python3-runtime==4.9.3
anyio==4.9.0
argon2-cffi==25.1.0
argon2-cffi-bindings==21.2.0
array_record==0.7.2
arviz==0.22.0
astropy==7.1.0
astropy-iers-data==0.2025.7.21.0.41.39
astunparse==1.6.3
atpublic==5.1
attrs==25.3.0
audioread==3.0.1
autograd==1.8.0
babel==2.17.0
backcall==0.2.0
backports.tarfile==1.2.0
beautifulsoup4==4.13.4
betterproto==2.0.0b6
bigframes==2.11.0
bigquery-magics==0.10.1
bleach==6.2.0
blinker==1.9.0
blis==1.3.0
blobfile==3.0.0
blosc2==3.6.1
bokeh==3.7.3
Bottleneck==1.4.2
bqplot==0.12.45
branca==0.8.1
Brotli==1.1.0
build==1.2.2.post1
CacheControl==0.14.3
cachetools==5.5.2
catalogue==2.0.10
certifi==2025.7.14
cffi==1.17.1
chardet==5.2.0
charset-normalizer==3.4.2
chex==0.1.89
clarabel==0.11.1
click==8.2.1
cloudpathlib==0.21.1
cloudpickle==3.1.1
cmake=

In [None]:
!pip uninstall -y MAPIE

Found existing installation: MAPIE 1.0.1
Uninstalling MAPIE-1.0.1:
  Successfully uninstalled MAPIE-1.0.1


In [2]:
!pip install git+https://github.com/scikit-learn-contrib/MAPIE.git@binary-risk-control-draft --ignore-requires-python

Collecting git+https://github.com/scikit-learn-contrib/MAPIE.git@binary-risk-control-draft
  Cloning https://github.com/scikit-learn-contrib/MAPIE.git (to revision binary-risk-control-draft) to /tmp/pip-req-build-oenll_dj
  Running command git clone --filter=blob:none --quiet https://github.com/scikit-learn-contrib/MAPIE.git /tmp/pip-req-build-oenll_dj
  Running command git checkout -b binary-risk-control-draft --track origin/binary-risk-control-draft
  Switched to a new branch 'binary-risk-control-draft'
  Branch 'binary-risk-control-draft' set up to track remote branch 'binary-risk-control-draft' from 'origin'.
  Resolved https://github.com/scikit-learn-contrib/MAPIE.git to commit 09727f9ef6e367930e8355c6e5c1a5afbe339b40
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: MAPIE
  Building wheel for MAPIE (pyproject.toml) ... [

In [3]:
import numpy as np
import itertools

from mapie.risk_control_draft import BinaryClassificationController

In [4]:
class RandomClassifier:
    def __init__(self, seed=42, threshold=0.5):
        self.random_state = np.random.RandomState(seed)
        self.threshold = threshold

    def predict_proba(self, X):
        probs = np.round(self.random_state.rand(len(X)), 2)
        return np.vstack([1 - probs, probs]).T

    def predict(self, X):
        probs = self.predict_proba(X)[:, 1]
        return (probs >= self.threshold).astype(int)

In [5]:
N = 100  # size of the calibration set
p = 0.5  # proportion of positives in the calibration set
metric = "precision"
target_level = 0.8
predict_params = np.linspace(0, 0.99, 100)
confidence_level = 0.9

n_repeats = 100

In [6]:
print(f"N = {N}")
print(f"Metric = {metric}")
print(f"Target level = {target_level}")
print(f"Predict params = {predict_params}")
print(f"Confidence Level = {confidence_level}")

N = 100
Metric = precision
Target level = 0.8
Predict params = [0.   0.01 0.02 0.03 0.04 0.05 0.06 0.07 0.08 0.09 0.1  0.11 0.12 0.13
 0.14 0.15 0.16 0.17 0.18 0.19 0.2  0.21 0.22 0.23 0.24 0.25 0.26 0.27
 0.28 0.29 0.3  0.31 0.32 0.33 0.34 0.35 0.36 0.37 0.38 0.39 0.4  0.41
 0.42 0.43 0.44 0.45 0.46 0.47 0.48 0.49 0.5  0.51 0.52 0.53 0.54 0.55
 0.56 0.57 0.58 0.59 0.6  0.61 0.62 0.63 0.64 0.65 0.66 0.67 0.68 0.69
 0.7  0.71 0.72 0.73 0.74 0.75 0.76 0.77 0.78 0.79 0.8  0.81 0.82 0.83
 0.84 0.85 0.86 0.87 0.88 0.89 0.9  0.91 0.92 0.93 0.94 0.95 0.96 0.97
 0.98 0.99]
Confidence Level = 0.9


In [7]:
X_calibrate = list(range(1, N+1))
y_calibrate = [1] * int(p*N) + [0] * (N - int(p*N))
np.random.seed(42)
np.random.shuffle(y_calibrate)

In [8]:
clf = RandomClassifier()

if metric == "precision":
    theoretical_value = p
elif metric == "recall":
    theoretical_value = 1 - clf.threshold

all_valid_parameters = []

for _ in range(n_repeats):

    controller = BinaryClassificationController(
        fitted_binary_classifier=clf,
        metric="precision",
        target_level=target_level,
        confidence_level=confidence_level,
    )
    controller.calibrate(X_calibrate, y_calibrate)

    valid_parameters = controller.valid_thresholds
    all_valid_parameters.append(valid_parameters)

if metric == "precision":
    nb_actual_valid = sum(1 for x in all_valid_parameters if p >= theoretical_value)
elif metric == "recall":
    nb_actual_valid = sum(1 for x in all_valid_parameters if x <= (1 - theoretical_value))

if nb_actual_valid/len(all_valid_parameters) >= confidence_level:
    print("Risk controlled")
else:
    print("Risk not controlled")



ValueError: min() arg is an empty sequence