In [33]:
%pip install -r requirements.txt

Collecting puncc (from -r requirements.txt (line 6))
  Using cached puncc-0.8.0-py3-none-any.whl.metadata (13 kB)
Collecting joblib (from puncc->-r requirements.txt (line 6))
  Using cached joblib-1.4.2-py3-none-any.whl.metadata (5.4 kB)
Collecting scikit-learn (from puncc->-r requirements.txt (line 6))
  Downloading scikit_learn-1.6.1-cp312-cp312-macosx_12_0_arm64.whl.metadata (31 kB)
Collecting tqdm (from puncc->-r requirements.txt (line 6))
  Using cached tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
Collecting scipy>=1.6.0 (from scikit-learn->puncc->-r requirements.txt (line 6))
  Downloading scipy-1.15.1-cp312-cp312-macosx_14_0_arm64.whl.metadata (61 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn->puncc->-r requirements.txt (line 6))
  Using cached threadpoolctl-3.5.0-py3-none-any.whl.metadata (13 kB)
Using cached puncc-0.8.0-py3-none-any.whl (70 kB)
Using cached joblib-1.4.2-py3-none-any.whl (301 kB)
Downloading scikit_learn-1.6.1-cp312-cp312-macosx_12_0_arm64.whl (11.2 M

In [4]:
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns

In [31]:
# Set random seed for reproducibility
tf.random.set_seed(0)
tf.keras.utils.set_random_seed(0)

In [5]:
data = pd.read_csv("./iris_synthetic_data.csv")

In [13]:
unique_labels = data['label'].unique()

In [15]:
label_mapping = pd.Series({l: i for i, l in enumerate(unique_labels)})
data['int_label'] = data['label'].map(label_mapping)
data

Unnamed: 0,sepal length,sepal width,petal length,petal width,label,int_label
0,5.2,3.8,1.5,0.3,Iris-setosa,0
1,5.3,4.1,1.5,0.1,Iris-setosa,0
2,4.8,3.1,1.5,0.2,Iris-setosa,0
3,5.2,3.7,1.5,0.2,Iris-setosa,0
4,4.9,3.0,1.5,0.3,Iris-setosa,0
...,...,...,...,...,...,...
2995,7.2,3.6,6.0,2.5,Iris-virginica,2
2996,7.3,3.0,6.2,2.1,Iris-virginica,2
2997,6.9,3.2,5.7,2.3,Iris-virginica,2
2998,7.5,2.8,6.0,2.0,Iris-virginica,2


In [21]:
# random splits the data into train, validation, and test
test = data.sample(frac = 0.1)
rest_part = data.drop(test.index)
validation = rest_part.sample(frac = 0.11)
train = data.drop(validation.index)

In [22]:
test

Unnamed: 0,sepal length,sepal width,petal length,petal width,label,int_label
1248,5.6,2.5,3.8,1.0,Iris-versicolor,1
1224,6.7,3.0,5.0,1.7,Iris-versicolor,1
1047,6.7,3.1,4.6,1.5,Iris-versicolor,1
355,5.0,3.5,1.3,0.3,Iris-setosa,0
2653,6.2,2.8,4.8,1.8,Iris-virginica,2
...,...,...,...,...,...,...
2240,6.0,3.0,5.0,1.8,Iris-virginica,2
2300,6.1,2.4,5.0,1.6,Iris-virginica,2
1694,6.0,2.9,4.5,1.4,Iris-versicolor,1
2450,7.7,3.0,6.0,2.3,Iris-virginica,2


In [23]:
validation

Unnamed: 0,sepal length,sepal width,petal length,petal width,label,int_label
239,4.8,3.2,1.6,0.2,Iris-setosa,0
82,5.7,4.4,1.5,0.4,Iris-setosa,0
2066,6.6,3.0,5.7,2.2,Iris-virginica,2
384,4.6,3.2,1.3,0.2,Iris-setosa,0
1435,6.2,2.2,4.5,1.4,Iris-versicolor,1
...,...,...,...,...,...,...
654,4.4,3.2,1.3,0.2,Iris-setosa,0
860,4.9,3.1,1.5,0.1,Iris-setosa,0
1149,5.4,3.0,4.5,1.5,Iris-versicolor,1
908,4.9,3.0,1.5,0.2,Iris-setosa,0


In [24]:
train

Unnamed: 0,sepal length,sepal width,petal length,petal width,label,int_label
0,5.2,3.8,1.5,0.3,Iris-setosa,0
1,5.3,4.1,1.5,0.1,Iris-setosa,0
2,4.8,3.1,1.5,0.2,Iris-setosa,0
3,5.2,3.7,1.5,0.2,Iris-setosa,0
4,4.9,3.0,1.5,0.3,Iris-setosa,0
...,...,...,...,...,...,...
2995,7.2,3.6,6.0,2.5,Iris-virginica,2
2996,7.3,3.0,6.2,2.1,Iris-virginica,2
2997,6.9,3.2,5.7,2.3,Iris-virginica,2
2998,7.5,2.8,6.0,2.0,Iris-virginica,2


In [25]:
x_train, y_train = train[['sepal length', 'sepal width', 'petal length', 'petal width']], train['label']
x_validation, y_validation = validation[['sepal length', 'sepal width', 'petal length', 'petal width']], validation['label']
x_test, y_test = test[['sepal length', 'sepal width', 'petal length', 'petal width']], test['label']

In [42]:

# Define the MLP model
model = tf.keras.models.Sequential([
  tf.keras.layers.Dense(16, activation='relu'),
  tf.keras.layers.Dense(32, activation='relu'),
  tf.keras.layers.Dense(3, activation='softmax')
])

# Compile the model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

y_train_fit = tf.keras.utils.to_categorical(y_train, num_classes=3)

# Train the model
model.fit(x_train, y_train_fit, epochs=16)


# Evaluate the model
y_test_fit = tf.keras.utils.to_categorical(y_test, num_classes=3)
loss, accuracy = model.evaluate(x_test, y_test_fit, verbose=0)
print(f"Test accuracy: {accuracy}")

Epoch 1/16
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 545us/step - accuracy: 0.4115 - loss: 1.1593 
Epoch 2/16
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 370us/step - accuracy: 0.8357 - loss: 0.5125
Epoch 3/16
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 361us/step - accuracy: 0.9663 - loss: 0.3321
Epoch 4/16
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 352us/step - accuracy: 0.9780 - loss: 0.2224
Epoch 5/16
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 380us/step - accuracy: 0.9829 - loss: 0.1532
Epoch 6/16
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 357us/step - accuracy: 0.9855 - loss: 0.1135
Epoch 7/16
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 363us/step - accuracy: 0.9870 - loss: 0.0905
Epoch 8/16
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 366us/step - accuracy: 0.9870 - loss: 0.0763
Epoch 9/16
[1m85/85[0m [32m━━━━━━━━━

# UQ

In [143]:
from deel.puncc.api.prediction import BasePredictor
from deel.puncc.classification import APS

# Instanciate the APS wrapper around the convnet predictor.
# The `train` argument is set to False as the model is already trained
model_cp = APS(model, train=False, rand=False)

In [144]:
# Compute the nonconformity scores on the calibration dataset
y_validation_int = validation['int_label'].to_numpy().astype(int)
model_cp.fit(X_calib=x_validation.to_numpy(), y_calib=y_validation_int)

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 


In [145]:
# Use chooses the coverage target 1-alpha = 95%
alpha = 0.05

# The `predict` returns the output of the convnet model `y_pred` and
# the calibrated prediction set `set_pred`.
y_pred, set_pred = model_cp.predict(x_test, alpha=alpha)

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 


In [146]:
set_pred

[[np.int64(1), np.int64(0), np.int64(2)],
 [np.int64(1), np.int64(2)],
 [np.int64(1), np.int64(2), np.int64(0)],
 [np.int64(0), np.int64(1)],
 [np.int64(2), np.int64(1)],
 [np.int64(0), np.int64(1)],
 [np.int64(1), np.int64(0), np.int64(2)],
 [np.int64(1), np.int64(2), np.int64(0)],
 [np.int64(0), np.int64(1)],
 [np.int64(2), np.int64(1)],
 [np.int64(2), np.int64(1)],
 [np.int64(1), np.int64(2)],
 [np.int64(2), np.int64(1)],
 [np.int64(0), np.int64(1)],
 [np.int64(1), np.int64(2), np.int64(0)],
 [np.int64(0), np.int64(1)],
 [np.int64(0), np.int64(1)],
 [np.int64(0), np.int64(1)],
 [np.int64(2), np.int64(1)],
 [np.int64(2)],
 [np.int64(2), np.int64(1)],
 [np.int64(1), np.int64(0), np.int64(2)],
 [np.int64(0), np.int64(1)],
 [np.int64(1), np.int64(2), np.int64(0)],
 [np.int64(1), np.int64(2), np.int64(0)],
 [np.int64(2), np.int64(1)],
 [np.int64(2), np.int64(1)],
 [np.int64(2), np.int64(1)],
 [np.int64(1), np.int64(2), np.int64(0)],
 [np.int64(0), np.int64(1)],
 [np.int64(1), np.int64(2)

In [147]:
sample = 10

# sort y_pred[sample] in reversed order
ranked_pred = y_pred[sample].argsort()[::-1]

# Plot results
# fig = plt.figure(figsize=(3, 3))
print(x_test.iloc[sample])
print(f"Point prediction: {np.argmax(y_pred[sample])}")
print(f"Prediction set: {set_pred[sample]}")
print(f"True label: {y_test.iloc[sample]}")

sepal length    7.2
sepal width     3.0
petal length    6.0
petal width     2.0
Name: 2246, dtype: float64
Point prediction: 2
Prediction set: [np.int64(2), np.int64(1)]
True label: 2


In [148]:
from deel.puncc import metrics

mean_coverage = metrics.classification_mean_coverage(y_test, set_pred)
mean_size = metrics.classification_mean_size(set_pred)

print(f"Empirical coverage : {mean_coverage:.2f}")
print(f"Average set size : {mean_size:.2f}")

Empirical coverage : 1.00
Average set size : 2.23


In [149]:
tf.random.set_seed(0)
tf.keras.utils.set_random_seed(0)

alpha = 0.05

# The `predict` returns the output of the convnet model `y_pred` and
# the calibrated prediction set `set_pred`.
y_pred, set_pred = model_cp.predict(x_test, alpha=alpha)
mean_coverage = metrics.classification_mean_coverage(y_test, set_pred)
mean_size = metrics.classification_mean_size(set_pred)

print(sum(1 for x in set_pred if len(x) == 0))

print(f"Empirical coverage : {mean_coverage:.2f}")
print(f"Average set size : {mean_size:.2f}")

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
0
Empirical coverage : 1.00
Average set size : 2.23


In [142]:
# Get nonconformity scores
nonconf_scores = model_cp.conformal_predictor.get_nonconformity_scores()[0]

# Size of the calibration set
n = len(nonconf_scores)

# Compute the calibrated treshold
calibrated_treshold = np.quantile(
    nonconf_scores, (1 - alpha) * (n + 1) / n, method="inverted_cdf"
)
print(f"Uncalibrated treshold : {1-alpha:.2f}")
print(f"Calibrated treshold : {calibrated_treshold:.2f}")

Uncalibrated treshold : 0.95
Calibrated treshold : 0.95
