In [1]:
import json

import numpy as np
import torch
import matplotlib.pyplot as plt

# Increase Experts 

In [2]:
# === OvA ===
# Load the saved test-split outputs of the runs in `path`, one run per
# ensemble size listed in `n_experts`.
n_classes = 10
path = "ova_increase_experts/"
n_experts = [1, 2, 4, 6, 8]
confs, exps, true = [], [], []

for n in n_experts:
    model_name = '_{}_experts'.format(n)

    # Each file holds a JSON string that itself encodes JSON, hence the
    # json.loads(json.load(...)) double decode.
    loaded = {}
    for kind in ('confidence', 'expert_predictions', 'true_label'):
        with open(path + kind + '_multiple_experts' + model_name + '.txt', 'r') as f:
            loaded[kind] = json.loads(json.load(f))
    conf = loaded['confidence']
    exp_pred = loaded['expert_predictions']
    true_label = loaded['true_label']

    # Only the 'test' entry of each file is kept.
    true.append(true_label['test'])
    exps.append(exp_pred['test'])

    c = torch.tensor(conf['test'])
    print(c.shape)
    # DANI Correction ===
    # The stored values are passed through an element-wise sigmoid before use.
    c = torch.sigmoid(c)
    # DANI Correction ===
    confs.append(c)

torch.Size([9216, 11])
torch.Size([9216, 12])
torch.Size([9216, 14])
torch.Size([9216, 16])
torch.Size([9216, 18])


# Increase confidence 

In [44]:
# === OvA ===
# Load the saved test-split outputs of the runs in `path`, one run per value
# in `p_experts`.
# NOTE: this rebinds `confs`, `exps` and `true` (dropping whatever an earlier
# cell stored in them) and makes `n_experts` an int instead of a list.
n_classes = 10
random_expert_idx = 0
path = "ova_increase_confidence/"
n_experts = 4
p_experts = [0.2, 0.4, 0.6, 0.8, 0.95]
confs, exps, true = [], [], []

for p in p_experts:
    model_name = '_{}_confidence'.format(p)

    # Each file holds a JSON string that itself encodes JSON, hence the
    # json.loads(json.load(...)) double decode.
    loaded = {}
    for kind in ('confidence', 'expert_predictions', 'true_label'):
        with open(path + kind + '_multiple_experts' + model_name + '.txt', 'r') as f:
            loaded[kind] = json.loads(json.load(f))
    conf = loaded['confidence']
    exp_pred = loaded['expert_predictions']
    true_label = loaded['true_label']

    # Only the 'test' entry of each file is kept.
    true.append(true_label['test'])
    exps.append(exp_pred['test'])

    c = torch.tensor(conf['test'])
    # DANI Correction ===
    # The stored values are passed through an element-wise sigmoid before use.
    c = torch.sigmoid(c)
    # DANI Correction ===
    confs.append(c)

In [62]:
# Pick one run (third from the end of the loaded lists) to work with.
# NOTE(review): the original note here read "1 expert rando, 3 with prob 0.8
# correct", but if the lists were filled from
# p_experts = [0.2, 0.4, 0.6, 0.8, 0.95], index -3 is the 0.6 run — confirm
# which run is intended.
probs, y_true = confs[-3], true[-3]
# The per-expert prediction lists are taken in reverse order; presumably so
# that they line up with the expert columns of `probs` — TODO confirm.
experts = exps[-3][::-1]

In [63]:
# Split sizes: the first 80% of the samples (rounded down) form the
# validation part, the remainder the test part.
n_total = len(y_true)
n_val = int(0.8 * n_total)
n_test = n_total - n_val
print("N val:{}".format(n_val))
print("N test:{}".format(n_test))

N val:7372
N test:1844


# Validation

In [64]:
## 2. get Q_hat
# Calibrate the conformal threshold `qhat` on the first `n_val` samples, using
# only the samples whose arg-max output is an expert column (deferred samples).

alpha = 0.1  # the quantile below is taken at (roughly) the 1 - alpha level
n_classes_exp = n_classes + n_experts

# === Only on deferred samples
# `r` marks validation samples whose top-scoring output index is >= n_classes.
_, predicted = torch.max(probs[:n_val].data, 1)
r = (predicted >= n_classes_exp - n_experts)
n_cal = int(r.sum())
if n_cal == 0:
    raise ValueError("No deferred validation samples: cannot calibrate qhat.")

# Filter
# `experts` is used as-is (no extra [::-1] here); it is expected to already be
# ordered like the expert columns of `probs`.
probs_val = probs[:n_val, n_classes:][r]
experts_val = [np.array(exp[:n_val])[r] for exp in experts]
y_true_val = np.array(y_true[:n_val])[r]

# Model expert probs ===
# Sort J model outputs for experts: `sort` holds each row in decreasing order,
# `pi[i, k]` is the expert column sitting at rank k for sample i.
probs_experts = probs_val
sort, pi = probs_experts.sort(dim=1, descending=True)

# Correctness experts ===
# correct_exp[i, j] is True when expert j's prediction equals the label of sample i.
correct_exp = (np.array(experts_val) == np.array(y_true_val)).T
# idx for correct experts: [[0,1,2], [1,2], [], ...]
correct_exp_idx = [np.where(correct_exp_i)[0] for correct_exp_i in correct_exp]

# Among the correct experts of each sample, find the one with the lowest model
# output, i.e. the last correct expert reached when experts are added in
# decreasing order of model output.
# indexes are not the real expert index, but positions within the correct
# subset, e.g. [[1, 0 ,2],  [1,0], [], ...]; -1 marks "no correct expert".
pi_corr_exp = [probs_experts[i, corr_exp].sort(descending=True)[1] for i, corr_exp in enumerate(correct_exp)]
pi_corr_exp_stop = [pi_corr_exp_i[-1] if len(pi_corr_exp_i)!=0 else -1 for pi_corr_exp_i in pi_corr_exp]  # last expert

# obtain real expert index back, e.g. [2,1,-1,...]
pi_stop = [correct_exp_idx[i][pi_corr_exp_stop_i] if len(correct_exp_idx[i])!=0 else -1 for i, pi_corr_exp_stop_i in enumerate(pi_corr_exp_stop)]


# =========
# Conformal score per sample: cumulative model output down to (and including)
# the stopping expert. `pi.argsort(1)[i, j]` is the rank of expert j, so the
# gather re-indexes the cumulative sums by expert column.
pi_stop_idx = torch.tensor(pi_stop, dtype=torch.long)
has_correct = pi_stop_idx >= 0
cumsum_sorted = sort.cumsum(dim=1)
cumsum_by_expert = cumsum_sorted.gather(1, pi.argsort(1))
rows = torch.arange(pi_stop_idx.shape[0])
# Samples without any correct expert get the total mass ("add all values").
# Indexing with -1 would instead pick the cumulative sum at the rank of the
# last expert column, which is the total only when that expert is ranked last.
scores = torch.where(
    has_correct,
    cumsum_by_expert[rows, pi_stop_idx.clamp(min=0)],
    cumsum_sorted[:, -1],
)

# Finite-sample corrected level; clamp to 1.0 because torch.quantile rejects
# q > 1, which the correction produces when there are few calibration samples.
q_level = min(1.0, float(np.ceil((n_cal + 1) * (1 - alpha)) / n_cal))
qhat = torch.quantile(scores, q_level, interpolation="higher")

qhat

tensor(1.9849)

# Test

In [65]:
# Output layout used by the test section: number of class outputs, number of
# expert outputs, and their total.
n_classes, n_experts = 10, 4
n_classes_exp = n_classes + n_experts

In [121]:
# Build the expert prediction sets on the held-out samples (everything after
# the first `n_val`), restricted to samples whose arg-max output is an expert.

# === Only on deferred samples
_, predicted = probs[n_val:].data.max(dim=1)
r_test = (predicted >= n_classes_exp - n_experts)

# Filter
# Keep the expert columns (index 10 onwards), the expert predictions and the
# labels of the deferred held-out samples only.
probs_test = probs[n_val:, 10:][r_test]
experts_test = [np.array(exp[n_val:])[r_test] for exp in experts]
y_true_test = np.array(y_true[n_val:])[r_test]

# Sort J model outputs for experts. sorted probs and sorted indexes
sort_test, pi_test = torch.sort(probs_test, dim=1, descending=True)
# Number of top-ranked experts whose cumulative output stays <= qhat.
# Note: this rebinds `pi_stop`, which the calibration cell used for a list.
pi_stop = (torch.cumsum(sort_test, dim=1) <= qhat).sum(axis=1)

# Prediction sets: the first pi_stop[i] experts of each sample's ranking.
# NOTE(review): the original comment said "not allow empty sets", but nothing
# here enforces a minimum size — a row whose largest output already exceeds
# qhat produces an empty set. Confirm the intended behaviour.
prediction_sets = [ranked[:size].numpy() for ranked, size in zip(pi_test, pi_stop)]
prediction_sets

[array([2, 1, 0]),
 array([2, 0, 1, 3]),
 array([2, 1, 0, 3]),
 array([2, 1, 0, 3]),
 array([2, 1, 0, 3]),
 array([2, 0, 1, 3]),
 array([2, 1, 0, 3]),
 array([2, 1, 0]),
 array([2, 1, 0, 3]),
 array([1, 2, 0, 3]),
 array([2, 1, 0]),
 array([2, 1]),
 array([2, 1, 0, 3]),
 array([2, 1, 0]),
 array([1, 2, 0, 3]),
 array([2, 1, 0, 3]),
 array([2, 1, 0, 3]),
 array([1, 2, 0, 3]),
 array([2, 1, 0, 3]),
 array([2, 1, 0]),
 array([2, 1, 0, 3]),
 array([2, 0, 1, 3]),
 array([2, 1, 0, 3]),
 array([2, 1, 0, 3]),
 array([2, 1, 0, 3]),
 array([2, 1, 0, 3]),
 array([2, 1, 0, 3]),
 array([2, 1]),
 array([2, 1, 0, 3]),
 array([2, 1, 0, 3]),
 array([2, 1, 0]),
 array([2, 1, 0, 3]),
 array([2, 1, 0]),
 array([2, 1, 0, 3]),
 array([2, 1, 0, 3]),
 array([1, 2, 0, 3]),
 array([2, 1, 0, 3]),
 array([2, 1]),
 array([2, 1, 0, 3]),
 array([2, 1, 0]),
 array([2, 1, 0, 3]),
 array([2, 1, 0]),
 array([2, 1, 0, 3]),
 array([2, 1, 0, 3]),
 array([2, 1, 0, 3]),
 array([2, 1, 0, 3]),
 array([2, 1, 0, 3]),
 array([2, 

In [135]:
# Display the current prediction sets (the whole list is printed).
# NOTE(review): the saved output below starts with [0, 1, 2], [0, 1, 2, 3]
# whereas the cell that builds `prediction_sets` shows [2, 1, 0], [2, 0, 1, 3];
# presumably a cell not shown here modified the list in between — confirm.
prediction_sets

[array([0, 1, 2]),
 array([0, 1, 2, 3]),
 array([2, 1, 0, 3]),
 array([2, 1, 0, 3]),
 array([2, 1, 0, 3]),
 array([2, 0, 1, 3]),
 array([2, 1, 0, 3]),
 array([2, 1, 0]),
 array([2, 1, 0, 3]),
 array([1, 2, 0, 3]),
 array([2, 1, 0]),
 array([2, 1]),
 array([2, 1, 0, 3]),
 array([2, 1, 0]),
 array([1, 2, 0, 3]),
 array([2, 1, 0, 3]),
 array([2, 1, 0, 3]),
 array([1, 2, 0, 3]),
 array([2, 1, 0, 3]),
 array([2, 1, 0]),
 array([2, 1, 0, 3]),
 array([2, 0, 1, 3]),
 array([2, 1, 0, 3]),
 array([2, 1, 0, 3]),
 array([2, 1, 0, 3]),
 array([2, 1, 0, 3]),
 array([2, 1, 0, 3]),
 array([2, 1]),
 array([2, 1, 0, 3]),
 array([2, 1, 0, 3]),
 array([2, 1, 0]),
 array([2, 1, 0, 3]),
 array([2, 1, 0]),
 array([2, 1, 0, 3]),
 array([2, 1, 0, 3]),
 array([1, 2, 0, 3]),
 array([2, 1, 0, 3]),
 array([2, 1]),
 array([2, 1, 0, 3]),
 array([2, 1, 0]),
 array([2, 1, 0, 3]),
 array([2, 1, 0]),
 array([2, 1, 0, 3]),
 array([2, 1, 0, 3]),
 array([2, 1, 0, 3]),
 array([2, 1, 0, 3]),
 array([2, 1, 0, 3]),
 array([2, 

In [134]:
# Display the current prediction sets again (same expression as the previous
# cell; the whole list is printed).
prediction_sets

[array([0, 1, 2]),
 array([0, 1, 2, 3]),
 array([2, 1, 0, 3]),
 array([2, 1, 0, 3]),
 array([2, 1, 0, 3]),
 array([2, 0, 1, 3]),
 array([2, 1, 0, 3]),
 array([2, 1, 0]),
 array([2, 1, 0, 3]),
 array([1, 2, 0, 3]),
 array([2, 1, 0]),
 array([2, 1]),
 array([2, 1, 0, 3]),
 array([2, 1, 0]),
 array([1, 2, 0, 3]),
 array([2, 1, 0, 3]),
 array([2, 1, 0, 3]),
 array([1, 2, 0, 3]),
 array([2, 1, 0, 3]),
 array([2, 1, 0]),
 array([2, 1, 0, 3]),
 array([2, 0, 1, 3]),
 array([2, 1, 0, 3]),
 array([2, 1, 0, 3]),
 array([2, 1, 0, 3]),
 array([2, 1, 0, 3]),
 array([2, 1, 0, 3]),
 array([2, 1]),
 array([2, 1, 0, 3]),
 array([2, 1, 0, 3]),
 array([2, 1, 0]),
 array([2, 1, 0, 3]),
 array([2, 1, 0]),
 array([2, 1, 0, 3]),
 array([2, 1, 0, 3]),
 array([1, 2, 0, 3]),
 array([2, 1, 0, 3]),
 array([2, 1]),
 array([2, 1, 0, 3]),
 array([2, 1, 0]),
 array([2, 1, 0, 3]),
 array([2, 1, 0]),
 array([2, 1, 0, 3]),
 array([2, 1, 0, 3]),
 array([2, 1, 0, 3]),
 array([2, 1, 0, 3]),
 array([2, 1, 0, 3]),
 array([2, 

In [79]:
# Prediction sets of the samples selected by `mask`.
# Note: `mask` is only assigned in a cell further down (`y_true_test == 2`),
# so this cell depends on that one having been run first.
[pred_set for i, pred_set in enumerate(prediction_sets) if mask[i]]

[tensor([1, 2, 0, 3]),
 tensor([2, 1, 0, 3]),
 tensor([2, 1, 0]),
 tensor([2, 1, 0, 3]),
 tensor([2, 1, 0, 3]),
 tensor([2, 1, 0, 3]),
 tensor([1, 2, 0, 3]),
 tensor([2, 1]),
 tensor([2, 1, 0]),
 tensor([2, 1, 0]),
 tensor([2, 1, 0, 3]),
 tensor([2, 1, 0, 3]),
 tensor([2, 1, 0, 3]),
 tensor([2, 1, 0, 3]),
 tensor([2, 1, 0, 3]),
 tensor([2, 1]),
 tensor([2, 1, 0]),
 tensor([1, 2, 0, 3]),
 tensor([1, 2, 0, 3]),
 tensor([2, 1, 0, 3])]

In [80]:
# For every deferred test sample, work out which experts predicted its label.
# `experts_test` is not reversed here; it is sliced from `experts`, which was
# already reversed when the run was selected.
# correct_exp[i, j] is True when expert j matches the label of sample i.
correct_exp = np.transpose(np.array(experts_test) == np.array(y_true_test))

# Per sample, the indices of the experts that were right (possibly empty).
correct_exp_idx = [np.flatnonzero(sample_row) for sample_row in correct_exp]
correct_exp_idx

[array([0, 2]),
 array([1, 2]),
 array([2]),
 array([0, 1, 2]),
 array([0, 2]),
 array([], dtype=int64),
 array([], dtype=int64),
 array([1]),
 array([0, 1]),
 array([1, 2]),
 array([1, 2]),
 array([1, 2]),
 array([0, 1, 2]),
 array([0, 2]),
 array([2]),
 array([], dtype=int64),
 array([2]),
 array([2]),
 array([], dtype=int64),
 array([0, 1, 2]),
 array([], dtype=int64),
 array([1]),
 array([0, 1]),
 array([2]),
 array([0, 1, 2]),
 array([0, 1, 2]),
 array([0, 1, 2]),
 array([1]),
 array([0, 1, 2]),
 array([], dtype=int64),
 array([1, 2]),
 array([1, 2]),
 array([2]),
 array([0, 1, 2]),
 array([1]),
 array([], dtype=int64),
 array([1, 2]),
 array([2]),
 array([], dtype=int64),
 array([1, 2]),
 array([1]),
 array([0, 1, 2]),
 array([0, 1, 2]),
 array([2]),
 array([0]),
 array([0, 1, 2]),
 array([], dtype=int64),
 array([], dtype=int64),
 array([1]),
 array([0, 2]),
 array([0, 1, 2]),
 array([1, 2]),
 array([0, 1, 2]),
 array([0, 2]),
 array([0, 2]),
 array([], dtype=int64),
 array([0, 

In [81]:
# Correct-expert indices of the samples selected by `mask`.
# Note: `mask` is only assigned in a cell further down (`y_true_test == 2`),
# so this cell depends on that one having been run first.
[expert_ids for i, expert_ids in enumerate(correct_exp_idx) if mask[i]]

[array([1, 2]),
 array([0, 1, 2]),
 array([0, 2]),
 array([0, 1]),
 array([0, 1, 2]),
 array([], dtype=int64),
 array([], dtype=int64),
 array([2]),
 array([1, 2]),
 array([0, 1, 2]),
 array([0, 2]),
 array([0, 1]),
 array([1, 2]),
 array([0, 2]),
 array([0, 2]),
 array([0, 1, 2]),
 array([0, 1, 2, 3]),
 array([0]),
 array([0, 1, 2]),
 array([1, 2])]

In [78]:
# Boolean selector for the deferred test samples whose true label is 2,
# followed by how many such samples there are.
mask = np.equal(y_true_test, 2)
mask.sum()

20

In [82]:
# Fraction of entries of `r` that are True, i.e. the share of validation
# samples whose arg-max output was an expert column.
r.sum() / r.shape[0]

tensor(0.0722)

In [83]:
# Expert predictions (one row per expert) for the samples selected by `mask`.
np.compress(mask, np.array(experts_test), axis=1)

array([[4, 2, 2, 2, 2, 7, 3, 0, 3, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 5],
       [2, 2, 9, 2, 2, 8, 5, 3, 2, 2, 7, 2, 2, 5, 8, 2, 2, 1, 2, 2],
       [2, 2, 2, 6, 2, 8, 3, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 3, 2, 2],
       [7, 7, 7, 8, 9, 1, 3, 3, 0, 4, 6, 1, 3, 5, 0, 8, 2, 8, 8, 6]])

In [84]:
# Expert-column model outputs (one row per sample) for the samples selected
# by `mask`.
probs_test[torch.as_tensor(mask)]

tensor([[0.6075, 0.6305, 0.6299, 0.0925],
        [0.5902, 0.6056, 0.6182, 0.0934],
        [0.6232, 0.6599, 0.6620, 0.1004],
        [0.6073, 0.6319, 0.6345, 0.0997],
        [0.5959, 0.6187, 0.6273, 0.0934],
        [0.5049, 0.5125, 0.5203, 0.0897],
        [0.5651, 0.5817, 0.5793, 0.0974],
        [0.6408, 0.6894, 0.7158, 0.0908],
        [0.6227, 0.6596, 0.6710, 0.0972],
        [0.6158, 0.6408, 0.6551, 0.1030],
        [0.4767, 0.4868, 0.5001, 0.1108],
        [0.5961, 0.6142, 0.6508, 0.1001],
        [0.5836, 0.6143, 0.6255, 0.0969],
        [0.4973, 0.5253, 0.5301, 0.0998],
        [0.5880, 0.6158, 0.6189, 0.0979],
        [0.6402, 0.6672, 0.6830, 0.0962],
        [0.6093, 0.6523, 0.6721, 0.0862],
        [0.5011, 0.5169, 0.5119, 0.1009],
        [0.5955, 0.6202, 0.6200, 0.1042],
        [0.5513, 0.5730, 0.5833, 0.0868]])