# Evaluating Adaptivity for Conformal Prediction

Size-stratified coverage metric

Divide the observations into several groups depending on the size of prediction sets

In [4]:
import numpy as np
import torch
import math

## for kl_loss_corner

In [21]:
def compute_score(data_path):
    data = np.load(data_path, allow_pickle=True)
    tp = data.item()['tp']
    scores = []
    for i in range(len(tp)):
        prediction = np.array(tp[i][0])
        pred = prediction[:,:8]
        target = np.array(tp[i][1])
        cov = prediction[:,9:]
        score = np.abs(pred-target) / np.sqrt(np.exp(cov))
        for s in score:
            scores.append(list(s))
    return np.array(scores)

def compute_quantile(scores, alpha):
    n = scores.shape[0]
    q_level = np.ceil((n+1)*(1-alpha))/n
    quantile = []
    for i in range(scores.shape[1]):
        q = np.quantile(scores[:,i], q_level, interpolation='higher')
        quantile.append(q)
    return quantile

def get_range(data_path):
    data = np.load(data_path, allow_pickle=True)
    tp = data.item()['tp']
    sizes = []
    for i in range(len(tp)):
        prediction = np.array(tp[i][0])
        cov = prediction[:,9:]
        cov = np.sqrt(np.exp(cov))
        for c in cov:
            sizes.append(list(c))
    sizes = np.array(sizes) * 2
    print("max: {}".format(np.max(sizes, axis=0)))
    print("min: {}".format(np.min(sizes, axis=0)))
    sum_sizes = np.sum(sizes, axis=1)
    print(sum_sizes.shape)
    print(np.max(sum_sizes), np.min(sum_sizes))
    return

def divide_group(data_path, num_group=10):
    data = np.load(data_path, allow_pickle=True)
    tp = data.item()['tp']
    sizes = []
    for i in range(len(tp)):
        prediction = np.array(tp[i][0])
        cov = prediction[:,9:]
        cov = np.sqrt(np.exp(cov))
        for c in cov:
            sizes.append(list(c))
    sizes = np.array(sizes) * 2
    sum_sizes = np.sum(sizes, axis=1)
    sum_sizes.sort()
    sum_sizes = sum_sizes
    divides = [0]
    num_in_each_group = sum_sizes.shape[0] // num_group
    for index, v in enumerate(sum_sizes):
        if (index != 0) and (index % num_in_each_group == 0):
            divides.append(v)
    groups = []
    for i in range(num_group - 1):
        groups.append(np.logical_and(divides[i] < sum_sizes, sum_sizes  <= divides[i+1]))
    groups.append(sum_sizes > divides[num_group-1])
    return groups

def evaluate_ssc_corner(data_path_cal, data_path_test, alpha, num_group=10):
    scores_cal = compute_score(data_path_cal)
    scores_test = compute_score(data_path_test)
    qhat = np.array(compute_quantile(scores_cal, alpha))
    groups = divide_group(data_path_test, num_group)
    coverages = np.zeros((num_group,))
    for i in range(num_group):
        scores_test_d = scores_test[groups[i]]
        coverages[i] = (scores_test_d <= qhat).astype(float).mean()
    print("probabilies from small prediction sets to large prediction sets: ")
    print(coverages)
    scc = min(coverages)
    print("ssc: {}, 1-alpha: {}".format(scc, 1-alpha))
    print("diff: {}".format(1-alpha-scc))

def evaluate_ssc_corner_wo_cp( data_path_test, alpha, num_group=10, qr = 1):
    scores_test = compute_score(data_path_test)
    d_size = scores_test.shape[1]
    qhat = np.ones((1,d_size),dtype=np.int64) * qr
    groups = divide_group(data_path_test, num_group)
    coverages = np.zeros((num_group,))
    for i in range(num_group):
        scores_test_d = scores_test[groups[i]]
        coverages[i] = (scores_test_d <= qhat).astype(float).mean()
    print("probabilies from small prediction sets to large prediction sets: ")
    print(coverages)
    scc = min(coverages)
    print("ssc: {}, 1-alpha: {}".format(scc, 1-alpha))
    print("diff: {}".format(1-alpha-scc))

### disco

In [26]:
data_path_cal = "check/check_loss_two_step_corner_val/disco/no_rsu/match_all_data_100_5.npy"
data_path_test = "check/check_loss_two_step_corner/disco/no_rsu/match_all_data_100_5.npy"

In [27]:
# without conformal prediction
confident_range = 0.6827
evaluate_ssc_corner_wo_cp(data_path_test, 1-confident_range, qr=1)

probabilies from small prediction sets to large prediction sets: 
[0.19607023 0.22631539 0.20571016 0.19723236 0.19833485 0.21783759
 0.17765359 0.18381235 0.18293796 0.18400243]
ssc: 0.1776535888077859, 1-alpha: 0.6827
diff: 0.505046411192214


In [28]:
# without conformal prediction
confident_range = 0.9973
evaluate_ssc_corner_wo_cp(data_path_test, 1-confident_range, qr=3)

probabilies from small prediction sets to large prediction sets: 
[0.48943448 0.49403133 0.47194343 0.41754106 0.47137318 0.42575274
 0.44631995 0.43696776 0.43947689 0.48007908]
ssc: 0.41754105839416056, 1-alpha: 0.9973
diff: 0.5797589416058394


In [29]:
confident_range = 0.6827
evaluate_ssc_corner(data_path_cal, data_path_test, 1-confident_range)

probabilies from small prediction sets to large prediction sets: 
[0.70241715 0.69989355 0.68829836 0.61401308 0.67731144 0.5924194
 0.64347628 0.63530262 0.64982512 0.68506691]
ssc: 0.592419403892944, 1-alpha: 0.6827
diff: 0.09028059610705597


In [30]:
confident_range = 0.9973
evaluate_ssc_corner(data_path_cal, data_path_test, 1-confident_range)

probabilies from small prediction sets to large prediction sets: 
[0.96058832 0.97068887 0.96760949 0.90218218 0.977684   0.86465937
 0.93286192 0.91860554 0.92198905 0.93559915]
ssc: 0.8646593673965937, 1-alpha: 0.9973
diff: 0.1326406326034063


### upperbound

In [32]:
data_path_cal = "check/check_loss_two_step_corner_val/upperbound/no_rsu/match_all_data_100_5.npy"
data_path_test = "check/check_loss_two_step_corner/upperbound/no_rsu/match_all_data_100_5.npy"

In [33]:
# without conformal prediction
confident_range = 0.6827
evaluate_ssc_corner_wo_cp(data_path_test, 1-confident_range, qr=1)

probabilies from small prediction sets to large prediction sets: 
[0.2327637  0.21481727 0.17082965 0.15631447 0.19223401 0.16567197
 0.1926761  0.18538167 0.18420277 0.19303472]
ssc: 0.15631447096964338, 1-alpha: 0.6827
diff: 0.5263855290303566


In [34]:
# without conformal prediction
confident_range = 0.9973
evaluate_ssc_corner_wo_cp(data_path_test, 1-confident_range, qr=3)

probabilies from small prediction sets to large prediction sets: 
[0.55494991 0.53013557 0.4670277  0.41780872 0.52707781 0.43099764
 0.49443708 0.4824639  0.48614795 0.5087158 ]
ssc: 0.4178087238432066, 1-alpha: 0.9973
diff: 0.5794912761567934


In [35]:
confident_range = 0.6827
evaluate_ssc_corner(data_path_cal, data_path_test, 1-confident_range)

probabilies from small prediction sets to large prediction sets: 
[0.66444461 0.65550398 0.59895373 0.5358827  0.6532567  0.54203507
 0.60447981 0.59714854 0.60650604 0.62503678]
ssc: 0.5358826996758032, 1-alpha: 0.6827
diff: 0.14681730032419682


In [36]:
confident_range = 0.9973
evaluate_ssc_corner(data_path_cal, data_path_test, 1-confident_range)

probabilies from small prediction sets to large prediction sets: 
[0.95812463 0.96842765 0.91353522 0.87197907 0.95155467 0.84935897
 0.89688329 0.93125553 0.90421456 0.9208223 ]
ssc: 0.8493589743589743, 1-alpha: 0.9973
diff: 0.14794102564102563


### lowerbound

In [37]:
data_path_cal = "check/check_loss_two_step_corner_val/lowerbound/no_rsu/match_all_data_100_5.npy"
data_path_test = "check/check_loss_two_step_corner/lowerbound/no_rsu/match_all_data_100_5.npy"

In [38]:
# without conformal prediction
confident_range = 0.6827
evaluate_ssc_corner_wo_cp(data_path_test, 1-confident_range, qr=1)

probabilies from small prediction sets to large prediction sets: 
[0.19736842 0.21545894 0.19275362 0.25652174 0.21998792 0.22041063
 0.1682971  0.202657   0.19595411 0.18885673]
ssc: 0.16829710144927537, 1-alpha: 0.6827
diff: 0.5144028985507246


In [39]:
# without conformal prediction
confident_range = 0.9973
evaluate_ssc_corner_wo_cp(data_path_test, 1-confident_range, qr=3)

probabilies from small prediction sets to large prediction sets: 
[0.50838967 0.52089372 0.46388889 0.52300725 0.48925121 0.51183575
 0.40289855 0.48580918 0.43592995 0.46617221]
ssc: 0.4028985507246377, 1-alpha: 0.9973
diff: 0.5944014492753622


In [40]:
confident_range = 0.6827
evaluate_ssc_corner(data_path_cal, data_path_test, 1-confident_range)

probabilies from small prediction sets to large prediction sets: 
[0.73853211 0.75157005 0.68623188 0.72518116 0.68448068 0.71902174
 0.55863527 0.66624396 0.62306763 0.67516884]
ssc: 0.558635265700483, 1-alpha: 0.6827
diff: 0.12406473429951692


In [41]:
confident_range = 0.9973
evaluate_ssc_corner(data_path_cal, data_path_test, 1-confident_range)

probabilies from small prediction sets to large prediction sets: 
[0.99945678 1.         1.         0.99987923 0.99474638 0.99716184
 0.9897343  0.99927536 0.99432367 0.9998191 ]
ssc: 0.9897342995169082, 1-alpha: 0.9973
diff: 0.007565700483091797


## for kl_loss_center_ind 

In [43]:
def corner_to_center_box2d_torch(corners):
    p1 = corners[:,0:2]
    p2 = corners[:,2:4]
    p3 = corners[:,4:6]
    p4 = corners[:,6:8]
    center = (p1+p2+p3+p4)/4
    w = (np.sqrt(np.sum(np.power(p1-p4, 2), axis=1, keepdims = True)) + np.sqrt(np.sum(np.power(p2-p3, 2), axis=1, keepdims = True)))/2
    h = (np.sqrt(np.sum(np.power(p1-p2, 2), axis=1, keepdims = True)) + np.sqrt(np.sum(np.power(p3-p4, 2), axis=1, keepdims = True)))/2
    wp = np.sqrt(np.sum(np.power(p1-p4, 2), axis=1, keepdims = True))
    sina = np.divide((p4[:,1:2] - p1[:,1:2]), wp)
    cosa = np.divide((p1[:,0:1] - p4[:,0:1]), wp)
    result = np.concatenate((center, w, h, sina, cosa), axis=1)
    return result

def compute_score_center(data_path):
    data = np.load(data_path, allow_pickle=True)
    tp = data.item()['tp']
    scores = []
    for i in range(len(tp)):
        prediction = np.array(tp[i][0])
        pred = prediction[:,:8]
        target = np.array(tp[i][1])
        cov = prediction[:,9:]
        std = np.sqrt(np.exp(cov))
        pred = corner_to_center_box2d_torch(pred)
        target = corner_to_center_box2d_torch(target)
        score = np.abs(pred-target) / np.sqrt(np.exp(cov))
        for s in score:
            scores.append(list(s))
    return np.array(scores)

def compute_quantile_center(scores, alpha):
    n = scores.shape[0]
    q_level = np.ceil((n+1)*(1-alpha))/n
    quantile = []
    for i in range(scores.shape[1]):
        q = np.quantile(scores[:,i], q_level, interpolation='higher')
        quantile.append(q)
    return quantile

def evaluate_ssc_center(data_path_cal, data_path_test, alpha, num_group=10):
    scores_cal = compute_score_center(data_path_cal)
    scores_test = compute_score_center(data_path_test)
    qhat = np.array(compute_quantile_center(scores_cal, alpha))
    groups = divide_group(data_path_test, num_group)
    coverages = np.zeros((num_group,))
    for i in range(num_group):
        scores_test_d = scores_test[groups[i]]
        coverages[i] = (scores_test_d <= qhat).astype(float).mean()
    print("probabilies from small prediction sets to large prediction sets: ")
    print(coverages)
    scc = min(coverages)
    print("ssc: {}, 1-alpha: {}".format(scc, 1-alpha))
    print("diff: {}".format(1-alpha-scc))

def evaluate_ssc_center_wo_cp( data_path_test, alpha, num_group=10, qr = 1):
    scores_test = compute_score_center(data_path_test)
    d_size = scores_test.shape[1]
    qhat = np.ones((1,d_size),dtype=np.int64) * qr
    groups = divide_group(data_path_test, num_group)
    coverages = np.zeros((num_group,))
    for i in range(num_group):
        scores_test_d = scores_test[groups[i]]
        coverages[i] = (scores_test_d <= qhat).astype(float).mean()
    print("probabilies from small prediction sets to large prediction sets: ")
    print(coverages)
    scc = min(coverages)
    print("ssc: {}, 1-alpha: {}".format(scc, 1-alpha))
    print("diff: {}".format(1-alpha-scc))

### disco

In [44]:
data_path_cal = "check/check_loss_two_step_center_ind_val/disco/no_rsu/match_all_data_100_5.npy"
data_path_test = "check/check_loss_two_step_center_ind/disco/no_rsu/match_all_data_100_5.npy"

In [45]:
# without conformal prediction
confident_range = 0.6827
evaluate_ssc_center_wo_cp(data_path_test, 1-confident_range, qr=1)

probabilies from small prediction sets to large prediction sets: 
[0.36111111 0.35355605 0.33438386 0.31857338 0.34021431 0.32004412
 0.32151487 0.33732535 0.32955142 0.35592686]
ssc: 0.31857337955667614, 1-alpha: 0.6827
diff: 0.36412662044332383


In [46]:
# without conformal prediction
confident_range = 0.9973
evaluate_ssc_center_wo_cp(data_path_test, 1-confident_range, qr=3)

probabilies from small prediction sets to large prediction sets: 
[0.56285444 0.55903982 0.53556046 0.52284904 0.54716882 0.53587562
 0.5239521  0.54538292 0.53062296 0.5600042 ]
ssc: 0.5228490387645761, 1-alpha: 0.9973
diff: 0.47445096123542385


In [47]:
confident_range = 0.6827
evaluate_ssc_center(data_path_cal, data_path_test, 1-confident_range)

probabilies from small prediction sets to large prediction sets: 
[0.70321361 0.70963336 0.6928774  0.64917533 0.68715201 0.65216935
 0.66188675 0.69975838 0.67107889 0.71337747]
ssc: 0.6491753335434395, 1-alpha: 0.6827
diff: 0.03352466645656049


In [48]:
confident_range = 0.9973
evaluate_ssc_center(data_path_cal, data_path_test, 1-confident_range)

probabilies from small prediction sets to large prediction sets: 
[0.9921235  0.99243618 0.9940645  0.97573275 0.99353924 0.9734741
 0.98256119 0.98387436 0.98298141 0.98591845]
ssc: 0.9734741044227335, 1-alpha: 0.9973
diff: 0.023825895577266443


### upperbound

In [49]:
data_path_cal = "check/check_loss_two_step_center_ind_val/upperbound/no_rsu/match_all_data_100_5.npy"
data_path_test = "check/check_loss_two_step_center_ind/upperbound/no_rsu/match_all_data_100_5.npy"

In [50]:
# without conformal prediction
confident_range = 0.6827
evaluate_ssc_center_wo_cp(data_path_test, 1-confident_range, qr=1)

probabilies from small prediction sets to large prediction sets: 
[0.34052505 0.36467451 0.3147701  0.32005232 0.34399839 0.34173458
 0.32266828 0.32935909 0.34294195 0.35399698]
ssc: 0.31477009759533153, 1-alpha: 0.6827
diff: 0.36792990240466844


In [51]:
# without conformal prediction
confident_range = 0.9973
evaluate_ssc_center_wo_cp(data_path_test, 1-confident_range, qr=3)

probabilies from small prediction sets to large prediction sets: 
[0.54993965 0.57299527 0.52439883 0.52756817 0.5497535  0.53773015
 0.53481236 0.56580139 0.56756213 0.58200101]
ssc: 0.5243988328805715, 1-alpha: 0.9973
diff: 0.4729011671194284


In [52]:
confident_range = 0.6827
evaluate_ssc_center(data_path_cal, data_path_test, 1-confident_range)

probabilies from small prediction sets to large prediction sets: 
[0.65867029 0.66480531 0.64634269 0.62486166 0.65912064 0.63135124
 0.64211691 0.65097092 0.67486669 0.6743087 ]
ssc: 0.6248616561022236, 1-alpha: 0.6827
diff: 0.05783834389777642


In [53]:
confident_range = 0.9973
evaluate_ssc_center(data_path_cal, data_path_test, 1-confident_range)

probabilies from small prediction sets to large prediction sets: 
[0.99014283 0.99079384 0.97836805 0.96664654 0.98797666 0.95492504
 0.97343797 0.97560117 0.97484656 0.97968829]
ssc: 0.9549250427608411, 1-alpha: 0.9973
diff: 0.04237495723915885


### lowerbound

In [54]:
data_path_cal = "check/check_loss_two_step_center_ind_val/lowerbound/no_rsu/match_all_data_100_5.npy"
data_path_test = "check/check_loss_two_step_center_ind/lowerbound/no_rsu/match_all_data_100_5.npy"

In [55]:
# without conformal prediction
confident_range = 0.6827
evaluate_ssc_center_wo_cp(data_path_test, 1-confident_range, qr=1)

probabilies from small prediction sets to large prediction sets: 
[0.37215724 0.42857143 0.35324232 0.37794572 0.37713311 0.39192264
 0.35210466 0.37485779 0.37493905 0.40487805]
ssc: 0.3521046643913538, 1-alpha: 0.6827
diff: 0.3305953356086462


In [56]:
# without conformal prediction
confident_range = 0.9973
evaluate_ssc_center_wo_cp(data_path_test, 1-confident_range, qr=3)

probabilies from small prediction sets to large prediction sets: 
[0.55279402 0.60685844 0.53258573 0.57711685 0.57727938 0.58077361
 0.51990899 0.59865106 0.56777182 0.6095122 ]
ssc: 0.5199089874857793, 1-alpha: 0.9973
diff: 0.47739101251422067


In [57]:
confident_range = 0.6827
evaluate_ssc_center(data_path_cal, data_path_test, 1-confident_range)

probabilies from small prediction sets to large prediction sets: 
[0.65115335 0.69055745 0.64708272 0.67528035 0.66934829 0.67804323
 0.60385178 0.70315293 0.65862181 0.70390244]
ssc: 0.6038517796196977, 1-alpha: 0.6827
diff: 0.07884822038030226


In [58]:
confident_range = 0.9973
evaluate_ssc_center(data_path_cal, data_path_test, 1-confident_range)

probabilies from small prediction sets to large prediction sets: 
[0.98131904 0.99041118 0.97472778 0.98000975 0.97805948 0.9778157
 0.93848529 0.96635787 0.96936454 0.96577236]
ssc: 0.9384852917276125, 1-alpha: 0.9973
diff: 0.05881470827238744
