# Evaluating Adaptivity for Conformal Prediction

Size-stratified coverage metric

Divide the observations into several groups according to the size of their prediction sets, so that coverage can be evaluated separately within each group.

In [1]:
import numpy as np
import torch
import math

## for kl_loss_corner

In [60]:
def compute_score(data_path):
    """Compute per-row normalized absolute residuals from a saved results file.

    The file at ``data_path`` is loaded with ``np.load(..., allow_pickle=True)``
    and must unpickle to a dict with a ``'tp'`` entry.  Each element of
    ``tp`` is indexed as ``(prediction, target)``.

    For every prediction row, columns ``0..7`` are compared against the
    target and columns ``9..`` are passed through ``sqrt(exp(.))`` to form
    the divisor (presumably a log-variance output -- confirm against the
    model head).  Column 8 is not used here.

    Returns:
        ``np.ndarray`` with one row per prediction row across all entries.
    """
    tp = np.load(data_path, allow_pickle=True).item()['tp']

    rows = []
    for idx in range(len(tp)):
        entry = tp[idx]
        output = np.array(entry[0])
        truth = np.array(entry[1])
        # |prediction - target| scaled by the spread derived from columns 9+.
        spread = np.sqrt(np.exp(output[:, 9:]))
        normalized = np.abs(output[:, :8] - truth) / spread
        rows.extend(list(row) for row in normalized)
    return np.array(rows)

def compute_quantile(scores, alpha):
    """Return the per-column conformal quantile of calibration scores.

    The quantile level is ``ceil((n + 1) * (1 - alpha)) / n`` where ``n`` is
    the number of rows in ``scores``; each column is evaluated independently
    using the "higher" rule (the result is always an observed score).

    Args:
        scores: 2-D array, one row per calibration sample and one column per
            scored quantity.
        alpha: Target miscoverage level.

    Returns:
        A list with one quantile value per column of ``scores``.

    Raises:
        ValueError: If ``scores`` has no rows.
    """
    n = scores.shape[0]
    if n == 0:
        raise ValueError("scores must contain at least one row")

    # With few calibration rows the corrected level can exceed 1, which
    # np.quantile rejects; fall back to the column maximum in that case.
    q_level = min(np.ceil((n + 1) * (1 - alpha)) / n, 1.0)

    try:
        per_column = np.quantile(scores, q_level, axis=0, method='higher')
    except TypeError:
        # NumPy < 1.22 only knows the older ``interpolation`` keyword.
        per_column = np.quantile(scores, q_level, axis=0, interpolation='higher')
    return list(per_column)

def get_range(data_path):
    """Print summary statistics of the sizes stored in a results file.

    Loads the pickled dict at ``data_path``, takes columns ``9..`` of every
    prediction in its ``'tp'`` entry, and converts them to sizes via
    ``2 * sqrt(exp(.))``.  Prints, in order: the per-column maximum, the
    per-column minimum, the shape of the per-row size totals, and the
    maximum and minimum of those totals.  Returns ``None``.
    """
    tp = np.load(data_path, allow_pickle=True).item()['tp']

    rows = []
    for idx in range(len(tp)):
        spread = np.sqrt(np.exp(np.array(tp[idx][0])[:, 9:]))
        rows.extend(list(row) for row in spread)
    sizes = 2 * np.array(rows)

    print("max: {}".format(np.max(sizes, axis=0)))
    print("min: {}".format(np.min(sizes, axis=0)))

    # Total size per row, summed over all size columns.
    totals = np.sum(sizes, axis=1)
    print(totals.shape)
    print(np.max(totals), np.min(totals))

def divide_group_corner(data_path, num_group=10):
    """Split the total sizes in a results file into size-ordered groups.

    Loads the pickled dict at ``data_path``, converts columns ``9..`` of
    every prediction in its ``'tp'`` entry to sizes via ``2 * sqrt(exp(.))``,
    sums them per row and sorts the totals ascending.  Group boundaries are
    the sorted totals found at every ``n // num_group``-th position.

    NOTE(review): the returned masks index the *sorted* totals, not the rows
    in their original file order -- confirm that callers apply them to
    identically sorted data.

    Args:
        data_path: Path to the ``.npy`` results file.
        num_group: Number of groups to produce.

    Returns:
        A list of ``num_group`` boolean arrays over the sorted totals.  Group
        ``i`` selects ``boundary[i] < total <= boundary[i + 1]``; the last
        group selects everything above the final boundary used.

    Raises:
        ValueError: If ``num_group`` is smaller than 1 or the file holds
            fewer rows than ``num_group``.
    """
    if num_group < 1:
        raise ValueError("num_group must be at least 1, got {}".format(num_group))

    data = np.load(data_path, allow_pickle=True)
    tp = data.item()['tp']
    sizes = []
    for i in range(len(tp)):
        prediction = np.array(tp[i][0])
        spread = np.sqrt(np.exp(prediction[:, 9:]))
        sizes.extend(list(row) for row in spread)
    sizes = np.array(sizes) * 2
    sum_sizes = np.sum(sizes, axis=1)
    sum_sizes.sort()

    num_samples = sum_sizes.shape[0]
    if num_samples < num_group:
        # A zero group size would otherwise surface as ZeroDivisionError.
        raise ValueError(
            "cannot split {} samples into {} groups".format(num_samples, num_group)
        )
    num_in_each_group = num_samples // num_group

    # Boundaries: 0, then the sorted totals at positions k, 2k, 3k, ...
    divides = [0] + list(sum_sizes[num_in_each_group::num_in_each_group])

    groups = [
        np.logical_and(divides[i] < sum_sizes, sum_sizes <= divides[i + 1])
        for i in range(num_group - 1)
    ]
    groups.append(sum_sizes > divides[num_group - 1])
    return groups

### disco

In [57]:
# Build the size groups (default: 10) for the disco / no_rsu results file.
data_path = "check/check_loss_two_step_corner/disco/no_rsu/match_all_data_100_5.npy"
groups = divide_group_corner(data_path)

data size : (32881,)
0 0.08612249213197869
0.08612249213197869 0.2010650212460241
0.2010650212460241 0.2399124569523825
0.2399124569523825 0.2777768722642634
0.2777768722642634 0.32539610887189485
0.32539610887189485 0.37689525129267276
0.37689525129267276 0.4401605506880663
0.4401605506880663 0.5183131784311394
0.5183131784311394 0.6529042129644977
0.6529042129644977


In [59]:
# Number of entries selected by each group mask (row-wise sum of the booleans).
np.array(groups).astype(float).sum(axis=1)

array([3289., 3288., 3288., 3288., 3288., 3288., 3288., 3288., 3288.,
       3288.])