# Demo on active testing from paper
Active Testing: Sample–Efficient Model Evaluation

Active Surrogate Estimators: An Active Learning Approach to Label–Efficient Model Evaluation

In [17]:
import os, sys
import torch, json
import numpy as np

from main import build_model_main
from util.slconfig import SLConfig
from datasets import build_dataset
from util.visualizer import COCOVisualizer
from util import box_ops
import pickle
import copy
import random
from util.utils import slprint, to_device
import util.misc as utils

In [2]:
result_path = "./results/deep_ensemble/"
consider_model_results = {"DINO_0011_4scale.pkl", "DINO_0011_5scale.pkl", "DINO_0023_4scale.pkl", "DINO_0022_5scale.pkl", "DINO_0033_4scale.pkl", "DINO_0031_5scale.pkl"}
ego_model_index = 5
model_checkpoint_path = "ckpts/checkpoint0031_5scale.pth" # change the path of the model checkpoint
result_json_path = "./results/surr_ensemble_compare/"
model_nums = len(consider_model_results)
sample_size_set = [50, 100, 150, 200, 250, 500, 750, 1000, 1500, 2000, 3000]
random_seed_set = [4519, 9524, 5901, 1028, 6382, 5383, 5095, 7635,  890,  608]

## Load model results

In [3]:
# ['pred_boxes', 'pred_logits', 'scores', 'labels', 'boxes']
deep_ensemble_results = []
for path in consider_model_results:
    temp_path = result_path + path
    with open(temp_path, "rb") as outfile:
         deep_ensemble_results.append(pickle.load(outfile))

## Load data set

In [4]:
scale_config = "5scale"
model_config_path = "config/DINO/DINO_" + scale_config + ".py" # change the path of the model config file
args = SLConfig.fromfile(model_config_path) 
args.device = 'cuda' 
args.dataset_file = 'coco'
args.coco_path = "../coco/" # the path of coco
args.fix_size = False
dataset_val = build_dataset(image_set='val', args=args)

data_aug_params: {
  "scales": [
    480,
    512,
    544,
    576,
    608,
    640,
    672,
    704,
    736,
    768,
    800
  ],
  "max_size": 1333,
  "scales2_resize": [
    400,
    500,
    600
  ],
  "scales2_crop": [
    384,
    600
  ]
}
loading annotations into memory...
Done (t=2.71s)
creating index...
index created!


## Model results postprocess
Include: remove prediction with low score, prediction matching

In [5]:
# output_dict = ['pred_boxes', 'pred_logits', 'scores', 'labels', 'boxes']
score_threshold = 0
max_num_select = 300
pro_results = copy.deepcopy(deep_ensemble_results)
for agent in range(model_nums):
    for img_idx in range(len(pro_results[agent])):
        pred_logits = pro_results[agent][img_idx]['pred_logits']
        pred_boxes = pro_results[agent][img_idx]['pred_boxes']
        if pred_logits is None:
            continue
        # only consider prediction with score larger than score_threshold
        select_mask = pred_logits > score_threshold
        select_idx_all = np.reshape(select_mask, -1).nonzero()[0]
        select_idx = select_idx_all // pred_logits.shape[2]
        assert len(select_idx) <= max_num_select
        if len(select_idx) <= 0:
            pro_results[agent][img_idx]['pred_logits']  = None
            pro_results[agent][img_idx]['lables']  = None
            pro_results[agent][img_idx]['scores']  = None
            pro_results[agent][img_idx]['pred_boxes']  = None
            continue
        lables = select_idx_all % pred_logits.shape[2]
        scores = pred_logits[select_mask]
        pred_boxes = pred_boxes[:, select_idx]
        pred_logits = pred_logits[:, select_idx]
        pro_results[agent][img_idx]['pred_logits'] = torch.from_numpy(pred_logits)
        pro_results[agent][img_idx]['lables'] = torch.from_numpy(lables)
        pro_results[agent][img_idx]['scores'] = torch.from_numpy(scores)
        pro_results[agent][img_idx]['pred_boxes'] = torch.from_numpy(pred_boxes)

In [6]:
pro_results[0][0]['pred_boxes'].shape, pro_results[0][0]['pred_logits'].shape, pro_results[0][0]['lables'].shape, pro_results[0][0]['scores'].shape

(torch.Size([1, 8, 4]),
 torch.Size([1, 8, 91]),
 torch.Size([8]),
 torch.Size([8]))

In [7]:
from util.box_ops import box_cxcywh_to_xyxy, generalized_box_iou
from scipy.optimize import linear_sum_assignment
def hungarian_matching(outputs, targets, cost_class = 2.0, cost_bbox = 5.0, cost_giou = 2.0, focal_alpha = 0.25, cost_threshold = 2):
    """ Performs the matching
    Params:
        outputs/targets: This is a dict that contains at least these entries:
             "pred_logits": Tensor of dim [batch_size, num_queries, num_classes] with the classification logits, batch_size = 1
             "pred_boxes": Tensor of dim [batch_size, num_queries, 4] with the predicted box coordinates
             "lables": Tensor of dim [num_queries] with the label of each predicted box
        cost_threshold: threshold for distance between 
    Returns:
        A list of size batch_size, containing tuples of (index_i, index_j) where:
            - index_i is the indices of the selected predictions (in order)
            - index_j is the indices of the corresponding selected targets (in order with high priority)
        For each batch element, it holds:
            len(index_i) = len(index_j) = min(num_queries, num_target_boxes)
    """
    if outputs is None or targets is None or outputs["pred_logits"] is None or targets["pred_logits"] is None:
        return []
    assert outputs["pred_logits"].shape[0] == targets["pred_logits"].shape[0]
    bs, num_queries = outputs["pred_logits"].shape[:2]
    assert bs == 1
    
    # We flatten to compute the cost matrices in a batch
    out_prob = outputs["pred_logits"].flatten(0, 1).sigmoid()  # [batch_size * num_queries, num_classes]
    out_bbox = outputs["pred_boxes"].flatten(0, 1)  # [batch_size * num_queries, 4]
    
    tgt_ids = targets["lables"]
    tgt_bbox = targets["pred_boxes"].flatten(0, 1)
    
    # Compute the classification cost.
    alpha = focal_alpha
    gamma = 2.0
    neg_cost_class = (1 - alpha) * (out_prob ** gamma) * (-(1 - out_prob + 1e-8).log())
    pos_cost_class = alpha * ((1 - out_prob) ** gamma) * (-(out_prob + 1e-8).log())
    cost_class = pos_cost_class[:, tgt_ids] - neg_cost_class[:, tgt_ids]
    
    # Compute the L1 cost between boxes
    cost_bbox = torch.cdist(out_bbox, tgt_bbox, p=1)
    
    # Compute the giou cost betwen boxes            
    cost_giou = -generalized_box_iou(box_cxcywh_to_xyxy(out_bbox), box_cxcywh_to_xyxy(tgt_bbox))
    
    # Final cost matrix
    C = cost_bbox * cost_bbox + cost_class * cost_class + cost_giou * cost_giou
    C = C.view(num_queries, -1)
    
    # num_target_boxes = targets["pred_logits"].shape[1]
    # indices = [linear_sum_assignment(c[i]) for i, c in enumerate(C.split(num_target_boxes, -1))]
    indices = linear_sum_assignment(C)
    indices = [indices[0][np.argsort(indices[1])], np.sort(indices[1])]
    select_mask = [True if C[indices[0][i],indices[1][i]] <= cost_threshold else False for i in range(len(indices[0]))]
    # return [torch.as_tensor(indices[0][select_mask], dtype=torch.int64), torch.as_tensor(indices[1][select_mask], dtype=torch.int64)], C, cost_class, cost_bbox, cost_giou
    # return [torch.as_tensor(indices[0][select_mask], dtype=torch.int64), torch.as_tensor(indices[1][select_mask], dtype=torch.int64)]
    return [indices[0][select_mask], indices[1][select_mask]]

In [8]:
indices = hungarian_matching(pro_results[0][0], pro_results[5][0])
indices

[array([3, 2, 4, 5, 7, 6, 0]), array([0, 1, 2, 3, 4, 6, 7])]

## Acquisition Function

In [9]:
# reference to ASE code AnySurrogateAcquisitionEntropy + LazySurrEnsemble
def mean_surr_predict(surr_model_preds):
    return np.mean(surr_model_preds, axis=1)

def entropy_loss(surr_model_preds, model_preds):
    surr_predicts = np.mean(surr_model_preds, axis=1)
    return -1 * (surr_predicts * np.log(model_preds)).sum(axis=1)

def random_sample_loss(surr_model_preds, model_preds):
    average_loss = np.ones((model_preds.shape[1]), dtype = np.single)
    return average_loss

def get_pred_class_prob_after_matching(inputs, ego_matched_lists, ego_model_index):
    # if the ensemble model has no prediction matched the ego model, which means it predicts the class is background (0) with probability 1
    # output is [objects_num, surr_model_nums , class_nums]
    ego_pred_nums = inputs[ego_model_index]["pred_logits"].shape[1]
    surr_model_nums = len(inputs)
    class_nums = inputs[ego_model_index]["pred_logits"].shape[2]
    background_prob = np.zeros((class_nums), dtype = np.single)
    background_prob[0] = 1
    out_pred_logits = np.zeros((ego_pred_nums, surr_model_nums , class_nums))
    for pred_idx in range(ego_pred_nums):
        for i in range(surr_model_nums):
            if ego_matched_lists[pred_idx][i] != -1:
                temp = inputs[i]["pred_logits"][0][ego_matched_lists[pred_idx][i]].sigmoid().numpy()
                out_pred_logits[pred_idx, i] = temp / temp.sum()
            else:
                out_pred_logits[pred_idx, i] = np.copy(background_prob)
    return out_pred_logits

def get_acquistion_expected_loss(pro_results, model_nums, ego_model_index, loss_type = "min", get_loss = entropy_loss):
    losses = np.zeros(len(pro_results[0]))
    for img_idx in range(len(pro_results[0])):
        if pro_results[ego_model_index][img_idx]["pred_logits"] is None:
            losses[img_idx] = -1000 # template set -1000
            continue
        inputs = [pro_results[i][img_idx] for i in range(model_nums)]
        ego_pred_nums = inputs[ego_model_index]["pred_logits"].shape[1]
        ego_matched_lists = -np.ones((ego_pred_nums, model_nums), dtype=int)
        ego_matched_lists[:, ego_model_index] = np.arange(ego_pred_nums)
        for agent in range(model_nums):
            if agent == ego_model_index:
                continue
            match_indices = hungarian_matching(inputs[agent], inputs[ego_model_index])
            if len(match_indices) == 0 or len(match_indices[0]) == 0:
                continue
            ego_matched_lists[match_indices[1], agent] = match_indices[0]

        inputs = get_pred_class_prob_after_matching(inputs, ego_matched_lists, ego_model_index)
        loss = get_loss(inputs, inputs[:, ego_model_index])
        if loss_type == "min":
            losses[img_idx] = loss.min()
        elif loss_type == "max":
            losses[img_idx] = loss.max()
        elif loss_type == "mean":
            losses[img_idx] = loss.mean()
        else:
            assert False
    # set the loss to be the maximum when prediction of ego model is None
    selected_mask = losses == -1000
    losses[selected_mask] = losses.max()
    return losses

def LURE_weights_for_risk_estimator(weights, N):
    M = weights.size
    if M < N:
        m = np.arange(1, M+1)
        v = (
            1
            + (N-M)/(N-m) * (
                    1 / ((N-m+1) * weights)
                    - 1
                    )
            )
    else:
        v = 1

    return v

def acquire(expected_loss_inputs, samples_num):
    assert samples_num <= expected_loss_inputs.size
    expected_loss = np.copy(expected_loss_inputs)
    # Log-lik can be negative.
    # Make all values positive.
    if (expected_loss < 0).sum() > 0:
        expected_loss += np.abs(expected_loss.min())
    
    if np.any(np.isnan(expected_loss)):
        logging.warning(
            'Found NaN values in expected loss, replacing with 0.')
        logging.info(f'{expected_loss}')
        expected_loss = np.nan_to_num(expected_loss, nan=0)
    pick_sample_idxs = np.zeros((samples_num), dtype = int)
    idx_array = np.arange(expected_loss.size)
    weights = np.zeros((samples_num), dtype = np.single)
    uniform_clip_val = 0.2
    for i in range(samples_num):
        expected_loss /= expected_loss.sum()
        # clip all values less than 10 percent of uniform propability
        expected_loss = np.maximum(uniform_clip_val * 1/expected_loss.size, expected_loss)
        expected_loss /= expected_loss.sum()
        sample = np.random.multinomial(1, expected_loss)
        cur_idx = np.where(sample)[0][0]
        pick_sample_idxs[i] = idx_array[cur_idx]
        weights[i] = expected_loss[cur_idx]
        selected_mask = np.ones((expected_loss.size), dtype=bool)
        selected_mask[cur_idx] = False
        expected_loss = expected_loss[selected_mask]
        idx_array = idx_array[selected_mask]
    return pick_sample_idxs, weights

In [10]:
# pick_sample_idxs, weights = acquire(expected_losses, 10)

## Get True Loss for each samples, computation efficiency

In [11]:
model, criterion, postprocessors = build_model_main(args)
checkpoint = torch.load(model_checkpoint_path, map_location='cpu')
model.load_state_dict(checkpoint['model'])
_ = model.eval()
_ = criterion.eval()

  f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "


In [12]:
def get_whole_data_set_risk_estimator():
    samples_num = len(deep_ensemble_results[ego_model_index])
    true_losses = np.zeros((samples_num))
    device = torch.device(args.device)
    for img_idx in range(samples_num):
        imgs, targets = dataset_val[img_idx]
        targets = [{k: to_device(v, device) for k, v in targets.items()}]
        outputs = {k: to_device(torch.from_numpy(v), device) for k, v in deep_ensemble_results[ego_model_index][img_idx].items()}
        outputs['dn_meta'] = None
        loss_dict = criterion(outputs, targets)
        weight_dict = criterion.weight_dict
        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        loss_dict_reduced_scaled = {k: v * weight_dict[k]
                                    for k, v in loss_dict_reduced.items() if k in weight_dict}
        loss_dict_reduced_unscaled = {f'{k}_unscaled': v
                                      for k, v in loss_dict_reduced.items()}
        loss=sum(loss_dict_reduced_scaled.values())
        true_losses[img_idx] = loss.cpu().numpy()

    return true_losses

In [18]:
true_losses = get_whole_data_set_risk_estimator()

## surrogate deep ensemble loss risk estimation

In [19]:
# not used for loss calculation efficiency
# import random
# def run_one_surr_ensemble_risk_estimator(expected_losses, seed, samples_num):
#     torch.manual_seed(seed)
#     np.random.seed(seed)
#     random.seed(seed)
#     pick_sample_idxs, weights = acquire(expected_losses, samples_num)
#     risk_estimator_weights = LURE_weights_for_risk_estimator(weights, expected_losses.size)
#     true_losses = np.zeros((samples_num))
#     device = torch.device(args.device)
#     for i in range(samples_num):
#         img_idx = pick_sample_idxs[i]
#         imgs, targets = dataset_val[img_idx]
#         targets = [{k: to_device(v, device) for k, v in targets.items()}]
#         outputs = {k: to_device(torch.from_numpy(v), device) for k, v in deep_ensemble_results[ego_model_index][img_idx].items()}
#         outputs['dn_meta'] = None
#         loss_dict = criterion(outputs, targets)
#         weight_dict = criterion.weight_dict
#         # reduce losses over all GPUs for logging purposes
#         loss_dict_reduced = utils.reduce_dict(loss_dict)
#         loss_dict_reduced_scaled = {k: v * weight_dict[k]
#                                     for k, v in loss_dict_reduced.items() if k in weight_dict}
#         loss_dict_reduced_unscaled = {f'{k}_unscaled': v
#                                       for k, v in loss_dict_reduced.items()}
#         loss=sum(loss_dict_reduced_scaled.values())
#         true_losses[i] = loss.cpu().numpy()

#     loss_risk = (true_losses * risk_estimator_weights).mean()
#     return loss_risk

In [20]:
def run_one_surr_ensemble_risk_estimator(true_losses, expected_losses, seed, samples_num):
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)
    pick_sample_idxs, weights = acquire(expected_losses, samples_num)
    risk_estimator_weights = LURE_weights_for_risk_estimator(weights, expected_losses.size)
    sampled_true_losses = true_losses[pick_sample_idxs]

    loss_risk = (sampled_true_losses * risk_estimator_weights).mean()
    return loss_risk

In [21]:
file_path = result_json_path + "surr_ensemble_R50_31_10_runs_min.json"
expected_losses = get_acquistion_expected_loss(pro_results, model_nums, ego_model_index, loss_type = "min")
json_object = {}
for sample_size in sample_size_set:
    for seed in random_seed_set:
        result = {"active_test_type": "surr ensemble", "sample_size": sample_size}
        loss_risk = run_one_surr_ensemble_risk_estimator(true_losses, expected_losses, seed, sample_size)
        result["loss"] = loss_risk
        json_object[len(json_object)] = result
with open(file_path, "w") as outfile:
    json.dump(json_object, outfile)

In [22]:
file_path = result_json_path + "surr_ensemble_R50_31_10_runs_mean.json"
expected_losses = get_acquistion_expected_loss(pro_results, model_nums, ego_model_index, loss_type = "mean")
json_object = {}
for sample_size in sample_size_set:
    for seed in random_seed_set:
        result = {"active_test_type": "surr ensemble", "sample_size": sample_size}
        loss_risk = run_one_surr_ensemble_risk_estimator(true_losses, expected_losses, seed, sample_size)
        result["loss"] = loss_risk
        json_object[len(json_object)] = result
with open(file_path, "w") as outfile:
    json.dump(json_object, outfile)

In [23]:
file_path = result_json_path + "surr_ensemble_R50_31_10_runs_max.json"
expected_losses = get_acquistion_expected_loss(pro_results, model_nums, ego_model_index, loss_type = "max")
json_object = {}
for sample_size in sample_size_set:
    for seed in random_seed_set:
        result = {"active_test_type": "surr ensemble", "sample_size": sample_size}
        loss_risk = run_one_surr_ensemble_risk_estimator(true_losses, expected_losses, seed, sample_size)
        result["loss"] = loss_risk
        json_object[len(json_object)] = result
with open(file_path, "w") as outfile:
    json.dump(json_object, outfile)

## Whole data set risk estimation

In [24]:
def get_whole_data_set_risk_estimator(true_losses):
    return true_losses.mean()

In [25]:
file_path = result_json_path + "None_R50_31.json"
result = {"active_test_type": "None", "sample_size": len(deep_ensemble_results[ego_model_index])}
result["loss"] = get_whole_data_set_risk_estimator(true_losses)
json_object = {}
json_object[0] = result
with open(file_path, "w") as outfile:
    json.dump(json_object, outfile)

## Random Sample risk estimation

In [26]:
def run_one_random_sample_risk_estimator(true_losses, seed, samples_num):
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)
    perm = np.random.permutation(true_losses.size)
    pick_sample_idxs = perm[:samples_num]
    sampled_true_losses = true_losses[pick_sample_idxs]
    return sampled_true_losses.mean()

In [27]:
file_path = result_json_path + "random_sample_R50_31_10_runs.json"
json_object = {}
for sample_size in sample_size_set:
    for seed in random_seed_set:
        result = {"active_test_type": "random sample", "sample_size": sample_size}
        loss_risk = run_one_random_sample_risk_estimator(true_losses, seed, sample_size)
        result["loss"] = loss_risk
        json_object[len(json_object)] = result
with open(file_path, "w") as outfile:
    json.dump(json_object, outfile)