# Active Testing
Data is stored in `./pro_data`.

In [1]:
import argparse
import datetime
import json
import random
import time
from pathlib import Path
import os, sys
import numpy as np
import torch
import pickle
import copy
import random
from utils.utils import *

In [2]:
def LURE_weights_for_risk_estimator(weights, N):
    """Compute the LURE correction weights for an actively sampled subset.

    For the m-th acquired point (1-based) with acquisition probability
    ``q_m`` the weight is::

        v_m = 1 + (N - M) / (N - m) * (1 / ((N - m + 1) * q_m) - 1)

    where ``M`` is the number of acquired points.

    Args:
        weights: 1-D array-like of the M acquisition probabilities, in the
            order in which the points were acquired.
        N: Total number of points in the pool.

    Returns:
        A float64 ndarray with the same shape as ``weights``. When
        ``M >= N`` (the whole pool was acquired) every weight is 1.
    """
    weights = np.asarray(weights, dtype=np.float64)
    M = weights.size
    if M >= N:
        # Always hand back an array so callers get a consistent type.
        return np.ones_like(weights)
    m = np.arange(1, M + 1)
    return 1 + (N - M) / (N - m) * (1 / ((N - m + 1) * weights) - 1)

def acquire(expected_loss_inputs, samples_num):
    """Draw ``samples_num`` distinct indices, favouring high expected loss.

    Points are drawn one at a time without replacement. At every step the
    remaining scores are normalised to a probability vector, floored at 20%
    of the uniform probability, renormalised, and one index is drawn from
    the resulting categorical distribution.

    Args:
        expected_loss_inputs: Array-like of per-point scores. It is never
            modified; it is flattened, so returned indices refer to the
            flattened order. NaNs are replaced by 0 and, if any value is
            negative, all values are shifted so that the minimum becomes 0.
        samples_num: Number of points to draw; must not exceed the number
            of scores.

    Returns:
        A tuple ``(pick_sample_idxs, weights)``: an int array with the
        chosen indices in acquisition order, and a float32 array with the
        probability under which each index was drawn at its step.
    """
    # Local import: the visible file never imports logging itself.
    import logging

    # Float64 working copy, so integer inputs can be shifted and normalised.
    expected_loss = np.array(expected_loss_inputs, dtype=np.float64).reshape(-1)
    assert samples_num <= expected_loss.size

    # NaNs have to go first: min() of an array containing NaN is NaN, so
    # shifting before cleaning would turn every entry into NaN.
    if np.any(np.isnan(expected_loss)):
        logging.warning(
            'Found NaN values in expected loss, replacing with 0.')
        logging.info(f'{expected_loss}')
        expected_loss = np.nan_to_num(expected_loss, nan=0)

    # Log-lik can be negative.
    # Make all values positive.
    if expected_loss.size and expected_loss.min() < 0:
        expected_loss += np.abs(expected_loss.min())

    pick_sample_idxs = np.zeros(samples_num, dtype=int)
    weights = np.zeros(samples_num, dtype=np.single)
    idx_array = np.arange(expected_loss.size)
    uniform_clip_val = 0.2
    for i in range(samples_num):
        total = expected_loss.sum()
        if total > 0:
            expected_loss /= total
        else:
            # Every remaining score is zero: use a uniform distribution
            # instead of dividing by zero.
            expected_loss = np.full(expected_loss.size, 1.0 / expected_loss.size)
        # Floor each probability at 20 percent of the uniform probability,
        # then renormalise.
        expected_loss = np.maximum(uniform_clip_val / expected_loss.size, expected_loss)
        expected_loss /= expected_loss.sum()
        sample = np.random.multinomial(1, expected_loss)
        # The draw is one-hot, so argmax is the position of the single 1.
        cur_idx = int(np.argmax(sample))
        pick_sample_idxs[i] = idx_array[cur_idx]
        weights[i] = expected_loss[cur_idx]
        # Remove the drawn point so it cannot be picked again.
        expected_loss = np.delete(expected_loss, cur_idx)
        idx_array = np.delete(idx_array, cur_idx)
    return pick_sample_idxs, weights

def run_one_random_sample_risk_estimator(true_losses, seed, samples_num):
    """Estimate the risk as the mean loss of a uniformly drawn subset.

    Args:
        true_losses: NumPy array of per-point losses.
        seed: Seed applied to the torch, NumPy and ``random`` generators.
        samples_num: Number of points to keep from a random permutation.

    Returns:
        The mean of the selected losses as a Python float.
    """
    # Re-seed every generator so the run is reproducible for a given seed.
    for reseed in (torch.manual_seed, np.random.seed, random.seed):
        reseed(seed)
    shuffled = np.random.permutation(true_losses.size)
    chosen = shuffled[:samples_num]
    return float(true_losses[chosen].mean())

def run_one_active_test_risk_estimator(true_losses, expected_losses, seed, samples_num):
    """Estimate the risk from an actively acquired, LURE-weighted subset.

    Args:
        true_losses: NumPy array of per-point true losses.
        expected_losses: NumPy array of per-point scores passed to ``acquire``.
        seed: Seed applied to the torch, NumPy and ``random`` generators.
        samples_num: Number of points to acquire.

    Returns:
        The mean of the LURE-weighted true losses as a Python float.
    """
    # Re-seed every generator so the acquisition is reproducible.
    for reseed in (torch.manual_seed, np.random.seed, random.seed):
        reseed(seed)
    chosen, acquisition_probs = acquire(expected_losses, samples_num)
    pool_size = expected_losses.size
    lure_weights = LURE_weights_for_risk_estimator(acquisition_probs, pool_size)
    weighted_losses = true_losses[chosen] * lure_weights
    return float(weighted_losses.mean())

def active_testing(file_path, true_losses, expected_losses, active_test_type, display = False,
                   sample_sizes=None, seeds=None):
    """Run the active-testing estimator over a grid and save the results as JSON.

    One estimate is computed for every (sample size, seed) pair and stored
    under consecutive integer keys.

    Args:
        file_path: Destination JSON file; missing parent directories are created.
        true_losses: Per-point true losses.
        expected_losses: Per-point scores used to acquire points.
        active_test_type: Label stored with every result.
        display: If true, print a progress line after each sample size.
        sample_sizes: Sample sizes to evaluate. Defaults to the notebook
            global ``sample_size_set``.
        seeds: Random seeds to evaluate. Defaults to the notebook global
            ``random_seed_set``.
    """
    # Fall back to the notebook-level configuration when no explicit grid is given.
    if sample_sizes is None:
        sample_sizes = sample_size_set
    if seeds is None:
        seeds = random_seed_set

    json_object = {}
    for sample_size in sample_sizes:
        for seed in seeds:
            loss_risk = run_one_active_test_risk_estimator(true_losses, expected_losses, seed, sample_size)
            json_object[len(json_object)] = {
                "active_test_type": active_test_type,
                "sample_size": sample_size,
                "loss": loss_risk,
            }
        if display:
            print(f"Complete simple size : {sample_size}")

    # Create the output directory first so a long run is not lost to a missing folder.
    out_path = Path(file_path)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    with open(out_path, "w") as outfile:
        json.dump(json_object, outfile)
        
def get_whole_data_set_risk_estimator(true_losses):
    """Return the mean of all true losses as a Python float."""
    mean_loss = true_losses.mean()
    return float(mean_loss)

In [7]:
# Notebook-wide configuration: pick the data granularity, load the matching
# true losses and derive the sample sizes and seeds used by the cells below.
split = "val"
base_path = f"./pro_data/PSPNet_VOC/{split}/"
data_type = "region_16" # image, image_2, region_8, region_16

# Both image-level variants use the same sampling fractions.
image_level_precentage = [0.001, 0.002, 0.003, 0.004, 0.005, 0.006, 0.007, 0.008, 0.009, 0.01, 0.015, 0.02, 0.025, 0.03, 0.035, 0.04, 0.045,
                          0.05, 0.055, 0.06, 0.065, 0.07, 0.075, 0.08]

if data_type == "image":
    true_losses = np_read(base_path + "image_true_losses.npy")
    sample_size_precentage = list(image_level_precentage)
    result_json_path = "./results/image_based_active_testing/"
    vit_base_path = "../ViT-pytorch/output/"
elif data_type == "image_2":
    true_losses = np_read(base_path + "image_split_2_2_true_losses.npy")
    sample_size_precentage = list(image_level_precentage)
    result_json_path = "./results/image_split_2_2_active_testing/"
    vit_base_path = "../ViT-pytorch/output/"
elif data_type == "region_8":
    true_losses = np_read(base_path + "region_8_8_true_losses.npy")
    sample_size_precentage = np.linspace(0.00001, 0.0001, 20)
    # sample_size_precentage = np.linspace(0.00004, 0.00006, 20)
    # sample_size_precentage = np.linspace(0.00001, 0.00005, 2)
    result_json_path = "./results/region_8_8_active_testing/"
    # result_json_path = "./results/region_8_8_try/"
    vit_base_path = "../ViT-pytorch/output/region_8_8/"
elif data_type == "region_16":
    true_losses = np_read(base_path + "region_16_16_true_losses.npy")
    sample_size_precentage = np.linspace(0.0001, 0.001, 20)
    result_json_path = "./results/region_16_16_active_testing/"
    vit_base_path = "../ViT-pytorch/output/region_16_16/"
else:
    # Fail loudly instead of silently reusing values left over from an earlier run.
    raise ValueError(f"Unknown data_type: {data_type!r}")

# Turn the fractions into absolute sample counts (truncated towards zero).
box_labels_nums = true_losses.shape[0]
sample_size_set = (np.array(sample_size_precentage) * box_labels_nums).astype(int).tolist()
# random_seed_set = [4519, 9524, 5901, 1028, 6382, 5383, 5095, 7635,  890,  608]
random_seed_set = [4519, 9524, 5901]

## Region VS Image

In [34]:
# Region-vs-image comparison, image level: uniform random sampling baseline.
# NOTE: this cell overwrites the notebook-wide result path, fractions,
# true losses and sample sizes.
result_json_path = "./results/image_region_compare/"
sample_size_precentage = np.linspace(0.001, 0.01, 20)
true_losses = np_read(base_path + "image_true_losses.npy")
box_labels_nums = true_losses.shape[0]
scaled_sizes = np.array(sample_size_precentage) * box_labels_nums
sample_size_set = scaled_sizes.astype(int).tolist()

file_path = result_json_path + "image_random_sample_runs.json"
json_object = {}
for sample_size in sample_size_set:
    for seed in random_seed_set:
        loss_risk = run_one_random_sample_risk_estimator(true_losses, seed, sample_size)
        result = {
            "active_test_type": "image random sample",
            "sample_size": sample_size,
            "loss": float(loss_risk),
        }
        # Results are keyed by their running index.
        json_object[len(json_object)] = result
write_one_results(json_object, file_path)

In [31]:
# Region-vs-image comparison, 16x16 regions: uniform random sampling baseline.
# Reuses result_json_path and sample_size_precentage as set by an earlier cell.
true_losses = np_read(base_path + "region_16_16_true_losses.npy")
box_labels_nums = true_losses.shape[0]
scaled_sizes = np.array(sample_size_precentage) * box_labels_nums
sample_size_set = scaled_sizes.astype(int).tolist()

file_path = result_json_path + "region_16_16_random_sample_runs.json"
json_object = {}
for sample_size in sample_size_set:
    for seed in random_seed_set:
        loss_risk = run_one_random_sample_risk_estimator(true_losses, seed, sample_size)
        result = {
            "active_test_type": "region 16x16 random sample",
            "sample_size": sample_size,
            "loss": float(loss_risk),
        }
        # Results are keyed by their running index.
        json_object[len(json_object)] = result
write_one_results(json_object, file_path)

In [32]:
# Region-vs-image comparison, 8x8 regions: uniform random sampling baseline.
# Reuses result_json_path and sample_size_precentage as set by an earlier cell.
true_losses = np_read(base_path + "region_8_8_true_losses.npy")
box_labels_nums = true_losses.shape[0]
scaled_sizes = np.array(sample_size_precentage) * box_labels_nums
sample_size_set = scaled_sizes.astype(int).tolist()

file_path = result_json_path + "region_8_8_random_sample_runs.json"
json_object = {}
for sample_size in sample_size_set:
    for seed in random_seed_set:
        loss_risk = run_one_random_sample_risk_estimator(true_losses, seed, sample_size)
        result = {
            "active_test_type": "region 8x8 random sample",
            "sample_size": sample_size,
            "loss": float(loss_risk),
        }
        # Results are keyed by their running index.
        json_object[len(json_object)] = result
write_one_results(json_object, file_path)

## Random Sample risk estimation

In [7]:
# Uniform random sampling baseline for the currently configured data:
# one estimate per (sample size, seed) pair.
file_path = result_json_path + "random_sample_3_runs.json"
json_object = {}
for sample_size in sample_size_set:
    for seed in random_seed_set:
        loss_risk = run_one_random_sample_risk_estimator(true_losses, seed, sample_size)
        result = {
            "active_test_type": "random sample",
            "sample_size": sample_size,
            "loss": float(loss_risk),
        }
        # Results are keyed by their running index.
        json_object[len(json_object)] = result
write_one_results(json_object, file_path)

## Whole data set risk

In [5]:
# Reference value: the mean loss over every point, stored as a single result.
file_path = result_json_path + "None.json"
result = {
    "active_test_type": "None",
    "sample_size": true_losses.size,
    "loss": get_whole_data_set_risk_estimator(true_losses),
}
json_object = {0: result}
with open(file_path, "w") as outfile:
    outfile.write(json.dumps(json_object))

## ASE
dropout

In [38]:
## image level
# Load the ASE loss chunks "0.npy", "1.npy", ... and reduce each one to a
# single score per entry along axis 0 by averaging over axes 1 and 2.
ase_loss_path = base_path + "ase/"
# Count only .npy files so stray directory entries do not inflate the range.
file_num = sum(1 for entry in os.listdir(ase_loss_path) if entry.endswith(".npy"))
if file_num == 0:
    raise FileNotFoundError(f"No .npy files found in {ase_loss_path}")
ase_chunks = []
for i in range(file_num):
    temp_loss = np_read(ase_loss_path + str(i) + ".npy")
    temp_loss = np.mean(temp_loss, axis=(1, 2))
    ase_chunks.append(temp_loss)
# Concatenate once instead of growing the array inside the loop.
ase_loss = np.concatenate(ase_chunks)
file_path = result_json_path + "ase_runs.json"
active_testing(file_path, true_losses, ase_loss, "ASE")

In [43]:
## 16X16 level
avgpool = torch.nn.AdaptiveAvgPool2d((30,30))

def AveragePool(array):
    """Apply the module-level ``avgpool`` layer to a NumPy array and return a NumPy array."""
    tensor = torch.from_numpy(array)
    tensor = avgpool(tensor)
    return tensor.numpy()

# Load the ASE loss chunks "0.npy", "1.npy", ..., pool each one down to a
# 30x30 grid and flatten it into per-region scores.
ase_loss_path = base_path + "ase/"
# Count only .npy files so stray directory entries do not inflate the range.
file_num = sum(1 for entry in os.listdir(ase_loss_path) if entry.endswith(".npy"))
if file_num == 0:
    raise FileNotFoundError(f"No .npy files found in {ase_loss_path}")
ase_chunks = []
for i in range(file_num):
    temp_loss = np_read(ase_loss_path + str(i) + ".npy")
    temp_loss = AveragePool(temp_loss).reshape(-1)
    ase_chunks.append(temp_loss)
# Concatenate once instead of growing the array inside the loop.
ase_loss = np.concatenate(ase_chunks)
file_path = result_json_path + "ase_runs.json"
active_testing(file_path, true_losses, ase_loss, "ASE")

In [17]:
## 8X8 level
avgpool = torch.nn.AdaptiveAvgPool2d((60,60))

def AveragePool(array):
    """Apply the module-level ``avgpool`` layer to a NumPy array and return a NumPy array."""
    tensor = torch.from_numpy(array)
    tensor = avgpool(tensor)
    return tensor.numpy()

# Load the ASE loss chunks "0.npy", "1.npy", ..., pool each one down to a
# 60x60 grid and flatten it into per-region scores.
ase_loss_path = base_path + "ase/"
# Count only .npy files so stray directory entries do not inflate the range.
file_num = sum(1 for entry in os.listdir(ase_loss_path) if entry.endswith(".npy"))
if file_num == 0:
    raise FileNotFoundError(f"No .npy files found in {ase_loss_path}")
ase_chunks = []
for i in range(file_num):
    temp_loss = np_read(ase_loss_path + str(i) + ".npy")
    temp_loss = AveragePool(temp_loss).reshape(-1)
    ase_chunks.append(temp_loss)
# Concatenate once instead of growing the array inside the loop.
ase_loss = np.concatenate(ase_chunks)
file_path = result_json_path + "ase_runs.json"
active_testing(file_path, true_losses, ase_loss, "ASE")

KeyboardInterrupt: 

## Image based Active Testing

In [5]:
# Use the 'losses' entry of the stored results file as acquisition scores
# and run active testing with them.
loaded_results = read_one_results("../ViT-pytorch/output/ViT-output-PSPNet-VOC-train-ordinal_losses_7600.json")
val_estimated_loss = np.array(loaded_results['losses'])
file_path = result_json_path + "ViT_output_train_ordinal_runs.json"
active_testing(file_path, true_losses, val_estimated_loss, "ViT")

In [25]:
# Use the 'losses' entry of the stored results file (singleton axes removed)
# as acquisition scores and run active testing with them.
loaded_results = read_one_results(vit_base_path + "ViT-output-PSPNet-VOC-train-ordinal-new_losses_3600.json")
expected_losses = np.squeeze(np.array(loaded_results['losses']))
# expected_losses = np.exp(expected_losses)
file_path = result_json_path + "ViT_output_train_ordinal_runs.json"
active_testing(file_path, true_losses, expected_losses, "ViT")

In [37]:
# Use the exponentiated 'losses' entry of the stored results file
# (singleton axes removed) as acquisition scores and run active testing.
loaded_results = read_one_results(vit_base_path + "ViT-output-PSPNet-VOC-train-image_losses.json")
raw_losses = np.squeeze(np.array(loaded_results['losses']))
expected_losses = np.exp(raw_losses)
file_path = result_json_path + "ViT_output_train_image_runs.json"
active_testing(file_path, true_losses, expected_losses, "ViT")

In [70]:
# Use the 'losses' entry of the stored results file (singleton axes removed)
# as acquisition scores and run active testing under the "MLP" label.
loaded_results = read_one_results(vit_base_path + "mlp-output-PSPNet-VOC-train-grad_losses.json")
expected_losses = np.squeeze(np.array(loaded_results['losses']))
# expected_losses = np.exp(expected_losses)
file_path = result_json_path + "mlp_output_train_runs.json"
active_testing(file_path, true_losses, expected_losses, "MLP")

In [72]:
# Use the exponentiated 'losses' entry of the stored results file
# (singleton axes removed) as acquisition scores and run active testing.
loaded_results = read_one_results(vit_base_path + "ViT-output-PSPNet-VOC-train-loss-design-entrop_losses.json")
raw_losses = np.squeeze(np.array(loaded_results['losses']))
expected_losses = np.exp(raw_losses)
file_path = result_json_path + "ViT_output_train_losses_design_entrop_runs.json"
active_testing(file_path, true_losses, expected_losses, "ViT")

In [28]:
# Sanity run: acquire with the true losses themselves, raising every value
# below 0.01 up to 0.01 first.
expected_losses = np.copy(true_losses)
below_floor = expected_losses < 0.01
expected_losses[below_floor] = 0.01
file_path = result_json_path + "temp_runs.json"
active_testing(file_path, true_losses, expected_losses, "temp")

In [74]:
# Load the true losses of the training split (image-level, going by the file
# name). Not referenced by any other cell visible in this notebook.
train_true_losses = np_read("./pro_data/PSPNet_VOC/train/image_true_losses.npy")

In [82]:
# Quantise the true losses into 20 equal-width bins over [0, 0.6] and use the
# bin midpoints as acquisition scores.
interval = np.linspace(0, 0.6, num=21)
bin_midpoints = (interval[:-1] + interval[1:]) / 2
estimate_val = np.copy(true_losses)
for i in range(interval.shape[0] - 1):
    # Later bins overwrite earlier ones, so each value ends up with the
    # midpoint of the last bin whose lower edge it exceeds; values that
    # exceed no edge keep their original value.
    above_edge = true_losses > interval[i]
    estimate_val[above_edge] = bin_midpoints[i]
file_path = result_json_path + "ordinal_with_true_loss_runs.json"
active_testing(file_path, true_losses, estimate_val, "ordinal")

In [19]:
# image split 2x2
# Use the 'losses' entry of the stored results file as acquisition scores
# and run active testing with them.
loaded_results = read_one_results(vit_base_path + "ViT-output-PSPNet-VOC-train-ordinal-image-split-2x2_losses_8000.json")
val_estimated_loss = np.array(loaded_results['losses'])
file_path = result_json_path + "ViT_output_train_ordinal_runs.json"
active_testing(file_path, true_losses, val_estimated_loss, "ViT")

## Region-based Active Testing

In [64]:
# Use the 'losses' entry of the stored results file (singleton axes removed)
# as acquisition scores and run active testing, printing progress.
loaded_results = read_one_results(vit_base_path + "ViT-output-PSPNet-VOC-train-ordinal_losses_7600.json")
expected_losses = np.squeeze(np.array(loaded_results['losses']))
# expected_losses = np.exp(expected_losses)
file_path = result_json_path + "ViT_output_train_ordinal_runs.json"
active_testing(file_path, true_losses, expected_losses, "ViT", display=True)

Complete simple size : 130
Complete simple size : 192
Complete simple size : 253
Complete simple size : 315
Complete simple size : 377
Complete simple size : 439
Complete simple size : 501
Complete simple size : 562
Complete simple size : 624
Complete simple size : 686
Complete simple size : 748
Complete simple size : 809
Complete simple size : 871
Complete simple size : 933
Complete simple size : 995
Complete simple size : 1057
Complete simple size : 1118
Complete simple size : 1180
Complete simple size : 1242
Complete simple size : 1304


In [8]:
# Use the 'losses' entry of the stored results file (singleton axes removed)
# as acquisition scores and run active testing, printing progress.
loaded_results = read_one_results(vit_base_path + "ViT-output-PSPNet-VOC-train-ordinal-new_losses_6000.json")
expected_losses = np.squeeze(np.array(loaded_results['losses']))
# expected_losses = np.exp(expected_losses)
file_path = result_json_path + "ViT_output_train_ordinal_runs.json"
active_testing(file_path, true_losses, expected_losses, "ViT", display=True)

Complete simple size : 130
Complete simple size : 192
Complete simple size : 253
Complete simple size : 315
Complete simple size : 377
Complete simple size : 439
Complete simple size : 501
Complete simple size : 562
Complete simple size : 624
Complete simple size : 686
Complete simple size : 748
Complete simple size : 809
Complete simple size : 871
Complete simple size : 933
Complete simple size : 995
Complete simple size : 1057
Complete simple size : 1118
Complete simple size : 1180
Complete simple size : 1242
Complete simple size : 1304


In [19]:
# Use the 'losses' entry of the stored results file (singleton axes removed)
# as acquisition scores and run active testing, printing progress.
loaded_results = read_one_results(vit_base_path + "ViT-output-PSPNet-VOC-train-ordinal-8x8-grad-new_losses_6200.json")
expected_losses = np.squeeze(np.array(loaded_results['losses']))
# expected_losses = np.exp(expected_losses)
file_path = result_json_path + "ViT_output_train_ordinal_runs_try.json"
active_testing(file_path, true_losses, expected_losses, "ViT", display=True)

Complete simple size : 52
Complete simple size : 76
Complete simple size : 101
Complete simple size : 126
Complete simple size : 151
Complete simple size : 175
Complete simple size : 200
Complete simple size : 225
Complete simple size : 249
Complete simple size : 274
Complete simple size : 299
Complete simple size : 323
Complete simple size : 348
Complete simple size : 373
Complete simple size : 398
Complete simple size : 422
Complete simple size : 447
Complete simple size : 472
Complete simple size : 496
Complete simple size : 521
