# Test different metrics to find the best one for selecting the top 900 hard patches in an image

In [12]:
import argparse
import datetime
import json
import random
import time
from pathlib import Path
import os, sys
import numpy as np

import torch
from torch.utils.data import DataLoader, DistributedSampler, random_split, TensorDataset
from torch import nn
import matplotlib.pyplot as plt
import torch.nn.functional as F

In [24]:
def read_one_results(path):
    """Load the JSON document stored at ``path`` and return the parsed object."""
    with open(path, "r") as handle:
        return json.load(handle)

def write_one_results(path, json_data):
    """Serialize ``json_data`` as JSON into the file at ``path`` (overwriting it)."""
    with open(path, "w") as handle:
        json.dump(json_data, fp=handle)
        
def display_data_hist(loss):
    """Show a 50-bin histogram of ``loss``.

    The y-axis tick labels are rewritten from raw bin counts to
    ``count / loss.shape[0]`` (rounded to 3 decimals), so ``loss`` must
    expose ``.shape``.
    """
    plt.hist(loss, bins=50, label='ViT')
    plt.title('Loss Distribution')
    plt.xlabel('Loss')
    plt.ylabel('Probability Density')
    # Keep the tick positions chosen by matplotlib, only relabel them.
    tick_positions = plt.yticks()[0]
    tick_labels = np.round(tick_positions / loss.shape[0], 3)
    plt.yticks(tick_positions, tick_labels)
    plt.legend()
    plt.show()
    
def np_read(file):
    """Return the array stored in the NumPy file at path ``file``."""
    with open(file, "rb") as handle:
        return np.load(handle)
def np_write(data, file):
    """Save ``data`` in NumPy ``.npy`` format to the path ``file`` (overwriting it)."""
    with open(file, "wb") as handle:
        np.save(file=handle, arr=data)
        
def compute_kl_divergence(class_prob_1, class_prob_2):
    """Return KL(class_prob_1 || class_prob_2), summed over dimension 0.

    Args:
        class_prob_1: tensor of probabilities; dimension 0 is the one that is
            summed over (the class axis in this notebook's callers).
        class_prob_2: tensor of probabilities with the same shape.

    Returns:
        Tensor with dimension 0 reduced away.

    Entries where ``class_prob_1`` is exactly 0 contribute 0 (the usual
    ``0 * log 0 = 0`` convention) instead of NaN, so a probability that
    underflowed to zero no longer poisons the whole sum. Entries where
    ``class_prob_1 > 0`` and ``class_prob_2 == 0`` still yield ``inf``.
    """
    log_ratio = torch.log(class_prob_1) - torch.log(class_prob_2)
    # For p == 0 the product p * log_ratio is NaN (0 * -inf); replace by 0.
    terms = torch.where(
        class_prob_1 > 0,
        class_prob_1 * log_ratio,
        torch.zeros_like(class_prob_1),
    )
    kl_divergence = torch.sum(terms, dim=0)
    return kl_divergence

## Inconsistency-based metric
Reference: "Not All Labels Are Equal: Rationalizing The Labeling Costs for Training Object Detection"

In [124]:
# Name of the data split; it is inserted into base_path below.
split = "val"
# Root folder for this split; the folders below are all derived from it.
base_path = "/workspace/pytorch-segmentation/pro_data/PSPNet_VOC/" + split + "/"
# Folder read by compute_patch_kl / get_probability (files named "<n>.npy").
output_path = base_path + "output/"
# Folder read by get_label.
label_path = base_path + "target/"
# Folder read for the horizontally-flipped variant of the outputs.
hflip_output_path = base_path + "hflip_output/"
# Reference losses used as ground truth by the analysis cells
# (presumably one value per 8x8 region, going by the file name -- TODO confirm).
true_loss = np_read(base_path + "region_8_8_true_losses.npy")
# Adaptive average pooling to a fixed 60x60 output grid.
avgpool = torch.nn.AdaptiveAvgPool2d((60,60))

In [132]:
def compute_patch_kl(index, pool=None):
    """Return the pooled symmetric KL map between an image's output and its flipped output.

    Args:
        index: global image index. Files hold 8 images each, so the file is
            ``index // 8`` and the position inside it is ``index % 8``.
        pool: optional module/callable applied to the ``(1, H, W)`` KL map to
            reduce it to patches. Defaults to the module-level ``avgpool``;
            passing e.g. an ``AdaptiveMaxPool2d`` gives the max-pooled variant
            without redefining this function.

    Returns:
        Tensor produced by ``pool`` with the leading singleton dimension
        removed (60x60 with the default ``avgpool``).

    NOTE(review): assumes each stored array is (8, classes, H, W) with raw
    scores -- softmax is taken over dim 0 of the selected image -- TODO confirm.
    """
    if pool is None:
        pool = avgpool
    image_index = index % 8
    file_name = str(index // 8) + ".npy"
    output = torch.from_numpy(np_read(output_path + file_name))[image_index]
    hflip_output = torch.from_numpy(np_read(hflip_output_path + file_name))[image_index]
    # Flip the last axis back so both predictions are compared position by position
    # (presumably undoing the horizontal flip applied at inference time).
    hflip_output = torch.flip(hflip_output, dims=[2])
    output = F.softmax(output, dim=0)
    hflip_output = F.softmax(hflip_output, dim=0)
    # Symmetric KL: KL(p || q) + KL(q || p), one value per position.
    kl = compute_kl_divergence(output, hflip_output) + compute_kl_divergence(hflip_output, output)
    # The pooling layer expects a leading channel dimension.
    kl = kl.unsqueeze(dim=0)
    patch_kl = pool(kl).squeeze(dim=0)
    return patch_kl

def get_probability(index):
    """Return the softmax (over dim 0) of the stored output for image ``index``.

    Files hold 8 images each: file ``index // 8``, position ``index % 8``.
    """
    file_index, image_index = divmod(index, 8)
    batch = np_read(output_path + str(file_index) + ".npy")
    scores = torch.from_numpy(batch)[image_index]
    return F.softmax(scores, dim=0)

def get_hflip_probability(index):
    """Return the softmax (over dim 0) of the flipped-input output for image ``index``.

    The stored tensor is flipped along dim 2 before the softmax. Files hold
    8 images each: file ``index // 8``, position ``index % 8``.
    """
    file_index, image_index = divmod(index, 8)
    batch = np_read(hflip_output_path + str(file_index) + ".npy")
    scores = torch.from_numpy(batch)[image_index]
    unflipped = torch.flip(scores, dims=[2])
    return F.softmax(unflipped, dim=0)

def get_label(index):
    """Return the stored target tensor for image ``index``.

    Files hold 8 images each: file ``index // 8``, position ``index % 8``.
    """
    file_index, image_index = divmod(index, 8)
    batch = np_read(label_path + str(file_index) + ".npy")
    return torch.from_numpy(batch)[image_index]

In [106]:
# Analysis: locate the entries whose true loss exceeds 9 and split each flat
# position into (quotient, remainder) by 3600, the per-image patch count used
# in the rest of this notebook.
larger_loc = np.where(true_loss > 9)[0]
np.divmod(larger_loc, 3600)

(array([  88,   88,  208, 1446, 1446, 1446, 1446, 1446]),
 array([1266, 1325, 2097,  647,  710,  711,  770,  833]))

In [135]:
# Inspect one image (1446) whose patches have a very large true loss, and print
# the true loss next to the pooled KL score for each of those patches.
index = 1446
# patch_pre_image is otherwise only assigned in later cells; define it here so
# this cell does not raise NameError when the notebook is run top to bottom.
# 3600 = 60 * 60, the pooled grid size.
patch_pre_image = 3600
image_patch_true_loss = true_loss[index*patch_pre_image: (index+1)*patch_pre_image]
image_patch_kl = compute_patch_kl(index).reshape(-1).numpy()
prob = get_probability(index)
label = get_label(index)
hflip_prob = get_hflip_probability(index)
# Patch positions inside image 1446 reported by the "true_loss > 9" analysis.
bad_case_index = [647,  710,  711,  770,  833]
for i in bad_case_index:
    print(f"{image_patch_true_loss[i]}, {image_patch_kl[i]}")

10.258706092834473, 0.00010118322825292125
9.76128101348877, 0.00013450768892653286
10.374043464660645, 0.0001369493402307853
9.936532020568848, 4.634981451090425e-05
10.661508560180664, 9.418664558324963e-05


In [137]:
# Cut the 8x8 pixel window of one patch out of the probability / label maps.
# The flat patch index is laid out row-major on a grid with 60 columns.
patch_index = 647
h, w = divmod(patch_index, 60)
patch_rows = slice(h * 8, (h + 1) * 8)
patch_cols = slice(w * 8, (w + 1) * 8)
patch_prob = prob[:, patch_rows, patch_cols]
patch_label = label[patch_rows, patch_cols]
patch_hflip_prob = hflip_prob[:, patch_rows, patch_cols]

In [142]:
# Print, for a single pixel of the selected patch, the class probabilities of
# the regular output, the label, and the probabilities of the flipped output.
h_index, w_index = 7, 7
pixel_report = (
    ("probability:", patch_prob[:, h_index, w_index]),
    ("label:", patch_label[h_index, w_index]),
    ("flip probability:", patch_hflip_prob[:, h_index, w_index]),
)
for heading, value in pixel_report:
    print(heading)
    print(value)

probability:
tensor([9.9982e-01, 2.4129e-06, 6.6885e-07, 1.6410e-06, 2.2348e-07, 3.2198e-05,
        6.1785e-07, 1.7244e-07, 2.5458e-07, 2.3559e-06, 2.7047e-07, 1.1101e-06,
        2.4424e-07, 6.3243e-07, 2.6687e-07, 1.2240e-04, 1.1638e-05, 1.8349e-07,
        3.4116e-07, 3.6509e-06, 2.3800e-06])
label:
tensor(5)
flip probability:
tensor([9.9976e-01, 6.8800e-06, 9.0307e-07, 2.5657e-06, 4.8761e-07, 3.3633e-05,
        1.6098e-06, 3.9884e-07, 4.7293e-07, 1.3535e-05, 4.3102e-07, 2.1768e-06,
        4.9448e-07, 9.9665e-07, 1.2732e-07, 1.4802e-04, 1.9782e-05, 3.4414e-07,
        8.3785e-07, 5.2300e-06, 2.7343e-06])


In [None]:
# For every image, measure how many of the top-k patches by true loss are also
# among the top-k patches by pooled KL, then report the mean overlap ratio.
patch_pre_image = 3600
choose_patch_num = 900
image_nums = true_loss.size // patch_pre_image
pre_find_patch = np.zeros(image_nums)
for index in range(image_nums):
    first_patch = index * patch_pre_image
    image_patch_true_loss = true_loss[first_patch:first_patch + patch_pre_image]
    image_patch_kl = compute_patch_kl(index).reshape(-1).numpy()
    # argsort is ascending, so the last k positions are the k largest values.
    top_k_true_loss_index = np.sort(np.argsort(image_patch_true_loss)[-choose_patch_num:])
    top_k_patch_kl = np.sort(np.argsort(image_patch_kl)[-choose_patch_num:])
    common_integers = np.intersect1d(top_k_true_loss_index, top_k_patch_kl)
    pre_find_patch[index] = common_integers.size / choose_patch_num
print(f"{pre_find_patch.mean()} percentage of top {choose_patch_num} have been selected!")

In [90]:
# For each loss threshold, count the patches whose true loss exceeds it and how
# many of those are inside the per-image top-k set ranked by pooled KL; the
# ratio of the two totals is printed per threshold.
threshold_list = [1,2,3,4,5,6,7,8,9]
and_count_list = np.zeros(len(threshold_list))
large_thre_count_list = np.zeros(len(threshold_list))
patch_pre_image = 3600
choose_patch_num = 900
image_nums = true_loss.size // patch_pre_image
for index in range(image_nums):
    first_patch = index * patch_pre_image
    image_patch_true_loss = true_loss[first_patch:first_patch + patch_pre_image]
    image_patch_kl = compute_patch_kl(index).reshape(-1).numpy()
    top_k_patch_kl = np.sort(np.argsort(image_patch_kl)[-choose_patch_num:])
    # Boolean membership mask of the top-k KL patches.
    top_k_patch_kl_bool = np.zeros(image_patch_kl.shape, dtype=bool)
    top_k_patch_kl_bool[top_k_patch_kl] = True

    for i, threshold in enumerate(threshold_list):
        bool_large_thre = image_patch_true_loss > threshold
        and_results = bool_large_thre & top_k_patch_kl_bool
        and_count_list[i] += and_results.sum()
        large_thre_count_list[i] += bool_large_thre.sum()
    if (index + 1) % 100 == 0:
        print(f"Process {index}")
percentage = and_count_list / large_thre_count_list
for threshold, found in zip(threshold_list, percentage):
    print(f"Threshold: {threshold}, found percetage: {found}")

Process 99
Process 199
Process 299
Process 399
Process 499
Process 599
Process 699
Process 799
Process 899
Process 999
Process 1099
Process 1199
Process 1299
Process 1399
Threshold: 1, found percetage: 0.7428618747929778
Threshold: 2, found percetage: 0.654007507175977
Threshold: 3, found percetage: 0.561374549819928
Threshold: 4, found percetage: 0.5047281323877069
Threshold: 5, found percetage: 0.45728155339805826
Threshold: 6, found percetage: 0.4068627450980392
Threshold: 7, found percetage: 0.3384615384615385
Threshold: 8, found percetage: 0.32432432432432434
Threshold: 9, found percetage: 0.0


In [98]:
# Max-pooling variant: same symmetric KL map, reduced with adaptive max pooling
# to a 60x60 grid instead of average pooling. Redefines compute_patch_kl.
maxpool = torch.nn.AdaptiveMaxPool2d((60,60))
def compute_patch_kl(index):
    """Return the max-pooled symmetric KL map between an image's output and its flipped output.

    Files hold 8 images each: file ``index // 8``, position ``index % 8``.
    """
    file_index, image_index = divmod(index, 8)
    file_name = str(file_index) + ".npy"
    plain = torch.from_numpy(np_read(output_path + file_name))[image_index]
    flipped = torch.from_numpy(np_read(hflip_output_path + file_name))[image_index]
    # Flip dim 2 back so both predictions line up position by position.
    flipped = torch.flip(flipped, dims=[2])
    plain_prob = F.softmax(plain, dim=0)
    flipped_prob = F.softmax(flipped, dim=0)
    forward_kl = compute_kl_divergence(plain_prob, flipped_prob)
    backward_kl = compute_kl_divergence(flipped_prob, plain_prob)
    symmetric_kl = (forward_kl + backward_kl).unsqueeze(dim=0)
    return maxpool(symmetric_kl).squeeze(dim=0)

# Same threshold analysis as for the average-pooled metric, now using the
# compute_patch_kl currently in scope: per threshold, the share of patches with
# true loss above it that are inside the per-image top-k KL set.
threshold_list = [1,2,3,4,5,6,7,8,9]
and_count_list = np.zeros(len(threshold_list))
large_thre_count_list = np.zeros(len(threshold_list))
patch_pre_image = 3600
choose_patch_num = 900
image_nums = true_loss.size // patch_pre_image
for index in range(image_nums):
    first_patch = index * patch_pre_image
    image_patch_true_loss = true_loss[first_patch:first_patch + patch_pre_image]
    image_patch_kl = compute_patch_kl(index).reshape(-1).numpy()
    top_k_patch_kl = np.sort(np.argsort(image_patch_kl)[-choose_patch_num:])
    # Boolean membership mask of the top-k KL patches.
    top_k_patch_kl_bool = np.zeros(image_patch_kl.shape, dtype=bool)
    top_k_patch_kl_bool[top_k_patch_kl] = True

    for i, threshold in enumerate(threshold_list):
        bool_large_thre = image_patch_true_loss > threshold
        and_results = bool_large_thre & top_k_patch_kl_bool
        and_count_list[i] += and_results.sum()
        large_thre_count_list[i] += bool_large_thre.sum()
    if (index + 1) % 100 == 0:
        print(f"Process {index}")
percentage = and_count_list / large_thre_count_list
for threshold, found in zip(threshold_list, percentage):
    print(f"Threshold: {threshold}, found percetage: {found}")

Process 99
Process 199
Process 299
Process 399
Process 499
Process 599
Process 699
Process 799
Process 899
Process 999
Process 1099
Process 1199
Process 1299
Process 1399
Threshold: 1, found percetage: 0.7442199403776085
Threshold: 2, found percetage: 0.6468867299624641
Threshold: 3, found percetage: 0.546968787515006
Threshold: 4, found percetage: 0.48857368006304175
Threshold: 5, found percetage: 0.4300970873786408
Threshold: 6, found percetage: 0.3897058823529412
Threshold: 7, found percetage: 0.3230769230769231
Threshold: 8, found percetage: 0.2972972972972973
Threshold: 9, found percetage: 0.0


## Entropy

In [94]:
# Load the 'losses' list from the stored results file and mark every entry
# whose value exceeds 0.001 as "selected"; counters are reset for the
# threshold analysis in the next cell.
val_estimated_loss = np.array(
    read_one_results(
        "/workspace/ViT-pytorch/output/region_8_8/ViT-output-PSPNet-VOC-train-ordinal-8x8-patch_losses_9800.json"
    )['losses']
)
threshold_list = list(range(1, 10))
and_count_list = np.zeros(len(threshold_list))
large_thre_count_list = np.zeros(len(threshold_list))
top_k_patch_kl_bool = val_estimated_loss > 0.001

In [97]:
# Over all patches at once: per threshold, the share of patches with true loss
# above it that are also flagged in top_k_patch_kl_bool.
for i, threshold in enumerate(threshold_list):
    bool_large_thre = true_loss > threshold
    and_results = bool_large_thre & top_k_patch_kl_bool
    and_count_list[i] += and_results.sum()
    large_thre_count_list[i] += bool_large_thre.sum()
percentage = and_count_list / large_thre_count_list
for threshold, found in zip(threshold_list, percentage):
    print(f"Threshold: {threshold}, found percetage: {found}")

Threshold: 1, found percetage: 0.8298111957601855
Threshold: 2, found percetage: 0.6811658202693751
Threshold: 3, found percetage: 0.5571728691476591
Threshold: 4, found percetage: 0.4578408195429472
Threshold: 5, found percetage: 0.3533980582524272
Threshold: 6, found percetage: 0.4019607843137255
Threshold: 7, found percetage: 0.5384615384615384
Threshold: 8, found percetage: 0.6486486486486487
Threshold: 9, found percetage: 0.125
