Toy example을 제외하고, 코드 구현, 결과를 통합해서 visualization하기 위한 script

다루고 있는 것으로는

# Code Implementation
- HVP
- IHVP-CG
- IHVP-NCG
- IHVP-SE

# Experimental Result

- TOP-k examples sorted by IF measure
- t-sne result
- interpretation of result
- TOP-k examples sorted by IF measure on specific class
- relabeling using IF measure


# Code Implementation

## HVP

Remark)

tensorflow에서는 gradient가 operator로 존재하며 이를 이용하면 hessian vector product를 automatic differentiation으로 구현할 수 있음. 그 결과 추가적인 error 없이 정확한 값을 얻을 수 있음.

반면 cntk에서는 gradient output이 값으로만 나오게 됨. 때문에 numerical differentiation을 이용해서 구현함. 그 결과 error가 조금 발생하게 됨.

In [1]:
import cntk as C
from cntk.device import try_set_default_device, gpu
try_set_default_device(gpu(0))

import numpy as np
import time
import os

from torch.utils.data import DataLoader

In [2]:
# Hessian Vector Product

def grad_inner_product(grad1, grad2):
    """Return the inner product of two dictionary-format gradients.

    grad1, grad2: dictionaries sharing the same keys, whose values are arrays
        of matching shape (e.g. gradient values keyed by parameter)
    returns: scalar, the sum over all keys of the element-wise products
    """
    assert(len(grad1)==len(grad2))

    # builtin sum starts from 0 and accumulates key by key, in grad1's key order
    return sum(np.sum(np.multiply(grad1[name], grad2[name])) for name in grad1.keys())

def weight_update(w, v, r):
    """Shift every parameter in place: param.value <- param.value + r * v[param].

    w: weights of neural network (iterable of parameter objects exposing `.value`)
    v: value for delta w (dictionary keyed by the same parameters, e.g., gradient value)
    r: hyperparameter for a gradient (scalar step size, may be negative)
    """
    for param in w:
        step = r * v[param]
        param.value += step

def HVP(y, x, v, r=1e-2):
    """Approximate the Hessian-vector product H v by a central finite difference.

    The gradient of y is evaluated at w + r*v and at w - r*v and the product is
    estimated as (g_plus - g_minus) / (2*r).

    y: scalar function to be differentiated (function, e.g. cross entropy loss);
        must expose grad(feed_dict, wrt=...)
    x: feed_dict value for the network (dictionary, e.g. {model.X: image_batch, model.y: label_batch})
    v: vector to be producted (by Hessian) (numeric dictionary keyed by the
        parameters to differentiate, e.g., g(z_test))
    r: finite-difference step size (scalar)
    returns: dictionary with the keys returned by y.grad, holding the estimate of H v
    """
    assert isinstance(x, dict), "Input of HVP is wrong. 'x' should be dictionary(feed dict format)"
    assert isinstance(v, dict), "Input of HVP is wrong. 'v' should be dictionary(weight:value format)"

    w = list(v.keys())

    # Snapshot the weights so they can be restored exactly afterwards. Undoing the
    # perturbation arithmetically (w + r - 2r + r) leaves rounding residue that
    # accumulates when this function is called many times.
    base = {p: np.array(p.value, copy=True) for p in w}

    try:
        # gradient for plus
        for p in w:
            p.value = (base[p] + r * v[p]).astype(base[p].dtype, copy=False)
        g_plus = y.grad(x, wrt=w)

        # gradient for minus
        for p in w:
            p.value = (base[p] - r * v[p]).astype(base[p].dtype, copy=False)
        g_minus = y.grad(x, wrt=w)
    finally:
        # weight reconstruction, also performed when the gradient evaluation raises
        for p in w:
            p.value = base[p]

    hvp = {ks: (g_plus[ks] - g_minus[ks])/(2*r) for ks in g_plus.keys()}

    return hvp


## IHVP

논문에서 나온 conjugate gradient와 stochastic estimation 두 방법을 모두 구현함.

Remark)

원작자 코드에서는 conjugate gradient를 구현할 때 scipy의 ncg (Newton's conjugate gradient)를 사용해서 구현함. 하지만 ncg의 경우 update를 담당하는 outer loop 안에 update 수치를 찾기 위한 작은 inner loop를 하나 더 돌게 되는데, 적절한 해를 찾지 못할 경우 이 inner loop를 벗어나지 못할 가능성이 있음.
- 과거 버전에서는 while loop을 사용해서 진행되어 평생 벗어나지 못할 가능성이 있음.
- 최신 버전에서는 for loop을 사용해서 진행되어 cg_maxiter를 넘기면 벗어날 가능성이 있으나 이 값은 내부에서만 존재하는 hyperparameter라서 바꿔줄 수 없음. 내부에서 지정된 값은 20 * len(x0)인데, 우리의 경우 len(x0)가 80000이 넘어가기 때문에 사실상 며칠을 돌려도 끝나지 않음. (몇 십분 돌렸을 때 겨우 400번 정도 돌았음.)

이 때문에 scipy의 cg(fmin_cg)를 사용해서 구현함. 이 경우 maxiter를 지정하면 수렴하지 않더라도 최적화를 중간에 끝낼 수 있음.

In [3]:
# Conjugate Gradient
from scipy.optimize import fmin_ncg, fmin_cg

def dic2vec(dic):
    """Flatten a dictionary of arrays into one 1D vector.

    dic: dictionary with array values (e.g. gradient of network)
    returns: 1D numpy array with every value flattened and joined in the
        dictionary's iteration order
    """
    flat_parts = []
    for arr in dic.values():
        flat_parts.append(arr.reshape(-1))

    return np.concatenate(flat_parts)

def vec2dic(vec, fmt):
    """Split a 1D vector into a dictionary of arrays with the shapes given by fmt.

    vec: 1D numpy array holding the flattened values, laid out in the
        iteration order of fmt
    fmt: {key: val.shape for (key,val) in dict}, the target shape of each entry
    returns: dictionary {key: consecutive slice of vec reshaped to fmt[key]}
    """
    dic = {}
    offset = 0

    for key, shape in fmt.items():
        # dtype=int keeps the size integral even for a scalar shape (), where
        # np.prod(()) would return the float 1.0 and break the slicing below
        size = int(np.prod(shape, dtype=int))
        dic[key] = vec[offset:offset + size].reshape(shape)
        offset += size

    return dic

def get_inverse_hvp_cg(model, y, v, data_set, method='Basic', **kwargs):
    """Estimate the inverse Hessian-vector product H^-1 v with conjugate gradient.

    Minimises the quadratic 0.5 * x^T (H + damping*I) x - v^T x, whose minimiser
    is (H + damping*I)^-1 v. H is the Hessian of y averaged over data_set and is
    only ever accessed through Hessian-vector products.

    model: neural network model exposing the inputs model.X and model.y
    y: scalar function output of the neural network (e.g. model.loss)
    v: vector to be producted by inverse hessian (numeric dictionary keyed by
        parameter, e.g. v_test)
    data_set: training set to be summed in Hessian
    method: 'Newton' -> Newton-Conjugate Gradient (scipy fmin_ncg),
        anything else -> Conjugate Gradient (scipy fmin_cg)
    kwargs: hyperparameters for conjugate gradient
        batch_size (default 50), damping (default 0.0), avextol (default 1e-8,
        Newton only), maxiter (default 10), num_workers (default 6)
    returns: dictionary with the same keys and shapes as v
    """
    # hyperparameters
    batch_size = kwargs.pop('batch_size', 50)
    damping = kwargs.pop('damping', 0.0)
    avextol = kwargs.pop('avextol', 1e-8)
    maxiter = kwargs.pop('maxiter', 10)
    if maxiter is not None:
        # an iteration budget is a count; accept float input such as 1e1
        maxiter = int(maxiter)
    num_workers = kwargs.pop('num_workers', 6)

    dataloader = DataLoader(data_set, batch_size, shuffle=True, num_workers=num_workers)
    num_data = len(data_set)

    # shapes of v's entries, used to convert between flat vectors and dictionaries
    fmt = {key: val.shape for (key, val) in v.items()}
    v_flat = dic2vec(v)

    t0 = time.time()
    get_inverse_hvp_cg.cnt = 0

    def HVP_minibatch_val(y, vec_dic):
        # Hessian vector product averaged over the whole dataset, plus the damping term
        # y: scalar function output of the neural network (e.g. model.loss)
        # vec_dic: vector to be multiplied by the Hessian (numeric dictionary)
        hvp_batch = {key: np.zeros_like(value) for key, value in vec_dic.items()}

        for img, lb in dataloader:
            img = img.numpy(); lb = lb.numpy()
            x_feed = {model.X: img, model.y: lb}
            hvp = HVP(y, x_feed, vec_dic)
            # accumulate the per-batch products, normalised by the dataset size
            for ks in hvp.keys():
                hvp_batch[ks] += hvp[ks]/num_data

        if damping:
            # (H + damping*I) vec: the damping term was previously read but never applied
            for ks in hvp_batch.keys():
                hvp_batch[ks] += damping * vec_dic[ks]

        return hvp_batch

    def fmin_loss_fn(x):
        # objective: 0.5 * x^T H x - v^T x
        x_dic = vec2dic(x, fmt)
        hvp_val = HVP_minibatch_val(y, x_dic)

        return 0.5 * grad_inner_product(hvp_val, x_dic) - grad_inner_product(v, x_dic)

    def fmin_grad_fn(x):
        # gradient of the objective: H x - v
        # x: 1D vector
        x_dic = vec2dic(x, fmt)
        hvp_val = HVP_minibatch_val(y, x_dic)

        return dic2vec(hvp_val) - v_flat

    def fmin_hvp_fn(x, p):
        # Hessian of the objective applied to p: H p (independent of x)
        p_dic = vec2dic(p, fmt)
        hvp_val = HVP_minibatch_val(y, p_dic)

        return dic2vec(hvp_val)

    def cg_callback(x):
        # progress report after every optimiser iteration
        x_dic = vec2dic(x, fmt)
        print('iteration: {}'.format(get_inverse_hvp_cg.cnt), ', ', time.time()-t0, '(sec) elapsed')
        print('vector element-wise square: ', grad_inner_product(x_dic, x_dic))
        get_inverse_hvp_cg.cnt += 1

        return 0

    x0 = v_flat.copy()

    if method == 'Newton':
        fmin_results = fmin_ncg(\
                f = fmin_loss_fn, x0 = x0, fprime = fmin_grad_fn,\
                fhess_p = fmin_hvp_fn, avextol = avextol, maxiter = maxiter, callback=cg_callback)
    else:
        fmin_results = fmin_cg(\
                f = fmin_loss_fn, x0 = x0, fprime = fmin_grad_fn,\
                maxiter = maxiter, callback = cg_callback)

    return vec2dic(fmin_results, fmt)

In [4]:
# Stochastic Estimation

def get_inverse_hvp_se(model, y, v, data_set, **kwargs):
    """Estimate the inverse Hessian-vector product H^-1 v by stochastic estimation.

    Iterates cur <- v + (1 - damping/scale) * cur - (1/scale) * H_batch cur over
    mini-batches and returns cur / scale, averaged over num_samples independent runs.

    model: neural network model exposing the inputs model.X and model.y
    y: scalar function output of the neural network (e.g. model.loss)
    v: vector to be producted by inverse hessian (numeric dictionary keyed by
        parameter, e.g. v_test)
    data_set: training set to be summed in Hessian
    kwargs: hyperparameters for stochastic estimation
        recursion_depth (default 50), scale (default 1e1), damping (default 0.0),
        batch_size (default 1), num_samples (default 1), tolerance (default 1e-2),
        verbose (default False), num_workers (default 6)
    returns: dictionary with the same keys as v
    """
    # hyperparameters
    recursion_depth = kwargs.pop('recursion_depth', 50) # epoch
    scale = kwargs.pop('scale', 1e1) # similar to learning rate
    damping = kwargs.pop('damping', 0.0) # paper reference: 0.01
    batch_size = kwargs.pop('batch_size', 1)
    num_samples = kwargs.pop('num_samples', 1) # the number of samples(:stochastic estimation of IF) to be averaged
    tolerance = kwargs.pop('tolerance', 1e-2) # the difference btw l2 norms of current and previous vector used for early stopping
    verbose = kwargs.pop('verbose', False)
    num_workers = kwargs.pop('num_workers', 6) # same knob as get_inverse_hvp_cg

    dataloader = DataLoader(data_set, batch_size, shuffle=True, num_workers=num_workers)

    inv_hvps = []

    params = list(v.keys())

    for _ in range(num_samples):
        # obtain num_samples inverse hvps
        cur_estimate = v
        prev_norm = 0

        for depth in range(recursion_depth):
            # epoch-scale recursion depth
            t1 = time.time()
            hvp = None
            for img, lb in dataloader:
                img = img.numpy(); lb = lb.numpy()
                x_feed = {model.X: img, model.y: lb}
                hvp = HVP(y, x_feed, cur_estimate)
                # Normalise by the number of samples actually in this batch: the
                # last batch of an epoch can be smaller than batch_size.
                n_batch = len(img)
                # cur_estimate = v + (1 - damping/scale)*cur_estimate - (1/scale)*(hvp/n_batch)
                cur_estimate = {ks: v[ks] + (1-damping/scale)*cur_estimate[ks] - (1/scale)*hvp[ks]/n_batch for ks in cur_estimate.keys()}

            if verbose:
                print('#w: \n', list(map(lambda x: x.value, params)), '\n#hvp: \n', hvp, '\n#ihvp: \n', cur_estimate)

            cur_norm = np.sqrt(grad_inner_product(cur_estimate,cur_estimate))
            print('Recursion depth: {}, norm: {}, time: {} \n'.format(depth, cur_norm,time.time()-t1))

            # divergence check (catches overflow to inf as well as nan)
            if not np.isfinite(cur_norm):
                print('## The result has been diverged ##')
                break

            # convergence check
            if np.abs(cur_norm - prev_norm) <= tolerance:
                # change this to more precise one (<- scipy.fmin_cg also use gnorm)
                print('## Early stopped due to small change')
                break
            prev_norm = cur_norm

        # the recursion converges towards scale * H^-1 v, so undo the scaling
        inv_hvp = {ks: (1/scale)*cur_estimate[ks] for ks in cur_estimate.keys()}
        inv_hvps.append(inv_hvp)

    # average the independent estimates key by key
    inv_hvp_val = {ks: np.mean([sample[ks] for sample in inv_hvps], axis=0) for ks in inv_hvps[0].keys()}

    return inv_hvp_val

## Toy Example for IHVP (CG, NCG, SE)

간단한 neural network를 사용해서 위 알고리즘이 잘 동작하는지 확인.

사실 network의 loss는 w에 대해서 locally convex할 뿐, globally convex하지는 않음. 따라서 일반적으로는 위 알고리즘들이 수렴하지 않거나 발산할 가능성이 있음.

하지만 w를 고정시켜두고 이를 진행시켰을 때 만약 알고리즘이 locally convex한 경우에서도 잘 동작한다면, (1.25, -0.083) 값이 나와야 함.

세 알고리즘 모두 원하는 값에 잘 수렴함을 확인함.
(SE의 경우에는 scale에 따라서 발산할 때도 있음.)

이에 대한 자세한 결과는 ihvp_toy.ipynb를 참고

In [5]:
# toy example for inverse HVP (CG, NCG and SE)

class SimpleNet:
    """Toy network for checking the IHVP routines: two stacked 1-unit linear
    Dense layers without bias, trained against a squared-error loss."""

    def __init__(self):
        def linear_unit():
            # 1-unit Dense layer, no activation, no bias, uniform(1) initialiser
            return C.layers.Dense(1, activation=None, init=C.uniform(1), bias=False)

        # scalar input, hidden value and prediction
        self.X = C.input_variable(shape=(1,))
        self.h = linear_unit()(self.X)
        self.pred = linear_unit()(self.h)
        # scalar target and the loss comparing it with the prediction
        self.y = C.input_variable(shape=(1,))
        self.loss = C.squared_error(self.pred, self.y)
        
class SimpleDataset:
    """Minimal in-memory dataset that pairs images with labels by index."""

    def __init__(self, images, labels):
        # indexable containers, kept as given
        self._images = images
        self._labels = labels

    def __len__(self):
        # number of samples, taken from the image container
        return len(self._images)

    def __getitem__(self, index):
        # (image, label) pair stored at the given index
        return self._images[index], self._labels[index]


# Build the toy network; its parameters are labelled w1 and w2 in the prints below.
net = SimpleNet()

params = net.pred.parameters

# Single data point (input 2, target 1) and an all-ones vector v, one entry per parameter.
x_feed = {net.X:np.array([[2.]],dtype=np.float32), net.y:np.array([[1.]],dtype=np.float32)}
v_feed = {p: np.ones_like(p.value) for p in params}

# Show the randomly initialised weights, then overwrite them with fixed values
# (1 and 1/3) so that the expected result below is reproducible.
print('w1 = \n', params[0].value, '\nw2 = \n', params[1].value, '\nloss = \n', net.loss.eval(x_feed))
params[0].value = np.asarray([[1.]])
params[1].value = np.asarray([[1./3.]])
print('w1 = \n', params[0].value, '\nw2 = \n', params[1].value, '\nloss = \n', net.loss.eval(x_feed))

# Hessian-vector product at the fixed weights.
print('hvp', HVP(net.loss, x_feed, v_feed))

# Training set holding only the single data point used above.
#images = np.asarray([[2.],[2.]], dtype=np.float32)
#labels = np.asarray([[1.],[1.]], dtype=np.float32)
images = np.asarray([[2.]], dtype=np.float32)
labels = np.asarray([[1.]], dtype=np.float32)

train_set = SimpleDataset(images,labels)

# Run the three solvers back to back; t1..t4 bracket each call for the timing prints.
print('######## damping = 0.0, desired solution: [1.25, -0.08] ########'); t1 = time.time()
ihvp_ncg = get_inverse_hvp_cg(net, net.loss, v_feed, train_set, method='Newton', **{'damping': 0.0}); t2 = time.time()
ihvp_cg = get_inverse_hvp_cg(net, net.loss, v_feed, train_set, **{'damping': 0.0}); t3 = time.time()
ihvp_se = get_inverse_hvp_se(net, net.loss, v_feed, train_set, **{'damping': 0.0, 'recursion_depth': 100}); t4 = time.time()
print('inverse hvp_ncg', ihvp_ncg, '\ntime: ', t2-t1)
print('inverse hvp_cg', ihvp_cg, '\ntime: ', t3-t2 )
print('inverse hvp_se', ihvp_se, '\ntime: ', t4-t3)

# Disabled variants with non-zero damping.
# NOTE(review): get_inverse_hvp_ncg is not defined in the visible part of this file;
# the Newton variant above is reached through get_inverse_hvp_cg(..., method='Newton').
# print('inverse hvp_ncg', get_inverse_hvp_ncg(net, net.loss, v_feed, train_set, **{'damping': 0.1}))
# print('inverse hvp_cg', get_inverse_hvp_cg(net, net.loss, v_feed, train_set, **{'damping': 0.1}))
# print('inverse hvp_se', get_inverse_hvp_se(net, net.loss, v_feed, train_set, **{'scale':10, 'damping':0.1}))

w1 = 
 [[-0.7322467]] 
w2 = 
 [[-0.68041325]] 
loss = 
 [  1.25268134e-05]
w1 = 
 [[ 1.]] 
w2 = 
 [[ 0.33333334]] 
loss = 
 [ 0.1111111]
hvp {Parameter('W', [], [1 x 1]): array([[ 2.22302079]], dtype=float32), Parameter('W', [], [1 x 1]): array([[ 9.33413506]], dtype=float32)}
######## damping = 0.0, desired solution: [1.25, -0.08] ########
iteration: 0 ,  0.19780492782592773 (sec) elapsed
vector element-wise square:  0.725185762087
iteration: 1 ,  0.36102795600891113 (sec) elapsed
vector element-wise square:  1.56326942006
iteration: 2 ,  0.5301332473754883 (sec) elapsed
vector element-wise square:  1.56095527625
iteration: 3 ,  0.7333381175994873 (sec) elapsed
vector element-wise square:  1.56935306871
iteration: 4 ,  1.154935359954834 (sec) elapsed
vector element-wise square:  1.56935306871
Optimization terminated successfully.
         Current function value: -0.583328
         Iterations: 5
         Function evaluations: 9
         Gradient evaluations: 13
         Hessian evaluat

# Experimental Result

Noisy EMNIST dataset을 사용해서 실험을 진행함.

이 데이터를 사용하는 이유는
- EMNIST dataset은 일상적으로 사용하는 문자(숫자, 알파벳) 이미지이기 때문에 결과를 직관적으로 해석할 수 있음.
- noisy label 문제를 다루기 때문에 이와 연결지어 해석할 수 있음.
- 과거 학습된 network를 가지고 있음. (suawiki/noisy label 참고)

In [6]:
import os, sys
sys.path.append('../refer/datasets-analysis-cntk')
import json

from datasets import dataset

import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
import scipy.misc
from torch.utils.data import DataLoader

In [7]:
def show_image_from_data(img, show=True):
    """Display an image stored as a channel-first numpy array.

    img: (C,W,H) numpy array
    show: draw the image when True; when False the call does nothing
    returns: 0
    """
    # The guard used to be inverted, so the default show=True skipped the drawing.
    if not show:
        return 0

    img_show = np.transpose(img,[1,2,0])
    if img_show.shape[-1] == 1:
        # single-channel image: drop the channel axis so imshow receives a 2D array
        img_show = img_show[..., 0]
    imshow(img_show)
    plt.show()

    return 0

In [8]:
def get_influence_val(model, ihvp, data_set, cosine=False, **kwargs):
    """Compute the influence value of every sample in data_set for a given H^-1 v_test.

    Each sample is fed on its own (batch size 1, no shuffling), so the returned
    list follows the order of data_set.

    model: network exposing model.loss, model.X and model.y
    ihvp: inverse of Hessian Vector Product (dictionary) (e.g. H^-1 v_test)
    data_set: data set to be fed (dataset class) (e.g. train_set)
    cosine: when True, each sample gradient is scaled to unit L2 norm first
    kwargs: hyperparameters
        num_workers (default 6): worker count passed to the DataLoader
    returns: list with one value -<ihvp, grad_i> / len(data_set) per sample
    """
    num_workers = kwargs.pop('num_workers', 6)

    if_list = []

    params = list(ihvp.keys()) # not (model.logits.parameters) due to freezing

    num_data = len(data_set)
    dataloader = DataLoader(data_set, 1, shuffle=False, num_workers=num_workers)

    t1 = time.time()
    for img, lb in dataloader:
        img = img.numpy(); lb = lb.numpy()
        gd = model.loss.grad({model.X:img, model.y:lb}, wrt=params)
        # cosine normalization
        if cosine:
            nrm = np.sqrt(grad_inner_product(gd,gd))
            # A zero gradient cannot be normalised: dividing would yield NaN, which
            # np.argsort places last, i.e. among the top-ranked samples. Leave it as
            # is so that the sample simply gets influence 0.
            if nrm > 0:
                gd = {k: g/nrm for k,g in gd.items()}
        if_val = -grad_inner_product(ihvp, gd) / num_data
        if_list.append(if_val)
    print('get_influence_val takes {} sec'.format(time.time()-t1))

    return if_list

In [25]:
def visualize_topk_samples(measure, data_set, num_sample=5, mask=None,\
        verbose='A/D', show=False, save_path='/Data/result/'):
    """Save figures of the samples with the largest and smallest measure values.

    measure: list of measure whose each element represents score of each datapoints
    data_set: set of data to be visualized (e.g. train_set); must provide
        indexing, len(), anno_dict['classes'] and filename_list
    num_sample: the number of samples to be visualized
    mask: (0,1) list for training set; only samples with a truthy entry are kept
    verbose:
      ALL or A/D: save DISADVANTAGEOUS.png (largest values) and
          ADVANTAGEOUS.png (smallest values) inside save_path
      (the INF / NEG views are not produced by this function)
    show: indicator that chooses plt.show or not
    save_path: output directory, created when missing
    returns: 0
    """

    def extract_annotation(data_set, indices, **kwargs):
        # extract images and image annotations from dataset w.r.t. indices

        # data_set: dataset structure
        # indices: set of sorted and sampled index (e.g. topk)
        # kwargs: other information to be annotated
        #   key: name of this feature (str) (e.g. influence function)
        #   value: value set of this feature for each datapoints (indexable by sample index)

        images = []; annotations = []

        for idx in indices:
            img, lb = data_set[idx]
            lb_str = data_set.anno_dict['classes'][str(np.argmax(lb))]
            filename = data_set.filename_list[idx]

            annotation = [\
                    'training set name: {}'.format(filename),\
                    'training set label(anno_dict): {}'.format(lb_str),\
                    ]

            for key in kwargs.keys():
                annotation.append('{}: {}'.format(key, kwargs[key][idx]))

            images.append(img)
            annotations.append('\n'.join(annotation))

        return np.array(images), annotations

    def draw_images_with_titles(images, filenames, show=False,\
            save_dir='/Data/result/images_with_titles.png'):
        # draw the images in one column, each titled with its annotation, and save the figure

        N, Ch, H, W = images.shape

        if Ch == 1:
            # replicate the single channel so that imshow receives RGB data
            images = np.tile(images, (1,3,1,1))

        # squeeze=False keeps axes two-dimensional even when N == 1
        fig, axes = plt.subplots(N, 1, figsize=(H,W), squeeze=False)

        for idx in range(N):
            ax = axes[idx, 0]
            ax.imshow(images[idx].transpose((1,2,0)))
            ax.axis('off')
            ax.set_title(filenames[idx])

        fig.savefig(save_dir)

        if show:
            plt.show()

        # release the figure so that repeated calls do not accumulate open figures
        plt.close(fig)

        return 0

    num_data = len(data_set)
    assert(len(measure)==num_data)

    # `is None` rather than `== None`: the latter is element-wise for array masks
    if mask is None:
        argsort = np.argsort(measure)
    else:
        assert(len(mask) == len(measure))
        argsort = [idx for idx in np.argsort(measure) if mask[idx]]

    topk = argsort[-1:-num_sample-1:-1] # samples that increase loss a lot
    botk = argsort[0:num_sample] # samples that decrease loss a lot

    # make folder (the check used to be inverted and only ran when it already existed)
    os.makedirs(save_path, exist_ok=True)

    if verbose == 'A/D' or verbose == 'ALL':
        for indices, figure_name in ((topk, 'DISADVANTAGEOUS.png'), (botk, 'ADVANTAGEOUS.png')):
            if len(indices) == 0:
                # nothing selected (e.g. num_sample == 0 or an all-zero mask)
                continue
            images, annotations = extract_annotation(data_set, indices, **{'measure': measure})
            draw_images_with_titles(images, annotations, show=show,\
                    save_dir=os.path.join(save_path, figure_name))

    return 0

In [10]:
# def IF_val(net, ihvp, data_set, cosine=False):
#     # Calculate influence function w.r.t ihvp and data_set
#     # This should be done in sample-wise, since the gradient operation will sum up over whole feed-dicted data
    
#     # ihvp: inverse hessian vector product (dictionary)
#     # data_set: data_set to be feed to the gradient operation (dataset)
#     IF_list = []
    
#     #params = net.logits.parameters
#     params = ihvp.keys()
    
#     dataloader = DataLoader(data_set, 1, shuffle=False, num_workers=6)
    
#     t1 = time.time()
#     for img, lb in dataloader:
#         img = img.numpy(); lb = lb.numpy()
#         gd = net.loss.grad({net.X:img, net.y:lb}, wrt=params)
#         if cosine:
#             nrm = np.sqrt(grad_inner_product(gd,gd))
#             gd = {k: v/nrm for k,v in gd.items()}
#         IF = -grad_inner_product(ihvp, gd) / len(dataloader)
#         IF_list.append(IF)
#     print('IF_val takes {} sec'.format(time.time()-t1))
        
#     return IF_list

# def visualize_topk_samples(measure, train_set, num_sample=5, mask=None, verbose='ALL', save_path='./result'):
#     # 'ALL': show DISADV / ADV / INF / NEG examples
#     # 'ADV': show ADV only
#     # 'DIS': show DIS only

#     axis = 2 # axis=1 -> column output / axis=2 -> row output
    
#     if mask == None:
#         argsort = np.argsort(measure)
#     else:
#         assert(len(mask) == len(measure))
#         argsort = list(filter(lambda idx: mask[idx], np.argsort(measure)))
    
#     topk = argsort[-1:-num_sample-1:-1]
#     botk = argsort[0:num_sample]
    
#     if not os.path.exists(save_path):
#         # make folder
#         os.makedirs(save_path)

#     if verbose == 'DIS' or verbose == 'ALL':
#         dis = []
#         true_label = ''; noisy_label = ''
#         print('\n## SHOW {}-MOST DISADVANTAGEOUS EXAMPLES ##\n'.format(num_sample))
#         for idx in topk:
#             img, lb = train_set.__getitem__(idx)
#             show_image_from_data(img)
#             print('training set name: ', train_set.filename_list[idx])
#             print('training set label: ', train_set.anno_dict['classes'][str(np.argmax(lb))])
#             print('IF measure: ', measure[idx])
#             print(trainval_list[idx]) # FIXME
#             dis.append(img)
#             true_label += train_set.filename_list[idx].split('_')[1]
#             noisy_label += train_set.anno_dict['classes'][str(np.argmax(lb))]
#         dis = np.squeeze(np.concatenate(dis, axis=axis))
#         scipy.misc.imsave(save_path+'/disadvantageous_true_{}_noisy_{}.png'.format(true_label, noisy_label), dis)

#     if verbose == 'ADV' or verbose == 'ALL':
#         adv = []
#         true_label = ''; noisy_label = ''
#         print('\n## SHOW {}-MOST ADVANTAGEOUS EXAMPLES ##\n'.format(num_sample))
#         for idx in botk:
#             img, lb = train_set.__getitem__(idx)
#             show_image_from_data(img)
#             print('training set name: ', train_set.filename_list[idx])
#             print('training set label: ', train_set.anno_dict['classes'][str(np.argmax(lb))])
#             print('IF measure: ', measure[idx])
#             print(trainval_list[idx])
#             adv.append(img)
#             true_label += train_set.filename_list[idx].split('_')[1]
#             noisy_label += train_set.anno_dict['classes'][str(np.argmax(lb))]
#         adv = np.squeeze(np.concatenate(adv, axis=axis))
#         scipy.misc.imsave(save_path+'/advantageous_true_{}_noisy_{}.png'.format(true_label, noisy_label), adv)

    # NOTE(review): the `def` line this indented body belonged to is commented out
    # above, so as written the names used here (verbose, mask, measure, num_sample,
    # train_set, axis, save_path) are not bound by any enclosing function in this
    # cell -- confirm whether this block should be commented out too or re-attached.
    # NOTE(review): scipy.misc.imsave is reported as deprecated by the warning
    # captured in this notebook's output.
    #
    # For verbose == 'ALL': rank the samples by |measure|, then print, collect and
    # save the num_sample largest (influential) and smallest (negligible) ones.
    if verbose == 'ALL':
        
        # ascending order of absolute measure, optionally restricted by mask
        if mask == None:
            argsort_abs = np.argsort(np.abs(measure))
        else:
            assert(len(mask) == len(measure))
            argsort_abs = list(filter(lambda idx: mask[idx], np.argsort(np.abs(measure))))

        topk_abs = argsort_abs[-1:-num_sample-1:-1]
        botk_abs = argsort_abs[0:num_sample]
        
        # largest |measure|: images are concatenated along `axis` and saved as one file
        inf = []
        true_label = ''; noisy_label = ''
        print('\n## SHOW {}-MOST INFLUENTIAL EXAMPLES ##\n'.format(num_sample))
        for idx in topk_abs:
            img, lb = train_set.__getitem__(idx)
            show_image_from_data(img)
            print('training set name: ', train_set.filename_list[idx])
            print('training set label: ', train_set.anno_dict['classes'][str(np.argmax(lb))])
            print('IF measure: ', measure[idx])
            inf.append(img)
            # second '_'-separated field of the file name is appended as the true label
            true_label += train_set.filename_list[idx].split('_')[1]
            noisy_label += train_set.anno_dict['classes'][str(np.argmax(lb))]
        inf = np.squeeze(np.concatenate(inf, axis=axis))
        scipy.misc.imsave(save_path+'/influential_true_{}_noisy_{}.png'.format(true_label, noisy_label), inf)

        # smallest |measure|: same procedure as above
        neg = []
        true_label = ''; noisy_label = ''
        print('\n## SHOW {}-MOST NEGLIGIBLE EXAMPLES ##\n'.format(num_sample))
        for idx in botk_abs:
            img, lb = train_set.__getitem__(idx)
            show_image_from_data(img)
            print('training set name: ', train_set.filename_list[idx])
            print('training set label: ', train_set.anno_dict['classes'][str(np.argmax(lb))])
            print('IF measure: ', measure[idx])
            neg.append(img)
            true_label += train_set.filename_list[idx].split('_')[1]
            noisy_label += train_set.anno_dict['classes'][str(np.argmax(lb))]
        neg = np.squeeze(np.concatenate(neg, axis=axis))
        scipy.misc.imsave(save_path+'/negligible_true_{}_noisy_{}.png'.format(true_label, noisy_label), neg)
        
    return 0

In [11]:
# emnist dataset: root directory of the data on disk
root_dir = '/Data/emnist/balanced/original'

# sample size: read a 1000-sample subset for training and a 500-sample subset for testing
trainval_list, anno_dict = dataset.read_data_subset(root_dir, mode='train1', sample_size=1000)
# trainval_list, anno_dict = dataset.read_data_subset(root_dir, mode='train1')
test_list, _ = dataset.read_data_subset(root_dir, mode='validation1', sample_size=500)

# the test set reuses the annotation dictionary returned for the training subset
test_set = dataset.LazyDataset(root_dir, test_list, anno_dict, rescale=False)

# take the image dimensions (channels, height, width) from the first test image
img, _ = test_set.__getitem__(0)
Ch, H, W = img.shape

# emnist dataset: SANITY CHECK (dataset size/type and image dimensions)
print(len(test_set), type(test_set))
print(len(test_list), Ch, H, W)

500 <class 'datasets.dataset.LazyDataset'>
500 1 28 28


In [12]:
# number of samples in the test set (cell output)
test_set.__len__()

500

In [14]:
# emnist network
from models.nn import cntk_ConvNet as ConvNet
# from models.nn import VGG_like as ConvNet

hp_d = dict() # hyperparameters for a network
# Load the stored mean and expand it to the image layout: tile over (H, W), then
# move the last axis to the front.
# NOTE(review): presumably `mean` is a per-channel vector, giving a (C, H, W) array -- confirm.
mean = np.load('../refer/datasets-analysis-cntk/output/mean_emnist.npy')
hp_d['image_mean'] = np.transpose(np.tile(mean,(H,W,1)),(2,0,1))

# Build the network for the image shape and class count, then restore the saved weights.
net = ConvNet(test_set.__getitem__(0)[0].shape, len(anno_dict['classes']), **hp_d)
net.logits.restore('/Data/checkpts/emnist/temp_model_fold_1_trainval_ratio_1.0.dnn')

# emnist network: SANITY CHECK (predict on the test set and time it)
start_time = time.time()
ys, y_preds, test_score, confusion_matrix = net.predict(test_set, **hp_d)
total_time = time.time() - start_time

print('Test error rate: {}'.format(test_score))
print('Total tack time(sec): {}'.format(total_time))
print('Tact time per image(sec): {}'.format(total_time / len(test_list)))
print('Confusion matrix: \n{}'.format(confusion_matrix))

conv1.shape (32, 28, 28)
pool1.shape (32, 14, 14)
conv2.shape (48, 14, 14)
pool2.shape (48, 7, 7)
conv3.shape (64, 7, 7)
Test error rate: 0.11599999999999999
Total tack time(sec): 0.4635584354400635
Tact time per image(sec): 0.000927116870880127
Confusion matrix: 
[[ 9  0  0 ...,  0  0  0]
 [ 0  3  0 ...,  0  0  0]
 [ 0  0 14 ...,  0  0  0]
 ..., 
 [ 0  0  0 ...,  8  0  0]
 [ 0  0  0 ...,  0 14  0]
 [ 0  0  0 ...,  0  0 10]]


## Inverse Hessian Vector Product w.r.t. Frozen Network

Gradient, Hessian을 구할 때 대상 weight의 범위를 한정지어서 inverse HVP를 구함.

앞단 network를 freeze 시키는 데에는 두 가지 이유가 있음.

1. Convexity
    - Network가 깊어지면 깊어질 수록 convexity가 망가질 가능성이 있음.
    - 때문에 CG, NCG, SE 방법론을 사용할 때 값이 발산하는 문제가 발생함.
2. Computational Complexity
    - Weight가 많을 수록 계산이 복잡해지고, vanishing gradient 등의 문제로 인해 precision loss가 발생할 가능성이 늘어남.
    
이를 해결하기 위해 가장 간단하고 직관적인 방법인 network freezing을 사용함.
이는 feature extraction을 담당하는 앞부분 weight를 전부 고정하고, 
최종 layer의 weight만을 사용하여 convexity가 보장된 logistic regression문제로 바꾸는 것.

In [26]:
# DO THIS FOR SEVERAL EXAMPLES

# vec v.s. freeze v.s. se

# Datasets built from the previously sampled file lists.
train_set = dataset.LazyDataset(root_dir, trainval_list, anno_dict, rescale=False)
test_set = dataset.LazyDataset(root_dir, test_list, anno_dict, rescale=False)

# root directory for the per-test-image result folders
save_dir = '/Data/result/influence'

for idx_test in range(0, 1):
    # Set a single test image

    name_test = test_list[idx_test]
    img_test, lb_test = test_set.__getitem__(idx_test)
    show_image_from_data(img_test)
    #v_test = net.loss.grad({net.X:img_test, net.y:lb_test}, wrt=params)
    
    # class names of the ground-truth label and of the network's prediction
    lb_true = anno_dict['classes'][str(np.argmax(lb_test))]
    lb_pred = anno_dict['classes'][str(np.argmax(net.logits.eval({net.X:img_test})))]
    print('testfile name: ', name_test)
    print('ground truth label: ', lb_true)
    print('network prediction: ', lb_pred)

    # results go into a folder named after the test file (extension stripped)
    save_path = os.path.join(save_dir, name_test.split('.')[0])
    
    if not os.path.exists(save_path):
        # make folder
        os.makedirs(save_path)
        
    # NOTE(review): scipy.misc.imsave is reported as deprecated by the warning
    # captured in this cell's output.
    scipy.misc.imsave(save_path+'/test_reference_true_{}_pred_{}.png'.format(lb_true,lb_pred), np.squeeze(img_test))

    # store the sampled file lists next to the results so the run can be restored later
    np.save(save_path+'/trainval_list', trainval_list)
    np.save(save_path+'/test_list', test_list)

    # CALCULATE IF WITH FREEZED NETWORK

    # keep only the loss parameters that do not belong to the 'dense1' layer
    params = net.loss.parameters
    p_ftex = net.d['dense1'].parameters
    p_logreg = tuple(set(params) - set(p_ftex)) # extract the weights of the last-layer (w,b)
    print(p_logreg)
    # gradient of the test loss w.r.t. the selected parameters (the vector v)
    v_logreg = net.loss.grad({net.X:img_test, net.y:lb_test}, wrt=p_logreg)

    # Calculate influence functions

    # CG-FREEZE (1885 sec)
    t1 = time.time()
    ihvp_cg_logreg = get_inverse_hvp_cg(net, net.loss, v_logreg, train_set,**{'damping':0.0, 'maxiter':50})
    IF_cg_logreg = get_influence_val(net, ihvp_cg_logreg, train_set)
    print('CG_logreg takes {} sec, and its max/min value {}'.format(time.time()-t1, [max(IF_cg_logreg),min(IF_cg_logreg)]))
    np.save(save_path+'/if_cg_logreg.npy', IF_cg_logreg)
    #IF_cg_logreg = np.load(save_path+'/if_cg_logreg.npy')
    # NOTE(review): visualize_topk_samples appends the file name to save_path by plain
    # string concatenation, and this path has no trailing separator -- confirm the
    # resulting output location.
    visualize_topk_samples(IF_cg_logreg, train_set, num_sample=5, save_path=save_path+'/cg-frz')
    
#     # VECTOR-FREEZE (175 sec)
#     t1 = time.time()
#     IF_v_logreg = IF_val(net, v_logreg, train_set)
#     print('V_logreg takes {} sec, and its max/min value {}'.format(time.time()-t1, [max(IF_v_logreg),min(IF_v_logreg)]))
#     np.save(save_path+'/if_v_logreg.npy', IF_v_logreg)
#     #IF_v_logreg = np.load(save_path+'/if_v_logreg.npy')
#     visualize_topk_samples(IF_v_logreg, train_set, num_sample=5, save_path=save_path+'/vec-frz')

#     # Vector-FULL (1688 sec)
#     t1 = time.time()
#     IF_v = IF_val(net, v_test, train_set)
#     print('V takes {} sec, and its max/min value {}'.format(time.time()-t1, [max(IF_v),min(IF_v)]))
#     np.save(save_path+'/if_v.npy', IF_v)
#     #IF_v = np.load(save_path+'/if_v.npy')
#     visualize_topk_samples(IF_v, train_set, num_sample=5, save_path=save_path+'/v')
    
#     # VECTOR-FREEZE-cosine-similarity (178 sec)
#     t1 = time.time()
#     IF_v_cos = IF_val(net, v_logreg, train_set, cosine=True)
#     print('V_cos takes {} sec, and its max/min value {}'.format(time.time()-t1, [max(IF_v_cos),min(IF_v_cos)]))
#     np.save(save_path+'/if_v_cos.npy', IF_v_cos)
#     #IF_v_cos = np.load(save_path+'/if_v_cos.npy')
#     visualize_topk_samples(IF_v_cos, train_set, num_sample=5, save_path=save_path+'/vec-cos')

#     # CG-FULL (1epoch, more than 3 hours, did it stuck at line search as it happened in ncg?)
#     t1 = time.time()
#     ihvp_cg = get_inverse_hvp_cg(net, net.loss, v_test, train_set,**{'damping':0.1, 'maxiter':100})
#     IF_cg = IF_val(net, ihvp_cg, train_set)
#     print('CG takes {} sec, and its max/min value {}'.format(time.time()-t1, [max(IF_cg),min(IF_cg)]))
#     np.save(save_path+'/if_cg.npy', IF_cg)
#     visualize_topk_samples(IF_cg, train_set, num_sample=5, save_path=save_path+'/cg')
    
#     # SE-FULL (? sec: diverge)
#     t1 = time.time()
#     ihvp_se = get_inverse_hvp_se(net, net.loss, v_test, train_set,**{'scale':1e5, 'damping':0.1, 'batch_size':50, 'recursion_depth':100})
#     IF_se = IF_val(net, ihvp_se, train_set)
#     print('SE takes {} sec, and its max/min value {}'.format(time.time()-t1, [max(IF_se),min(IF_se)]))
#     np.save(save_path+'/if_se.npy', IF_se)
#     visualize_topk_samples(IF_se, train_set, num_sample=5, save_path=save_path+'/se')
    
#     # SE-FREEZE (1065 sec)
#     t1 = time.time()
#     ihvp_se_logreg = get_inverse_hvp_se(net, net.loss, v_logreg, train_set,**{'scale':1e3, 'damping':0.1, 'batch_size':50, 'tolerance':0, 'recursion_depth':65})
#     IF_se_logreg = IF_val(net, ihvp_se_logreg, train_set)
#     print('SE_logreg takes {} sec, and its max/min value {}'.format(time.time()-t1, [max(IF_se_logreg),min(IF_se_logreg)]))
#     np.save(save_path+'/if_se_logreg.npy', IF_se_logreg)
#     #IF_se_logreg = np.load(save_path+'/if_se_logreg.npy')
#     visualize_topk_samples(IF_se_logreg, train_set, num_sample=5, save_path=save_path+'/se-frz')
    

testfile name:  train_n_76066.png
ground truth label:  n
network prediction:  n
(Parameter('W', [], [96 x 47]), Parameter('b', [], [47]))


`imsave` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``imageio.imwrite`` instead.


iteration: 0 ,  1.075157880783081 (sec) elapsed
vector element-wise square:  0.0913993416143
iteration: 1 ,  4.955273628234863 (sec) elapsed
vector element-wise square:  0.0916222182896
iteration: 2 ,  8.44317078590393 (sec) elapsed
vector element-wise square:  0.0916998953871
         Current function value: -0.200502
         Iterations: 3
         Function evaluations: 45
         Gradient evaluations: 26
get_influence_val takes 1.5060575008392334 sec
CG_logreg takes 14.69578504562378 sec, and its max/min value [0.0017271030003903434, -0.0010314936068607494]


In [None]:
# DO THIS FOR SEVERAL EXAMPLES

# vec v.s. freeze v.s. se
# For each of the first 30 test images: restore the network, compute the test
# gradient, and save the reference image plus the file lists into a per-image
# folder. The influence computations themselves are the commented-out sections
# that follow this part of the loop.

# restore trainval_list, test_list
file_dir = './compare/result_net_nn_if_nn/train_e_99502'

trainval_list = list(np.load(file_dir+'/trainval_list.npy'))
# NOTE(review): this train_set is replaced a few lines below by the noisy one
train_set = dataset.LazyDataset(root_dir, trainval_list, anno_dict) # non-noisy dataset

test_list = list(np.load(file_dir+'/test_list.npy'))
test_set = dataset.LazyDataset(root_dir, test_list, anno_dict)

# FIXME
# network trained with noisy dataset, influence value with noisy dataset
train_set = dataset.LazyDataset(root_dir, trainval_list, noisy_anno_dict) # noisy dataset
save_dir = './compare/result_net_ns_if_ns' 
restore_dir = '/Data/checkpts/emnist/model_fold_1_trainval_ratio_0.3.dnn'

# # network trained with non-noisy dataset, influence value with non-noisy dataset
# train_set = dataset.LazyDataset(root_dir, trainval_list, anno_dict) # non-noisy dataset
# save_dir = './compare/result_net_nn_if_nn'
# restore_dir = '/Data/checkpts/emnist/model_fold_1_trainval_ratio_0.0.dnn' # non-noisy network

for idx_test in range(0, 30):
    # Set a single test image

    # # Re-sample a test instance
    # test_list, _ = dataset.read_data_subset(root_dir, mode='validation1', sample_size=100)
    # test_set = dataset.LazyDataset(root_dir, test_list, anno_dict)
    
    # Restore weights (done on every iteration, before any gradient is taken)
    net.logits.restore(restore_dir)

    params = net.logits.parameters

    name_test = test_list[idx_test]
    img_test, lb_test = test_set.__getitem__(idx_test)
    show_image_from_data(img_test)
    # gradient of the loss at the test sample w.r.t. all parameters;
    # only consumed by the commented-out "FULL" variants below
    v_test = net.loss.grad({net.X:img_test, net.y:lb_test}, wrt=params)
    
    # map class indices (as string keys) to class names
    lb_true = anno_dict['classes'][str(np.argmax(lb_test))]
    lb_pred = anno_dict['classes'][str(np.argmax(net.logits.eval({net.X:img_test})))]
    print('testfile name: ', name_test)
    print('ground truth label: ', lb_true)
    print('network prediction: ', lb_pred)

    # one result folder per test image, named after the file without extension
    save_path = os.path.join(save_dir, name_test.split('.')[0])
    
    if not os.path.exists(save_path):
        # make folder
        os.makedirs(save_path)
        
    # the reference image file name records both the true and the predicted label
    scipy.misc.imsave(save_path+'/test_reference_true_{}_pred_{}.png'.format(lb_true,lb_pred), np.squeeze(img_test))

    # keep the file lists next to the results so later cells can rebuild the datasets
    np.save(save_path+'/trainval_list', trainval_list)
    np.save(save_path+'/test_list', test_list)

    # CALCULATE IF WITH FREEZED NETWORK

    params = net.loss.parameters
    p_ftex = net.d['dense1'].parameters
    # all loss parameters except those of 'dense1'
    p_logreg = tuple(set(params) - set(p_ftex)) # extract the weights of the last-layer (w,b)
    print(p_logreg)
    # test gradient restricted to the last-layer parameters
    v_logreg = net.loss.grad({net.X:img_test, net.y:lb_test}, wrt=p_logreg)

    # Calculate influence functions

#     # CG-FREEZE (1885 sec)
#     t1 = time.time()
#     ihvp_cg_logreg = get_inverse_hvp_cg(net, net.loss, v_logreg, train_set,**{'damping':0.0, 'maxiter':50})
#     IF_cg_logreg = IF_val(net, ihvp_cg_logreg, train_set)
#     print('CG_logreg takes {} sec, and its max/min value {}'.format(time.time()-t1, [max(IF_cg_logreg),min(IF_cg_logreg)]))
#     np.save(save_path+'/if_cg_logreg.npy', IF_cg_logreg)
#     #IF_cg_logreg = np.load(save_path+'/if_cg_logreg.npy')
#     visualize_topk_samples(IF_cg_logreg, train_set, num_sample=5, save_path=save_path+'/cg-frz')
    
#     # VECTOR-FREEZE (175 sec)
#     t1 = time.time()
#     IF_v_logreg = IF_val(net, v_logreg, train_set)
#     print('V_logreg takes {} sec, and its max/min value {}'.format(time.time()-t1, [max(IF_v_logreg),min(IF_v_logreg)]))
#     np.save(save_path+'/if_v_logreg.npy', IF_v_logreg)
#     #IF_v_logreg = np.load(save_path+'/if_v_logreg.npy')
#     visualize_topk_samples(IF_v_logreg, train_set, num_sample=5, save_path=save_path+'/vec-frz')

#     # Vector-FULL (1688 sec)
#     t1 = time.time()
#     IF_v = IF_val(net, v_test, train_set)
#     print('V takes {} sec, and its max/min value {}'.format(time.time()-t1, [max(IF_v),min(IF_v)]))
#     np.save(save_path+'/if_v.npy', IF_v)
#     #IF_v = np.load(save_path+'/if_v.npy')
#     visualize_topk_samples(IF_v, train_set, num_sample=5, save_path=save_path+'/v')
    
#     # VECTOR-FREEZE-cosine-similarity (178 sec)
#     t1 = time.time()
#     IF_v_cos = IF_val(net, v_logreg, train_set, cosine=True)
#     print('V_cos takes {} sec, and its max/min value {}'.format(time.time()-t1, [max(IF_v_cos),min(IF_v_cos)]))
#     np.save(save_path+'/if_v_cos.npy', IF_v_cos)
#     #IF_v_cos = np.load(save_path+'/if_v_cos.npy')
#     visualize_topk_samples(IF_v_cos, train_set, num_sample=5, save_path=save_path+'/vec-cos')

#     # CG-FULL (1epoch, more than 3 hours, did it stuck at line search as it happened in ncg?)
#     t1 = time.time()
#     ihvp_cg = get_inverse_hvp_cg(net, net.loss, v_test, train_set,**{'damping':0.1, 'maxiter':100})
#     IF_cg = IF_val(net, ihvp_cg, train_set)
#     print('CG takes {} sec, and its max/min value {}'.format(time.time()-t1, [max(IF_cg),min(IF_cg)]))
#     np.save(save_path+'/if_cg.npy', IF_cg)
#     visualize_topk_samples(IF_cg, train_set, num_sample=5, save_path=save_path+'/cg')
    
#     # SE-FULL (? sec: diverge)
#     t1 = time.time()
#     ihvp_se = get_inverse_hvp_se(net, net.loss, v_test, train_set,**{'scale':1e5, 'damping':0.1, 'batch_size':50, 'recursion_depth':100})
#     IF_se = IF_val(net, ihvp_se, train_set)
#     print('SE takes {} sec, and its max/min value {}'.format(time.time()-t1, [max(IF_se),min(IF_se)]))
#     np.save(save_path+'/if_se.npy', IF_se)
#     visualize_topk_samples(IF_se, train_set, num_sample=5, save_path=save_path+'/se')
    
#     # SE-FREEZE (1065 sec)
#     t1 = time.time()
#     ihvp_se_logreg = get_inverse_hvp_se(net, net.loss, v_logreg, train_set,**{'scale':1e3, 'damping':0.1, 'batch_size':50, 'tolerance':0, 'recursion_depth':65})
#     IF_se_logreg = IF_val(net, ihvp_se_logreg, train_set)
#     print('SE_logreg takes {} sec, and its max/min value {}'.format(time.time()-t1, [max(IF_se_logreg),min(IF_se_logreg)]))
#     np.save(save_path+'/if_se_logreg.npy', IF_se_logreg)
#     #IF_se_logreg = np.load(save_path+'/if_se_logreg.npy')
#     visualize_topk_samples(IF_se_logreg, train_set, num_sample=5, save_path=save_path+'/se-frz')
    

figures for ppts
=====================


In [None]:
def find_idx(filename, filename_list):
    # Return the position of the first entry of filename_list equal to filename.
    # When nothing matches, the number of entries (i.e. the list length) is
    # returned instead of raising, so a result equal to len(filename_list)
    # means "not found".
    seen = 0
    for seen, candidate in enumerate(filename_list, start=1):
        if candidate == filename:
            # `seen` counts from 1, the matching position is one less
            return seen - 1

    return seen

# position of the reference test image inside test_list
# (find_idx returns len(test_list) when the name is absent)
filename = 'train_e_99502.png'
fidx = find_idx(filename, test_list)

In [None]:
# Build one strip out of the first 20 training images and save it as a figure.
fig_sample = []
for i in range(20):
    img, lb = train_set[i]  # label is not needed here
    fig_sample.append(img)
#print(fig_sample[1].shape)
# NOTE(review): images are joined along axis 1 here, while the masked-sample
# cell joins along axis 2 -- confirm which layout is intended
fig_sample = np.concatenate(fig_sample, axis=1)
show_image_from_data(fig_sample)
scipy.misc.imsave('./images/image_samples_total.png', np.squeeze(fig_sample))

In [None]:
# visualize image samples (MASK)
# Shows training samples that share the label of one reference test image
# (set A), samples with any other label (set B), and the top-k influence
# samples with and without those masks.

# FIXME #
num_sample = 10
filename = 'train_e_99502.png'
file_dir = './compare/result_net_nn_if_nn/train_e_99502'

# look the reference image up by name instead of assuming it sits at index 0
sample_idx = find_idx(filename, test_list)
if sample_idx >= len(test_list):
    # find_idx signals "not found" by returning the list length
    raise ValueError('{} is not in test_list'.format(filename))
img_test, lb_test = test_set[sample_idx]
show_image_from_data(img_test)
# True where the annotated class of a training sample equals the test label
mask = [anno_dict['images'][exmp]['class'][0] == np.argmax(lb_test) for exmp in trainval_list]
mask_inv = [not e for e in mask]
#len(mask_e)

# empty strip that images are appended to along the last axis
# (assumes images are shaped (1, 28, width) -- TODO confirm)
fig_sample = np.empty((1,28,0))

# data with same label w.r.t test sample (set A)
cnt = 0
for i in range(len(trainval_list)):
    if mask[i]:
        img, lb = train_set[i]
        fig_sample = np.concatenate((fig_sample, img), axis=2)
        cnt += 1
        if cnt == num_sample:
            # enough samples collected
            break

print(fig_sample.shape)
scipy.misc.imsave('./images/image_samples.png', np.squeeze(fig_sample))
show_image_from_data(fig_sample)

# data with the others (set B)
fig_sample = np.empty((1,28,0))
cnt = 0
for i in range(len(trainval_list)):
    if mask_inv[i]:
        img, lb = train_set[i]
        fig_sample = np.concatenate((fig_sample, img), axis=2)
        cnt += 1
        if cnt == num_sample:
            # enough samples collected
            break

print(fig_sample.shape)
scipy.misc.imsave('./images/image_samples2.png', np.squeeze(fig_sample))
show_image_from_data(fig_sample)

# data somewhat similar to each other (A n B)
IF_cg_logreg = np.load(file_dir+'/if_cg_logreg.npy')
visualize_topk_samples(IF_cg_logreg, train_set, num_sample=5, save_path='./images/all') # show all
visualize_topk_samples(IF_cg_logreg, train_set, num_sample=5, mask=mask, save_path='./images/masked') # masked
visualize_topk_samples(IF_cg_logreg, train_set, num_sample=5, mask=mask_inv, save_path='./images/masked_inv') # masked

# idx_sort_mask = list(filter(lambda idx: mask[idx], np.argsort(IF_cg_logreg)))

# for idx in idx_sort_mask:
#     show_image_from_data(train_set[idx][0])
#     print(IF_cg_logreg[idx])

    #show top bot inf neg
#     break
# for fige in mask_e[0:5]:
#     idx = find_idx(mask_e, trainval_list)
#     print(fige,idx)
#     show_image_from_data(train_set[idx])

In [None]:
# axis change

# import glob

# file_dir = './compare/result_net_nn_if_nn/train_e_99502'
# restore_dir = '/Data/checkpts/emnist/model_fold_1_trainval_ratio_0.0.dnn' # non-noisy network
# net.logits.restore(restore_dir)

# trainval_list = list(np.load(file_dir+'/trainval_list.npy'))
# train_set = dataset.LazyDataset(root_dir, trainval_list, anno_dict) # non-noisy dataset

# test_list = list(np.load(file_dir+'/test_list.npy'))
# test_set = dataset.LazyDataset(root_dir, test_list, anno_dict)

# for dr in glob.glob('compare/result_net_nn_if_nn/*'):
#     dr_se = dr+'/if_se_logreg.npy'
#     if os.path.isfile(dr_se):
#         print(dr_se)
#         IF_se_logreg = np.load(dr_se)
#         visualize_topk_samples(IF_se_logreg, train_set, num_sample=5, save_path=dr+'/se-frz')
        
import glob

# Re-render the top-k visualizations from the saved SE-FREEZE influence values
# of every result folder of the noisy experiment.
file_dir = './compare/result_net_ns_if_ns/train_e_99502'
restore_dir = '/Data/checkpts/emnist/model_fold_1_trainval_ratio_0.3.dnn' # noisy network
net.logits.restore(restore_dir)

# the file lists are read from one result folder and reused for all of them
trainval_list = list(np.load(file_dir+'/trainval_list.npy'))
train_set = dataset.LazyDataset(root_dir, trainval_list, noisy_anno_dict) # noisy dataset

test_list = list(np.load(file_dir+'/test_list.npy'))
test_set = dataset.LazyDataset(root_dir, test_list, anno_dict)

for dr in glob.glob('compare/result_net_ns_if_ns/*'):
    dr_se = dr+'/if_se_logreg.npy'
    # folders without saved SE-FREEZE values are skipped
    if os.path.isfile(dr_se):
        print(dr_se)
        IF_se_logreg = np.load(dr_se)
        visualize_topk_samples(IF_se_logreg, train_set, num_sample=5, save_path=dr+'/se-frz')

In [None]:
# calculate IF measure for each samples of each classes

# WITH SEVERAL METHODOLOGIES
# This part of the cell prepares the data and, per sample image, the test
# gradients; the influence computation that is still active (SE-FREEZE) sits at
# the end of the loop body.

temp_list, _ = dataset.read_data_subset(root_dir, mode='validation1')

print('num_of_samples',len(temp_list))

# restore trainval_list, test_list
#file_dir = './compare/result_net_nn_if_nn/train_e_99502'
file_dir = './sample/result_net_nn_if_nn/train_B_69574'

trainval_list = list(np.load(file_dir+'/trainval_list.npy'))
test_list = list(np.load(file_dir+'/test_list.npy'))

# group the validation file names by the second '_'-separated token of the name
# NOTE(review): `dic` is only read by the commented-out sample_list line below
dic = {ks: [] for ks in anno_dict['classes'].values()}

for tl in temp_list:
    dic[tl.split('_')[1]].append(tl)
    
#sample_list = [dic[ks][0] for ks in dic.keys()]
# reuse the sample list saved by the non-noisy run
sample_list = list(np.load('./sample/result_net_nn_if_nn/train_B_69574/sample_list.npy'))
sample_set = dataset.LazyDataset(root_dir, sample_list, anno_dict) # non-noisy dataset

    
print(len(sample_list), sample_list)

# # network trained with non-noisy dataset, influence value with non-noisy dataset
# save_dir = './sample/result_net_nn_if_nn'
# restore_dir = '/Data/checkpts/emnist/model_fold_1_trainval_ratio_0.0.dnn' # non-noisy network
# train_set = dataset.LazyDataset(root_dir, trainval_list, anno_dict) # non-noisy dataset

# network trained with noisy dataset, influence value with noisy dataset
save_dir = './sample/result_net_ns_if_ns'
restore_dir = '/Data/checkpts/emnist/model_fold_1_trainval_ratio_0.3.dnn' # noisy network
train_set = dataset.LazyDataset(root_dir, trainval_list, noisy_anno_dict) # noisy dataset


# NOTE(review): 47 is hard-coded; presumably the number of classes / entries in
# sample_list -- confirm
for idx_test in range(47):
    # Set a single test image

    # Restore weights
    net.logits.restore(restore_dir)

    params = net.logits.parameters

    name_test = sample_list[idx_test]
    img_test, lb_test = sample_set.__getitem__(idx_test)
    show_image_from_data(img_test)
    # full-parameter test gradient; only used by the commented-out "FULL" variants
    v_test = net.loss.grad({net.X:img_test, net.y:lb_test}, wrt=params)
    
    # map class indices (as string keys) to class names
    lb_true = anno_dict['classes'][str(np.argmax(lb_test))]
    lb_pred = anno_dict['classes'][str(np.argmax(net.logits.eval({net.X:img_test})))]
    print('testfile name: ', name_test)
    print('ground truth label: ', lb_true)
    print('network prediction: ', lb_pred)

    # one result folder per sample image, named after the file without extension
    save_path = os.path.join(save_dir, name_test.split('.')[0])
    
    if not os.path.exists(save_path):
        # make folder
        os.makedirs(save_path)
        
    scipy.misc.imsave(save_path+'/test_reference_true_{}_pred_{}.png'.format(lb_true,lb_pred), np.squeeze(img_test))

    # keep all file lists next to the results
    np.save(save_path+'/trainval_list', trainval_list)
    np.save(save_path+'/test_list', test_list)
    np.save(save_path+'/temp_list', temp_list)
    np.save(save_path+'/sample_list', sample_list)

    # CALCULATE IF WITH FREEZED NETWORK

    params = net.loss.parameters
    p_ftex = net.d['dense1'].parameters
    # all loss parameters except those of 'dense1'
    p_logreg = tuple(set(params) - set(p_ftex)) # extract the weights of the last-layer (w,b)
    print(p_logreg)
    # test gradient restricted to the last-layer parameters
    v_logreg = net.loss.grad({net.X:img_test, net.y:lb_test}, wrt=p_logreg)

#     # Calculate influence functions

#     # CG-FREEZE (1885 sec)
#     t1 = time.time()
#     ihvp_cg_logreg = get_inverse_hvp_cg(net, net.loss, v_logreg, train_set,**{'damping':0.0, 'maxiter':50})
#     IF_cg_logreg = IF_val(net, ihvp_cg_logreg, train_set)
#     print('CG_logreg takes {} sec, and its max/min value {}'.format(time.time()-t1, [max(IF_cg_logreg),min(IF_cg_logreg)]))
#     np.save(save_path+'/if_cg_logreg.npy', IF_cg_logreg)
#     #IF_cg_logreg = np.load(save_path+'/if_cg_logreg.npy')
#     visualize_topk_samples(IF_cg_logreg, train_set, num_sample=5, save_path=save_path+'/cg-frz')
    
#     # VECTOR-FREEZE (175 sec)
#     t1 = time.time()
#     IF_v_logreg = IF_val(net, v_logreg, train_set)
#     print('V_logreg takes {} sec, and its max/min value {}'.format(time.time()-t1, [max(IF_v_logreg),min(IF_v_logreg)]))
#     np.save(save_path+'/if_v_logreg.npy', IF_v_logreg)
#     #IF_v_logreg = np.load(save_path+'/if_v_logreg.npy')
#     visualize_topk_samples(IF_v_logreg, train_set, num_sample=5, save_path=save_path+'/vec-frz')

#     # Vector-FULL (1688 sec)
#     t1 = time.time()
#     IF_v = IF_val(net, v_test, train_set)
#     print('V takes {} sec, and its max/min value {}'.format(time.time()-t1, [max(IF_v),min(IF_v)]))
#     np.save(save_path+'/if_v.npy', IF_v)
#     #IF_v = np.load(save_path+'/if_v.npy')
#     visualize_topk_samples(IF_v, train_set, num_sample=5, save_path=save_path+'/v')
    
#     # VECTOR-FREEZE-cosine-similarity (178 sec)
#     t1 = time.time()
#     IF_v_cos = IF_val(net, v_logreg, train_set, cosine=True)
#     print('V_cos takes {} sec, and its max/min value {}'.format(time.time()-t1, [max(IF_v_cos),min(IF_v_cos)]))
#     np.save(save_path+'/if_v_cos.npy', IF_v_cos)
#     #IF_v_cos = np.load(save_path+'/if_v_cos.npy')
#     visualize_topk_samples(IF_v_cos, train_set, num_sample=5, save_path=save_path+'/vec-cos')

#     # CG-FULL (1epoch, more than 3 hours, did it stuck at line search as it happened in ncg?)
#     t1 = time.time()
#     ihvp_cg = get_inverse_hvp_cg(net, net.loss, v_test, train_set,**{'damping':0.1, 'maxiter':100})
#     IF_cg = IF_val(net, ihvp_cg, train_set)
#     print('CG takes {} sec, and its max/min value {}'.format(time.time()-t1, [max(IF_cg),min(IF_cg)]))
#     np.save(save_path+'/if_cg.npy', IF_cg)
#     visualize_topk_samples(IF_cg, train_set, num_sample=5, save_path=save_path+'/cg')
    
#     # SE-FULL (? sec: diverge)
#     t1 = time.time()
#     ihvp_se = get_inverse_hvp_se(net, net.loss, v_test, train_set,**{'scale':1e5, 'damping':0.1, 'batch_size':50, 'recursion_depth':100})
#     IF_se = IF_val(net, ihvp_se, train_set)
#     print('SE takes {} sec, and its max/min value {}'.format(time.time()-t1, [max(IF_se),min(IF_se)]))
#     np.save(save_path+'/if_se.npy', IF_se)
#     visualize_topk_samples(IF_se, train_set, num_sample=5, save_path=save_path+'/se')
    
    # SE-FREEZE (1065 sec)
    # inverse-HVP of the last-layer test gradient via get_inverse_hvp_se, then
    # one influence value per training sample; values are saved and visualized
    t1 = time.time()
    ihvp_se_logreg = get_inverse_hvp_se(net, net.loss, v_logreg, train_set,**{'scale':1e3, 'damping':0.1, 'batch_size':50, 'tolerance':0, 'recursion_depth':65})
    IF_se_logreg = IF_val(net, ihvp_se_logreg, train_set)
    print('SE_logreg takes {} sec, and its max/min value {}'.format(time.time()-t1, [max(IF_se_logreg),min(IF_se_logreg)]))
    np.save(save_path+'/if_se_logreg.npy', IF_se_logreg)
    #IF_se_logreg = np.load(save_path+'/if_se_logreg.npy')
    visualize_topk_samples(IF_se_logreg, train_set, num_sample=5, save_path=save_path+'/se-frz')
    

In [None]:
def normalize_list(ls):
    # Standardize the values of ls: every element x_i becomes
    # (x_i - mean) / sigma, with mean and sigma (population standard deviation)
    # taken over all of ls. The result is returned as a numpy array.
    values = np.asarray(ls)
    center = np.mean(values)
    spread = np.sqrt(np.var(values))

    return (values - center) / spread

In [None]:
# summation over several examples and visualize samples

import glob

# file_dir = './sample/result_net_nn_if_nn/train_N_43201'
# restore_dir = '/Data/checkpts/emnist/model_fold_1_trainval_ratio_0.0.dnn' # non-noisy network
# net.logits.restore(restore_dir)

# trainval_list = list(np.load(file_dir+'/trainval_list.npy'))
# train_set = dataset.LazyDataset(root_dir, trainval_list, anno_dict) # non-noisy dataset

# test_list = list(np.load(file_dir+'/test_list.npy'))
# test_set = dataset.LazyDataset(root_dir, test_list, anno_dict)

# Average the saved SE-FREEZE influence values over all sample folders of the
# noisy experiment and visualize the top-ranked training samples.
file_dir = './sample/result_net_ns_if_ns/train_N_43201'
restore_dir = '/Data/checkpts/emnist/model_fold_1_trainval_ratio_0.3.dnn' # noisy network
net.logits.restore(restore_dir)

trainval_list = list(np.load(file_dir+'/trainval_list.npy'))
train_set = dataset.LazyDataset(root_dir, trainval_list, noisy_anno_dict) # noisy dataset

test_list = list(np.load(file_dir+'/test_list.npy'))
test_set = dataset.LazyDataset(root_dir, test_list, anno_dict)

# one influence vector per sample folder
IF_batch = []

for dr in glob.glob('sample/result_net_ns_if_ns/*'):
    #dr_if = dr+'/if_v_logreg.npy'
    dr_if = dr+'/if_se_logreg.npy'
    # folders without saved values are skipped
    if os.path.isfile(dr_if):
        print(dr_if)
        IF = np.load(dr_if)
        # normalize?
        #IF = normalize_list(IF)
        IF_batch.append(IF)

# element-wise mean over the folders
# NOTE(review): if no folder contained the file, IF_batch is empty here and the
# mean is not meaningful -- confirm at least one result exists
IF_batch = np.mean(IF_batch, axis=0)
visualize_topk_samples(IF_batch, train_set, num_sample=20, save_path='/tmp')

In [None]:
# calculate IF measure w.r.t. fixed length of dataset

# WITH SEVERAL METHODOLOGIES
# One sample image per class is picked from the validation subset; for each of
# them the test gradients are prepared here and the active influence
# computation (SE-FREEZE) follows at the end of the loop body.

temp_list, _ = dataset.read_data_subset(root_dir, mode='validation1')

print('num_of_samples',len(temp_list))

# restore trainval_list, test_list
file_dir = './compare/result_net_nn_if_nn/train_e_99502'

trainval_list = list(np.load(file_dir+'/trainval_list.npy'))
train_set = dataset.LazyDataset(root_dir, trainval_list, anno_dict) # non-noisy dataset

test_list = list(np.load(file_dir+'/test_list.npy'))
test_set = dataset.LazyDataset(root_dir, test_list, anno_dict)

# group the validation file names by the second '_'-separated token of the name
dic = {ks: [] for ks in anno_dict['classes'].values()}

for tl in temp_list:
    dic[tl.split('_')[1]].append(tl)
    
# first file of every group
# NOTE(review): raises IndexError if some class has no file in temp_list
sample_list = [dic[ks][0] for ks in dic.keys()]
sample_set = dataset.LazyDataset(root_dir, sample_list, anno_dict) # non-noisy dataset

    
print(len(sample_list), sample_list)

# change the save path (not 'compare')

# network trained with non-noisy dataset, influence value with non-noisy dataset
save_dir = './sample/result_net_nn_if_nn'
restore_dir = '/Data/checkpts/emnist/model_fold_1_trainval_ratio_0.0.dnn' # non-noisy network

# NOTE(review): 47 is hard-coded; presumably len(sample_list) -- confirm
for idx_test in range(47):
    # Set a single test image

    # Restore weights
    net.logits.restore(restore_dir)

    params = net.logits.parameters

    name_test = sample_list[idx_test]
    img_test, lb_test = sample_set.__getitem__(idx_test)
    show_image_from_data(img_test)
    # full-parameter test gradient; only used by the commented-out "FULL" variants
    v_test = net.loss.grad({net.X:img_test, net.y:lb_test}, wrt=params)
    
    # map class indices (as string keys) to class names
    lb_true = anno_dict['classes'][str(np.argmax(lb_test))]
    lb_pred = anno_dict['classes'][str(np.argmax(net.logits.eval({net.X:img_test})))]
    print('testfile name: ', name_test)
    print('ground truth label: ', lb_true)
    print('network prediction: ', lb_pred)

    # one result folder per sample image, named after the file without extension
    save_path = os.path.join(save_dir, name_test.split('.')[0])
    
    if not os.path.exists(save_path):
        # make folder
        os.makedirs(save_path)
        
    scipy.misc.imsave(save_path+'/test_reference_true_{}_pred_{}.png'.format(lb_true,lb_pred), np.squeeze(img_test))

    # keep all file lists next to the results
    np.save(save_path+'/trainval_list', trainval_list)
    np.save(save_path+'/test_list', test_list)
    np.save(save_path+'/temp_list', temp_list)
    np.save(save_path+'/sample_list', sample_list)

    # CALCULATE IF WITH FREEZED NETWORK

    params = net.loss.parameters
    p_ftex = net.d['dense1'].parameters
    # all loss parameters except those of 'dense1'
    p_logreg = tuple(set(params) - set(p_ftex)) # extract the weights of the last-layer (w,b)
    print(p_logreg)
    # test gradient restricted to the last-layer parameters
    v_logreg = net.loss.grad({net.X:img_test, net.y:lb_test}, wrt=p_logreg)

#     # Calculate influence functions

#     # CG-FREEZE (1885 sec)
#     t1 = time.time()
#     ihvp_cg_logreg = get_inverse_hvp_cg(net, net.loss, v_logreg, train_set,**{'damping':0.0, 'maxiter':50})
#     IF_cg_logreg = IF_val(net, ihvp_cg_logreg, train_set)
#     print('CG_logreg takes {} sec, and its max/min value {}'.format(time.time()-t1, [max(IF_cg_logreg),min(IF_cg_logreg)]))
#     np.save(save_path+'/if_cg_logreg.npy', IF_cg_logreg)
#     #IF_cg_logreg = np.load(save_path+'/if_cg_logreg.npy')
#     visualize_topk_samples(IF_cg_logreg, train_set, num_sample=5, save_path=save_path+'/cg-frz')
    
#     # VECTOR-FREEZE (175 sec)
#     t1 = time.time()
#     IF_v_logreg = IF_val(net, v_logreg, train_set)
#     print('V_logreg takes {} sec, and its max/min value {}'.format(time.time()-t1, [max(IF_v_logreg),min(IF_v_logreg)]))
#     np.save(save_path+'/if_v_logreg.npy', IF_v_logreg)
#     #IF_v_logreg = np.load(save_path+'/if_v_logreg.npy')
#     visualize_topk_samples(IF_v_logreg, train_set, num_sample=5, save_path=save_path+'/vec-frz')

#     # Vector-FULL (1688 sec)
#     t1 = time.time()
#     IF_v = IF_val(net, v_test, train_set)
#     print('V takes {} sec, and its max/min value {}'.format(time.time()-t1, [max(IF_v),min(IF_v)]))
#     np.save(save_path+'/if_v.npy', IF_v)
#     #IF_v = np.load(save_path+'/if_v.npy')
#     visualize_topk_samples(IF_v, train_set, num_sample=5, save_path=save_path+'/v')
    
#     # VECTOR-FREEZE-cosine-similarity (178 sec)
#     t1 = time.time()
#     IF_v_cos = IF_val(net, v_logreg, train_set, cosine=True)
#     print('V_cos takes {} sec, and its max/min value {}'.format(time.time()-t1, [max(IF_v_cos),min(IF_v_cos)]))
#     np.save(save_path+'/if_v_cos.npy', IF_v_cos)
#     #IF_v_cos = np.load(save_path+'/if_v_cos.npy')
#     visualize_topk_samples(IF_v_cos, train_set, num_sample=5, save_path=save_path+'/vec-cos')

#     # CG-FULL (1epoch, more than 3 hours, did it stuck at line search as it happened in ncg?)
#     t1 = time.time()
#     ihvp_cg = get_inverse_hvp_cg(net, net.loss, v_test, train_set,**{'damping':0.1, 'maxiter':100})
#     IF_cg = IF_val(net, ihvp_cg, train_set)
#     print('CG takes {} sec, and its max/min value {}'.format(time.time()-t1, [max(IF_cg),min(IF_cg)]))
#     np.save(save_path+'/if_cg.npy', IF_cg)
#     visualize_topk_samples(IF_cg, train_set, num_sample=5, save_path=save_path+'/cg')
    
#     # SE-FULL (? sec: diverge)
#     t1 = time.time()
#     ihvp_se = get_inverse_hvp_se(net, net.loss, v_test, train_set,**{'scale':1e5, 'damping':0.1, 'batch_size':50, 'recursion_depth':100})
#     IF_se = IF_val(net, ihvp_se, train_set)
#     print('SE takes {} sec, and its max/min value {}'.format(time.time()-t1, [max(IF_se),min(IF_se)]))
#     np.save(save_path+'/if_se.npy', IF_se)
#     visualize_topk_samples(IF_se, train_set, num_sample=5, save_path=save_path+'/se')
    
    # SE-FREEZE (1065 sec)
    # inverse-HVP of the last-layer test gradient via get_inverse_hvp_se, then
    # one influence value per training sample; values are saved and visualized
    t1 = time.time()
    ihvp_se_logreg = get_inverse_hvp_se(net, net.loss, v_logreg, train_set,**{'scale':1e3, 'damping':0.1, 'batch_size':50, 'tolerance':0, 'recursion_depth':65})
    IF_se_logreg = IF_val(net, ihvp_se_logreg, train_set)
    print('SE_logreg takes {} sec, and its max/min value {}'.format(time.time()-t1, [max(IF_se_logreg),min(IF_se_logreg)]))
    np.save(save_path+'/if_se_logreg.npy', IF_se_logreg)
    #IF_se_logreg = np.load(save_path+'/if_se_logreg.npy')
    visualize_topk_samples(IF_se_logreg, train_set, num_sample=5, save_path=save_path+'/se-frz')
    

border 2
======================

In [None]:
# compare value between two labels
# Loads the saved SE-FREEZE influence values of two test samples ('f' and 'F'),
# shows the training image with the largest value for 'f' and prints the value
# both samples assign to it.

IF_f = np.load('./sample/result_net_nn_if_nn/train_f_952/if_se_logreg.npy')
IF_F = np.load('./sample/result_net_nn_if_nn/train_F_41402/if_se_logreg.npy')

idx = np.argmax(IF_f)
print(idx)
# NOTE(review): assumes the current train_set is ordered like the one the
# influence values were computed on -- confirm
img_temp, _ = train_set[idx]
show_image_from_data(img_temp)
print(IF_f[idx], IF_F[idx])

In [None]:
# rename folder (test_list -> sample_list)
# Renames result folders that were named after test_list entries to the name of
# the sample_list entry at the same position. Destructive: directories on disk
# are renamed.
dr = './sample/result_net_nn_if_nn/'

# NOTE(review): starts at 1, so the folder for index 0 is left untouched -- confirm
for i in range(1,47):
    # folder names are the file names without extension
    tn = test_list[i].split('.')[0]; sn = sample_list[i].split('.')[0]
    print('from {} to {}'.format(tn, sn))
    # first file in the folder whose path contains 'reference'
    tt = list(filter(lambda x: 'reference' in x, glob.glob(dr+tn+'/*')))[0]
    # third-from-last '_' token of that path; for names of the form
    # test_reference_true_{}_pred_{}.png this is the true label
    print(tt.split('_')[-3])
    os.rename(dr+tn, dr+sn)

In [None]:
# rename result images (advantageous, true_x1_noisy_x2 -> true_x2_noisy_x1)
dr = './sample/result_net_ns_if_ns/'

# The renaming itself has not been written yet. Stop here with an explicit
# error (instead of a NameError from a placeholder identifier) so that running
# the whole notebook still halts at this cell.
raise NotImplementedError('renaming of result images (true_x1_noisy_x2 -> true_x2_noisy_x1) is not implemented yet')

In [None]:
# RELABELING

import glob
import json
import numpy as np
import random
import os
import matplotlib.pyplot as plt

from torch.utils.data import DataLoader
from datasets import dataset28 as dataset
from models.nn import VGG as ConvNet
from learning.evaluators import ErrorRateEvaluator as Evaluator

import time

def review(ratios, method):
    # Count how many mislabeled training samples an oracle would fix for
    # several review budgets.
    #
    # The training samples are ranked according to `method`; for every ratio in
    # `ratios` the top int(num_image * ratio) samples are reviewed and the
    # number of them whose noisy label differs from the clean label is counted
    # (a reviewed sample with a correct label is left unchanged).
    #
    # ratios: list of ratios, each the proportion of the data to be reviewed.
    # method: how the data to be reviewed is ranked. One of
    #         'random', 'influence', 'influence-sum', 'loss', 'entropy'.
    #
    # Returns a list with one raw count (not normalized) per entry of `ratios`.
    # Raises ValueError for an unknown method and FileNotFoundError when
    # 'influence-sum' finds no saved influence values.
    #
    # NOTE: reads the notebook-level `trainval_list` and hard-coded paths.
    known_methods = ('random', 'influence', 'influence-sum', 'loss', 'entropy')
    if method not in known_methods:
        # fail before any file or model is loaded; an unknown method used to
        # run the network-based measure and then die on an unbound name
        raise ValueError("unknown method '{}', expected one of {}".format(method, known_methods))

    # FIXME
    anno_dir = '/Data/emnist/balanced/original/annotation/'
    root_dir = '/Data/emnist/balanced/original/'
    #checkpt_dir = '/Data/github/interview/save/dropout_0.5_noaugmentation/model_fold_1_trainval_ratio_0.3.dnn'
    #checkpt_dir = '/Data/checkpts/emnist/model_fold_1_trainval_ratio_0.0.dnn'
    checkpt_dir = '/Data/checkpts/emnist/model_fold_1_trainval_ratio_0.3.dnn'

    # clean labels (used as the oracle) and noisy labels
    with open(anno_dir + 'annotation1.json', 'r') as fid:
        annotation = json.load(fid)

    with open(anno_dir + 'annotation1_wp_0.3.json', 'r') as fid:
        noisy_annotation = json.load(fid)

    #image_list = list(noisy_annotation['images'].keys())
    image_list = trainval_list
    num_image = len(image_list)

    ## sorting
    if method == 'random':
        # random policy: shuffle a copy so the shared list keeps its order
        review_list = image_list[:]
        random.shuffle(review_list)

    elif method == 'influence':
        # influence function of a single test sample, largest magnitude first
        save_path = './sample/result_net_ns_if_ns/train_K_97272'
        IF_measure = np.load(save_path+'/if_se_logreg.npy')
        argsort_abs = np.argsort(np.abs(IF_measure))[::-1]
        review_list = [image_list[idx] for idx in argsort_abs]

    elif method == 'influence-sum':
        # mean absolute influence over several test samples, largest first
        IF_measures = []
        for pth in glob.glob('./sample/result_net_ns_if_ns/*'):
            measure_file = pth+'/if_se_logreg.npy'
            if not os.path.isfile(measure_file):
                # result folder without saved SE-FREEZE values
                continue
            IF_measures.append(np.abs(np.load(measure_file)))
        if not IF_measures:
            raise FileNotFoundError('no if_se_logreg.npy found under ./sample/result_net_ns_if_ns/')
        IF_measures = np.mean(IF_measures, axis=0)
        argsort_abs = np.argsort(IF_measures)[::-1]
        review_list = [image_list[idx] for idx in argsort_abs]

    else:
        # 'loss' or 'entropy': both come from one pass of the network
        image_set = dataset.LazyDataset(root_dir, image_list, noisy_annotation)
        model = ConvNet(image_set.__getitem__(0)[0].shape, len(annotation['classes']))
        model.logits.restore(checkpt_dir)

        # extract loss, entropy
        t1_measure = time.time()
        loss, entropy = network_based_measure(model, image_set)
        t2_measure = time.time()
        print('measure extraction takes {}'.format(t2_measure-t1_measure))
        # check data // filename[0] and __getitem__[0] and dataloader first instance
        # -> all of them are same. in other word, we can use an index information

        # largest loss / entropy is reviewed first
        measure = loss if method == 'loss' else entropy
        review_list = [image_list[i] for i in np.argsort(measure)[::-1]]

    ## correcting
    corrected_list = []
    for ratio in ratios:
        print(ratio)
        review_list_ratio = review_list[0:int(num_image * ratio)]
        print(len(review_list_ratio))
        # a reviewed sample counts as corrected when its noisy label is wrong
        num_corrected = sum(
            noisy_annotation['images'][fname]['class'] != annotation['images'][fname]['class']
            for fname in review_list_ratio
        )
        corrected_list.append(num_corrected)

    return corrected_list

def network_based_measure(model, data_set):
    # Return the per-sample loss and prediction entropy of `model` on `data_set`.
    #
    # model: network whose `pred` output is evaluated with {model.X: batch}.
    # data_set: dataset yielding (image, label) pairs; labels are multiplied
    #           element-wise with the predictions, so presumably one-hot
    #           -- TODO confirm.
    #
    # Returns (loss, entropy): two 1-d arrays in data_set order (the loader
    # does not shuffle).
    batch_size = 256
    num_workers = 6
    dataloader = DataLoader(data_set, batch_size, shuffle=False, num_workers=num_workers)

    # smallest positive normal float32; flooring probabilities at it keeps
    # log() finite, so a probability of exactly 0 no longer produces an
    # infinite loss or a nan entropy (nan would corrupt the later argsort)
    floor = np.finfo(np.float32).tiny

    # seeding with an empty float64 array keeps the result dtype and makes an
    # empty data set return two empty arrays
    loss_batches = [np.empty(0)]
    entropy_batches = [np.empty(0)]

    # prediction in batchwise
    for X, y in dataloader:
        X = X.numpy(); y = y.numpy()
        y_pred = model.pred.eval({model.X: X})
        # probability assigned to the labelled class -> cross-entropy loss
        p_label = np.sum(y_pred * y, axis=1)
        loss_batches.append(-np.log(np.maximum(p_label, floor)))
        entropy_batches.append(-np.sum(y_pred * np.log(np.maximum(y_pred, floor)), axis=1))

    return np.concatenate(loss_batches), np.concatenate(entropy_batches)

# main code
# Run every review policy over the same review budgets and plot how many
# mislabeled samples each of them recovers.

# fractions of the training data handed to the oracle
#x = [0.0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3]
#x = [0.001]
#x = [0.1, 0.5, 0.9]
x = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]

rnd = review(x, 'random')
ls = review(x, 'loss')
etp = review(x, 'entropy')
inf1 = review(x, 'influence')
inf2 = review(x, 'influence-sum')
# vector
# vector frz
# vector cos

print(rnd)
print(ls)
print(etp)
print(inf1)
print(inf2)

# draw a graph
fig, ax = plt.subplots(1,1, figsize=(9,9))
_ = ax.plot(x, rnd, color='b', label='random')
_ = ax.plot(x, ls, color='g', label='loss')
_ = ax.plot(x, etp, color='r', label='entropy') 
_ = ax.plot(x, inf1, color='y', label='influence') 
_ = ax.plot(x, inf2, color='c', label='influence-sum') 
_ = ax.set_title('Recovery results')
# review() returns raw counts, so the axis is labelled as a count, not a ratio
_ = ax.set_ylabel('Number of corrected labels: $Num_{corrected}$')
_ = ax.set_xlabel('Ratio of reviewed data: $Num_{reviewed}/Num_{total}$')
_ = ax.set_xticks(x)
_ = plt.legend()
plt.savefig('./images/recovery_results.png', bbox_inches='tight')
plt.show()

# wasted!!

In [None]:
# DO THIS FOR SEVERAL EXAMPLES
#
# For each selected test image: restore the network, record the test image and
# its prediction, compute influence values of the training samples with the
# CG-based inverse HVP restricted to the non-'dense1' parameters, save the
# values to disk and visualise the top-k training samples.

# FIXME
# # network trained with noisy dataset, influence value with noisy dataset
# train_set = dataset.LazyDataset(root_dir, trainval_list, noisy_anno_dict) # noisy dataset
# save_dir = './result_net_ns_if_ns' 
# restore_dir = '/Data/checkpts/emnist/model_fold_1_trainval_ratio_0.3.dnn'

# network trained with non-noisy dataset, influence value with non-noisy dataset
train_set = dataset.LazyDataset(root_dir, trainval_list, anno_dict) # non-noisy dataset
save_dir = './result_net_nn_if_nn'
restore_dir = '/Data/checkpts/emnist/model_fold_1_trainval_ratio_0.0.dnn' # non-noisy network

for idx_test in range(13, 14):
    # Set a single test image (the current range selects index 13 only)

    # # Re-sample a test instance
    # test_list, _ = dataset.read_data_subset(root_dir, mode='validation1', sample_size=100)
    # test_set = dataset.LazyDataset(root_dir, test_list, anno_dict)
    
    # Restore weights
    net.logits.restore(restore_dir)

    params = net.logits.parameters

    name_test = test_list[idx_test]
    img_test, lb_test = test_set[idx_test]
    show_image_from_data(img_test)
    # Gradient of the test loss w.r.t. the parameters of the logits function
    v_test = net.loss.grad({net.X:img_test, net.y:lb_test}, wrt=params)
    
    lb_true = anno_dict['classes'][str(np.argmax(lb_test))]
    lb_pred = anno_dict['classes'][str(np.argmax(net.logits.eval({net.X:img_test})))]
    print('testfile name: ', name_test)
    print('ground truth label: ', lb_true)
    print('network prediction: ', lb_pred)

    # One output folder per test file (file name up to its first '.')
    save_path = os.path.join(save_dir, name_test.split('.')[0])
    # exist_ok avoids the check-then-create race and is a no-op on re-runs
    os.makedirs(save_path, exist_ok=True)

    # NOTE(review): scipy.misc.imsave was deprecated in SciPy 1.0 and removed
    # in SciPy 1.2; this line needs an older SciPy or a replacement image
    # writer (the SciPy deprecation note suggests imageio.imwrite).
    scipy.misc.imsave(
        os.path.join(save_path, 'test_reference_true_{}_pred_{}.png'.format(lb_true, lb_pred)),
        np.squeeze(img_test))

    # Keep the file lists next to the results (np.save appends '.npy')
    np.save(os.path.join(save_path, 'trainval_list'), trainval_list)
    np.save(os.path.join(save_path, 'test_list'), test_list)

    # CALCULATE IF WITH FREEZED NETWORK

    params = net.loss.parameters
    p_ftex = net.d['dense1'].parameters
    # Everything in the loss parameters that does not belong to 'dense1';
    # intended to be the last-layer weights (w,b)
    p_logreg = tuple(set(params) - set(p_ftex))
    print(p_logreg)
    v_logreg = net.loss.grad({net.X:img_test, net.y:lb_test}, wrt=p_logreg)

    # Calculate influence functions

    # the solution which is converged properly can be found within 30 iterations, otherwise does not converge
    t1 = time.time()
    ihvp_cg_logreg = get_inverse_hvp_cg(net, net.loss, v_logreg, train_set, **{'damping':0.0, 'maxiter':30})
    IF_cg_logreg = IF_val(net, ihvp_cg_logreg, train_set)
    print('CG_logreg takes {} sec, and its max/min value {}'.format(time.time()-t1, [max(IF_cg_logreg),min(IF_cg_logreg)]))

    if_cg_path = os.path.join(save_path, 'if_cg_logreg.npy')
    np.save(if_cg_path, IF_cg_logreg)

    # otherwise, load
    # (when the computation above is skipped, previously saved values are read back)
    IF_cg_logreg = np.load(if_cg_path)

    # t1 = time.time()
    # ihvp_ncg_logreg = get_inverse_hvp_ncg(net, net.loss, v_logreg, train_set,**{'damping':0.1, 'maxiter':3})
    # IF_ncg_logreg = IF_val(net, ihvp_ncg_logreg, train_set)
    # print('NCG_logreg takes {} sec, and its value {}'.format(time.time()-t1, IF_ncg_logreg))
   
    visualize_topk_samples(IF_cg_logreg, train_set, num_sample=5, save_path=save_path)

# tsne
tsne.py 참고

# Retraining
bootstrapping 참고

TODO: IHVP-SE 방식으로도 실행해 볼 것.
TODO: 여러 z_test에 대한 IF(influence function) 값을 합산한 뒤 정렬해서 확인해 볼 것
(전체적으로 성능을 떨어뜨리는 학습 샘플이 있는지 찾기 위함).

In [None]:
# Cell expression: a one-element list holding trainval_list
# (presumably a quick inspection of its contents -- confirm before removing).
[trainval_list]
