In [None]:
import cntk as C
from cntk.device import try_set_default_device, gpu
try_set_default_device(gpu(0))

import numpy as np
import time
import os

from torch.utils.data import DataLoader

In [None]:
# Hessian Vector Product

def grad_inner_product(grad1, grad2):
    """Return the scalar inner product of two dictionary-format gradients.

    grad1, grad2: dictionaries with the same number of entries. Every key of
        ``grad1`` is looked up in ``grad2``; the paired values are multiplied
        element-wise and all products are summed into one number.

    Returns 0 for two empty dictionaries.
    """
    assert(len(grad1)==len(grad2))

    total = 0
    for key, left in grad1.items():
        total += np.sum(np.multiply(left, grad2[key]))

    return total

def weight_update(w, v, r):
    """Shift every weight in place by a scaled step: ``p.value += r * v[p]``.

    w: iterable of weight objects exposing a writable ``value`` attribute
    v: dictionary keyed by those weight objects (e.g. a gradient value)
    r: scalar multiplier applied to each entry of ``v``
    """
    for param in w:
        step = r * v[param]
        param.value += step

def HVP(y, x, v):
    """Approximate the Hessian-vector product H v by a central finite difference.

    The weights are moved to ``w + r*v`` and ``w - r*v``, the gradient of ``y``
    is evaluated at both points, and ``(g_plus - g_minus) / (2*r)`` is returned.

    y: scalar function to be differentiated (e.g. cross entropy loss); must
       provide ``y.grad(x, wrt=...)`` returning a dictionary of gradients
    x: feed dict for the network (e.g. {model.X: image_batch, model.y: label_batch})
    v: dictionary mapping each weight object to the vector entry that multiplies
       the Hessian (e.g. g(z_test)); its keys are the variables differentiated

    Returns a dictionary with the keys of the gradient returned by ``y.grad``.

    The original weight values are snapshotted first and written back in a
    ``finally`` clause, so the network is restored exactly (no accumulated
    rounding drift over repeated calls) even when ``y.grad`` raises.
    """
    # finite-difference step size
    r = 1e-2

    assert isinstance(x, dict), "Input of HVP is wrong. 'x' should be dictionary(feed dict format)"
    assert isinstance(v, dict), "Input of HVP is wrong. 'v' should be dictionary(weight:value format)"

    w = v.keys()

    # Private copies of the current weights; every perturbation is computed
    # from these rather than from the previously perturbed values.
    w_backup = {p: np.array(p.value, copy=True) for p in w}

    def _move_weights(step):
        # Set each weight to (snapshot + step * v), keeping the weight's dtype.
        for p in w:
            base = w_backup[p]
            p.value = (base + step * v[p]).astype(base.dtype)

    try:
        # gradient at w + r*v
        _move_weights(+r)
        g_plus = y.grad(x, wrt=w)

        # gradient at w - r*v
        _move_weights(-r)
        g_minus = y.grad(x, wrt=w)
    finally:
        # weight reconstruction (exact)
        for p in w:
            p.value = w_backup[p]

    hvp = {ks: (g_plus[ks] - g_minus[ks])/(2*r) for ks in g_plus.keys()}

    return hvp


In [None]:
# Newton-Conjugate Gradient

from scipy.optimize import fmin_ncg

def dic2vec(dic):
    """Flatten a dictionary of arrays into a single 1D vector.

    The values are flattened one by one and joined in the dictionary's
    iteration order (e.g. gradient of a network -> 1D vector).
    """
    flat_parts = []
    for arr in dic.values():
        flat_parts.append(arr.reshape(-1))

    return np.concatenate(flat_parts)

def vec2dic(vec, fmt):
    """Split a 1D vector back into a dictionary of arrays.

    vec: 1D vector holding all entries back to back
    fmt: dictionary mapping each key to the shape of its array,
         i.e. fmt = {key: val.shape for (key, val) in dict}

    Consecutive slices of ``vec`` (each as long as the product of the
    corresponding shape) are reshaped and stored under the keys of ``fmt``,
    in ``fmt``'s iteration order.
    """
    dic = {}
    start = 0
    for key, shape in fmt.items():
        stop = start + np.prod(shape)
        dic[key] = vec[start:stop].reshape(shape)
        start = stop

    return dic

def get_inverse_hvp_ncg(model, y, v, data_set, **kwargs):
    """Estimate H^-1 v by minimising 0.5 x^T H x - v^T x with scipy's fmin_ncg.

    model: network object exposing the input variables ``model.X`` and ``model.y``
    y: scalar function of the network (e.g. model.loss)
    v: dictionary-format vector to be multiplied by the inverse Hessian
    data_set: dataset the Hessian-vector products are evaluated on
    kwargs: hyperparameters
        batch_size (default 50), damping (default 0.0),
        avextol (default 1e-8), maxiter (default 1e1)

    Returns the minimiser as a dictionary with the same keys/shapes as ``v``.
    Progress is printed after every optimiser iteration; the iteration counter
    is kept on the function attribute ``get_inverse_hvp_ncg.cnt``.
    """
    batch_size = kwargs.pop('batch_size', 50)
    damping = kwargs.pop('damping', 0.0)
    avextol = kwargs.pop('avextol', 1e-8)
    maxiter = kwargs.pop('maxiter', 1e1)

    dataloader = DataLoader(data_set, batch_size, shuffle=True, num_workers=6)

    t0 = time.time()
    get_inverse_hvp_ncg.cnt = 0

    # Shapes of v's entries; used for every flat-vector <-> dictionary conversion.
    fmt = {key: val.shape for (key, val) in v.items()}

    def HVP_minibatch_val(loss_fn, vec):
        # Hessian-vector product over the data set: each batch's HVP is divided
        # by the batch length, the per-batch results are averaged, and the
        # damping term damping*vec is added.
        per_batch = {ks: [] for ks in vec.keys()}

        for img, lb in dataloader:
            img = img.numpy(); lb = lb.numpy()
            batch_hvp = HVP(loss_fn, {model.X: img, model.y:lb}, vec)
            for ks in batch_hvp.keys():
                per_batch[ks].append(batch_hvp[ks]/img.shape[0])

        return {ks: np.mean(per_batch[ks], axis=0) + damping*vec[ks] for ks in per_batch.keys()}

    def get_fmin_loss(x):
        # objective: 0.5 * x^T H x - v^T x
        x_dic = vec2dic(x, fmt)
        hvp_val = HVP_minibatch_val(y, x_dic)

        quadratic = 0.5 * grad_inner_product(hvp_val, x_dic)
        linear = grad_inner_product(v, x_dic)
        return quadratic - linear

    def get_fmin_grad(x):
        # gradient of the objective: H x - v (x is a 1D vector)
        hvp_val = HVP_minibatch_val(y, vec2dic(x, fmt))

        return dic2vec(hvp_val) - dic2vec(v)

    def get_fmin_hvp(x, p):
        # Hessian of the objective applied to p: H p (x is unused)
        hvp_val = HVP_minibatch_val(y, vec2dic(p, fmt))

        return dic2vec(hvp_val)

    def ncg_callback(x):
        # report progress after each optimiser iteration
        x_dic = vec2dic(x, fmt)
        print('iteration: {}'.format(get_inverse_hvp_ncg.cnt), ', ', time.time()-t0, '(sec) elapsed')
        print('vector element-wise square: ', grad_inner_product(x_dic, x_dic))
        get_inverse_hvp_ncg.cnt += 1

        return 0

    fmin_results = fmin_ncg(
        f=get_fmin_loss,
        x0=dic2vec(v),
        fprime=get_fmin_grad,
        fhess_p=get_fmin_hvp,
        avextol=avextol,
        maxiter=maxiter,
        callback=ncg_callback,
    )

    return vec2dic(fmin_results, fmt)

In [None]:
# Conjugate Gradient

from scipy.optimize import fmin_cg

def dic2vec(dic):
    """Concatenate the flattened values of a dictionary into one 1D vector.

    Values are taken in the dictionary's iteration order
    (e.g. gradient of a network -> 1D vector).
    """
    return np.concatenate(tuple(block.reshape(-1) for block in dic.values()))

def vec2dic(vec, fmt):
    """Rebuild a dictionary of arrays from a 1D vector.

    vec: 1D vector holding all entries back to back
    fmt: dictionary mapping each key to the shape of its array,
         i.e. fmt = {key: val.shape for (key, val) in dict}

    Each key receives the next ``prod(shape)`` elements of ``vec``, reshaped
    to ``shape``; keys are processed in ``fmt``'s iteration order.
    """
    # bounds[i] .. bounds[i+1] delimits the slice belonging to the i-th key
    bounds = [0]
    for shape in fmt.values():
        bounds.append(bounds[-1] + np.prod(shape))

    return {
        key: vec[bounds[i]:bounds[i+1]].reshape(shape)
        for i, (key, shape) in enumerate(fmt.items())
    }

def get_inverse_hvp_cg(model, y, v, data_set, **kwargs):
    """Estimate H^-1 v by minimising 0.5 x^T H x - v^T x with scipy's fmin_cg.

    model: network object exposing the input variables ``model.X`` and ``model.y``
    y: scalar function of the network (e.g. model.loss)
    v: dictionary-format vector to be multiplied by the inverse Hessian
    data_set: dataset the Hessian-vector products are evaluated on
    kwargs: hyperparameters
        batch_size (default 50)
        damping (default 0.0): multiple of the identity added to the Hessian,
            i.e. the product used is H x + damping * x
        maxiter (default 5e1)

    Returns the minimiser as a dictionary with the same keys/shapes as ``v``.
    Progress is printed after every optimiser iteration; the iteration counter
    is kept on the function attribute ``get_inverse_hvp_cg.cnt``.
    """
    batch_size = kwargs.pop('batch_size', 50)
    damping = kwargs.pop('damping', 0.0)
    maxiter = kwargs.pop('maxiter', 5e1)

    dataloader = DataLoader(data_set, batch_size, shuffle=True, num_workers=6)

    t0 = time.time()
    get_inverse_hvp_cg.cnt = 0

    # Shapes of v's entries; used for every flat-vector <-> dictionary conversion.
    fmt = {key: val.shape for (key, val) in v.items()}

    def HVP_minibatch_val(y, v):
        # Hessian-vector product over the whole data set.
        # y: scalar function output of the neural network (e.g. model.loss)
        # v: dictionary-format vector multiplied by the Hessian
        # The per-batch HVPs are summed and divided by the number of samples.
        num_data = len(data_set)

        hvp_batch = {key: np.zeros_like(value) for key,value in v.items()}

        for img, lb in dataloader:
            img = img.numpy(); lb = lb.numpy()
            x_feed = {model.X: img, model.y:lb}
            hvp = HVP(y,x_feed,v)
            # add hvp value
            for ks in hvp.keys():
                hvp_batch[ks] += hvp[ks]/num_data

        # Damp term to make the Hessian convex. The original code read the
        # 'damping' option but never applied it; skipped when it is zero so
        # the default behaviour is unchanged.
        if damping:
            for ks in hvp_batch.keys():
                hvp_batch[ks] += damping*v[ks]

        return hvp_batch

    def fmin_loss_fn(x):
        # objective: 0.5 * x^T H x - v^T x
        x_dic = vec2dic(x, fmt)
        hvp_val = HVP_minibatch_val(y, x_dic)

        return 0.5 * grad_inner_product(hvp_val, x_dic) - grad_inner_product(v, x_dic)

    def fmin_grad_fn(x):
        # gradient of the objective: H x - v (x is a 1D vector)
        x_dic = vec2dic(x, fmt)
        hvp_val = HVP_minibatch_val(y, x_dic)
        hvp_flat = dic2vec(hvp_val)
        v_flat = dic2vec(v)

        return hvp_flat - v_flat

    def cg_callback(x):
        # report progress after each optimiser iteration
        x_dic = vec2dic(x, fmt)
        print('iteration: {}'.format(get_inverse_hvp_cg.cnt), ', ', time.time()-t0, '(sec) elapsed')
        print('vector element-wise square: ', grad_inner_product(x_dic, x_dic))
        get_inverse_hvp_cg.cnt += 1

        return 0

    fmin_results = fmin_cg(f=fmin_loss_fn, x0=dic2vec(v), fprime=fmin_grad_fn, callback=cg_callback, maxiter=maxiter)

    return vec2dic(fmin_results, fmt)

In [None]:
# Stochastic Estimation

def get_inverse_hvp_se(model, y, v, data_set, **kwargs):
    """Estimate the inverse-Hessian-vector product H^-1 v by stochastic recursion.

    model: neural network model exposing ``model.X`` and ``model.y``
    y: scalar function output of the neural network (e.g. model.loss)
    v: dictionary-format vector to be multiplied by the inverse Hessian (e.g. v_test)
    data_set: training set the Hessian is evaluated on
    kwargs: hyperparameters for stochastic estimation
        recursion_depth (default 50): maximum number of passes over the data
        scale (default 1e1): divisor of the Hessian term (similar to a learning rate)
        damping (default 0.0): damping coefficient (paper reference: 0.01)
        batch_size (default 1)
        num_samples (default 1): number of independent estimates to average
        tolerance (default 1e-2): early-stop threshold on the change of the
            estimate's l2 norm between two passes
        verbose (default False): print weights, last hvp and estimate per pass

    For every batch the estimate is updated as
        cur = v + (1 - damping/scale) * cur - (1/scale) * hvp / n
    where ``n`` is the number of samples actually contained in the batch.
    The final estimate is divided by ``scale``; the ``num_samples`` results are
    averaged key by key and returned as a dictionary keyed like ``v``.
    """
    # hyperparameters
    recursion_depth = kwargs.pop('recursion_depth', 50) # epoch
    scale = kwargs.pop('scale', 1e1) # similar to learning rate
    damping = kwargs.pop('damping', 0.0) # paper reference: 0.01
    batch_size = kwargs.pop('batch_size', 1)
    num_samples = kwargs.pop('num_samples', 1) # the number of samples(:stochastic estimation of IF) to be averaged
    tolerance = kwargs.pop('tolerance', 1e-2) # the difference btw l2 norms of current and previous vector used for early stopping
    verbose = kwargs.pop('verbose', False)

    dataloader = DataLoader(data_set, batch_size, shuffle=True, num_workers=6)

    inv_hvps = []

    params = v.keys()

    for i in range(num_samples):
        # obtain num_samples inverse hvps
        cur_estimate = v
        prev_norm = 0

        for depth in range(recursion_depth):
            # epoch-scale recursion depth
            t1 = time.time()
            for img, lb in dataloader:
                img = img.numpy(); lb = lb.numpy()
                x_feed = {model.X: img, model.y:lb}
                hvp = HVP(y,x_feed,cur_estimate)
                # Normalise by the real batch length: the last batch of an
                # epoch can hold fewer than batch_size samples.
                n_batch = img.shape[0]
                # cur_estimate = v + (1-damping/scale)*cur_estimate - (1/scale)*(hvp/n_batch)
                cur_estimate = {ks: v[ks] + (1-damping/scale)*cur_estimate[ks] - (1/scale)*hvp[ks]/n_batch for ks in cur_estimate.keys()}

            if verbose:
                print('#w: \n', list(map(lambda x: x.value, params)), '\n#hvp: \n', hvp, '\n#ihvp: \n', cur_estimate)

            cur_norm = np.sqrt(grad_inner_product(cur_estimate,cur_estimate))
            print('Recursion depth: {}, norm: {}, time: {} \n'.format(depth, cur_norm,time.time()-t1))

            # divergence check
            if np.isnan(cur_norm):
                print('## The result has been diverged ##')
                break

            # convergence check
            if np.abs(cur_norm - prev_norm) <= tolerance:
                # change this to more precise one (<- scipy.fmin_cg also use gnorm)
                print('## Early stopped due to small change')
                break
            prev_norm = cur_norm

        inv_hvp = {ks: (1/scale)*cur_estimate[ks] for ks in cur_estimate.keys()}
        inv_hvps.append(inv_hvp)

    inv_hvp_val = {ks: np.mean([inv_hvps[i][ks] for i in range(num_samples)], axis=0) for ks in inv_hvps[0].keys()}

    return inv_hvp_val

In [None]:
# toy example for inverse HVP (CG, NCG and SE)

class SimpleNet(object):
    """Toy CNTK network: two stacked bias-free linear Dense(1) layers.

    Attributes set by the constructor:
        X: input variable of shape (1,)
        h: output of the first dense layer applied to X
        pred: output of the second dense layer applied to h
        y: target input variable of shape (1,)
        loss: squared error between pred and y
    """
    def __init__(self):
        def linear_unit(inputs):
            # one-unit dense layer without activation or bias, uniform init
            return C.layers.Dense(1, activation=None, init=C.uniform(1), bias=False)(inputs)

        self.X = C.input_variable(shape=(1,))
        self.h = linear_unit(self.X)
        self.pred = linear_unit(self.h)
        self.y = C.input_variable(shape=(1,))
        self.loss = C.squared_error(self.pred, self.y)
        
class SimpleDataset(object):
    """Minimal in-memory dataset pairing ``images[i]`` with ``labels[i]``."""

    def __init__(self, images, labels):
        self._images = images
        self._labels = labels

    def __getitem__(self, index):
        # (image, label) pair stored at the given index
        return self._images[index], self._labels[index]

    def __len__(self):
        # the number of samples is the number of images
        return len(self._images)


# Build the toy network and collect the trainable parameters reachable from its
# prediction node.
net = SimpleNet()

params = net.pred.parameters

# One (input, target) pair as feed dict, and an all-ones vector v holding one
# entry per parameter (same shape as that parameter's value).
x_feed = {net.X:np.array([[2.]],dtype=np.float32), net.y:np.array([[1.]],dtype=np.float32)}
v_feed = {p: np.ones_like(p.value) for p in params}

# Print the weights and loss as initialised, overwrite both weights with fixed
# values, then print again so the numbers below refer to a known state.
print('w1 = \n', params[0].value, '\nw2 = \n', params[1].value, '\nloss = \n', net.loss.eval(x_feed))
params[0].value = np.asarray([[1.]])
params[1].value = np.asarray([[1./3.]])
print('w1 = \n', params[0].value, '\nw2 = \n', params[1].value, '\nloss = \n', net.loss.eval(x_feed))

# Finite-difference Hessian-vector product at the fixed weights.
print('hvp', HVP(net.loss, x_feed, v_feed))

# One-sample training set (the two-sample variant is kept commented out).
#images = np.asarray([[2.],[2.]], dtype=np.float32)
#labels = np.asarray([[1.],[1.]], dtype=np.float32)
images = np.asarray([[2.]], dtype=np.float32)
labels = np.asarray([[1.]], dtype=np.float32)

train_set = SimpleDataset(images,labels)

# Run the three inverse-HVP estimators on the same inputs; t1..t4 bracket the
# calls so each method's wall-clock time can be reported.
print('######## damping = 0.0, desired solution: [1.25, -0.08] ########'); t1 = time.time()
ihvp_ncg = get_inverse_hvp_ncg(net, net.loss, v_feed, train_set, **{'damping': 0.0}); t2 = time.time()
ihvp_cg = get_inverse_hvp_cg(net, net.loss, v_feed, train_set, **{'damping': 0.0}); t3 = time.time()
ihvp_se = get_inverse_hvp_se(net, net.loss, v_feed, train_set, **{'damping': 0.0, 'recursion_depth': 100}); t4 = time.time()
print('inverse hvp_ncg', ihvp_ncg, '\ntime: ', t2-t1)
print('inverse hvp_cg', ihvp_cg, '\ntime: ', t3-t2 )
print('inverse hvp_se', ihvp_se, '\ntime: ', t4-t3)

# Same comparison with damping = 0.1 (disabled).
# print('inverse hvp_ncg', get_inverse_hvp_ncg(net, net.loss, v_feed, train_set, **{'damping': 0.1}))
# print('inverse hvp_cg', get_inverse_hvp_cg(net, net.loss, v_feed, train_set, **{'damping': 0.1}))
# print('inverse hvp_se', get_inverse_hvp_se(net, net.loss, v_feed, train_set, **{'scale':10, 'damping':0.1}))

In [1]:
import os, sys
sys.path.append('/Data/github/data_analysis/dataset-analysis-new/')
import json

from datasets import dataset as dataset

import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
import scipy.misc
from torch.utils.data import DataLoader

In [2]:
def show_image_from_data(img):
    """Display one image from the dataset with matplotlib.

    img: channel-first numpy array; axis 0 is moved to the last position and
         singleton axes are squeezed away before the image is drawn.
    """
    channels_last = np.transpose(img, [1,2,0])
    imshow(np.squeeze(channels_last))
    plt.show()

In [None]:
def IF_val(net, ihvp, data_set, cosine=False):
    """Compute the influence measure of every sample in ``data_set``.

    The loop is sample-wise (batch size 1) because the gradient operation sums
    over everything in the feed dict.

    net: network exposing ``net.loss``, ``net.X`` and ``net.y``
    ihvp: inverse hessian vector product (dictionary); its keys are the
          parameters the per-sample gradient is taken with respect to
    data_set: dataset fed sample by sample to the gradient operation
    cosine: when True, each per-sample gradient is divided by its l2 norm
            before the inner product is taken

    Returns a list with one value per sample, in dataset order:
    -<ihvp, grad> / (number of samples). The elapsed time is printed.
    """
    #params = net.logits.parameters
    params = ihvp.keys()

    dataloader = DataLoader(data_set, 1, shuffle=False, num_workers=6)
    num_batches = len(dataloader)

    IF_list = []

    t1 = time.time()
    for img, lb in dataloader:
        img = img.numpy(); lb = lb.numpy()
        gd = net.loss.grad({net.X:img, net.y:lb}, wrt=params)
        if cosine:
            nrm = np.sqrt(grad_inner_product(gd,gd))
            gd = {k: v/nrm for k,v in gd.items()}
        IF_list.append(-grad_inner_product(ihvp, gd) / num_batches)
    print('IF_val takes {} sec'.format(time.time()-t1))

    return IF_list

def _show_and_save_samples(indices, measure, train_set, banner, file_tag, save_path, axis, print_list_entry):
    """Display the samples at ``indices`` and save them as one stacked image.

    indices: dataset indices to show, in display order
    measure: per-sample influence values, indexed by dataset index
    train_set: dataset providing ``__getitem__``, ``filename_list`` and
               ``anno_dict['classes']``
    banner: heading printed before the samples
    file_tag: prefix of the saved file name ('<file_tag>_true_<labels>.png')
    save_path: directory the stacked image is written to
    axis: axis along which the channel-last images are concatenated
    print_list_entry: also print ``trainval_list[idx]`` for every sample
    """
    tiles = []
    true_label = ''
    print(banner)
    for idx in indices:
        img, lb = train_set.__getitem__(idx)
        show_image_from_data(img)
        print('training set name: ', train_set.filename_list[idx])
        class_name = train_set.anno_dict['classes'][str(np.argmax(lb))]
        print('training set label: ', class_name)
        print('IF measure: ', measure[idx])
        if print_list_entry:
            # NOTE(review): trainval_list is a notebook-level global, not an
            # argument; this assumes train_set was built from it - confirm.
            print(trainval_list[idx])
        tiles.append(np.transpose(img,(1,2,0)))
        true_label += class_name
    stacked = np.squeeze(np.concatenate(tiles, axis=axis))
    # NOTE(review): scipy.misc.imsave is not available in recent SciPy
    # releases - confirm the installed version still provides it.
    scipy.misc.imsave(save_path+'/{}_true_{}.png'.format(file_tag, true_label), stacked)


def visualize_topk_samples(measure, train_set, num_sample=5, mask=None, verbose='ALL', save_path='./result'):
    """Show and save the training samples ranked highest/lowest by ``measure``.

    measure: per-sample influence values (one per training sample)
    train_set: dataset the samples are read from
    num_sample: number of samples shown per group
    mask: optional per-sample boolean sequence/array; only indices whose mask
          entry is truthy are ranked. ``None`` ranks every sample.
    verbose: which groups to show
        'ALL': show DISADV / ADV / INF / NEG examples
        'ADV': show ADV only
        'DIS': show DIS only
    save_path: output directory (created when missing)

    Groups: DISADVANTAGEOUS = largest measure, ADVANTAGEOUS = smallest measure,
    INFLUENTIAL = largest |measure|, NEGLIGIBLE = smallest |measure|.
    Returns 0.
    """
    axis = 0 # axis=0 -> column output / axis=1 -> row output

    def _ranked(scores):
        # Ascending argsort of scores, restricted to masked-in indices.
        # 'is None' (not '== None') so that a numpy-array mask does not raise
        # on an ambiguous element-wise comparison.
        order = np.argsort(scores)
        if mask is None:
            return order
        assert(len(mask) == len(measure))
        return list(filter(lambda idx: mask[idx], order))

    argsort = _ranked(measure)

    topk = argsort[-1:-num_sample-1:-1]
    botk = argsort[0:num_sample]

    if not os.path.exists(save_path):
        # make folder
        os.makedirs(save_path)

    if verbose == 'DIS' or verbose == 'ALL':
        _show_and_save_samples(
            topk, measure, train_set,
            '\n## SHOW {}-MOST DISADVANTAGEOUS EXAMPLES ##\n'.format(num_sample),
            'disadvantageous', save_path, axis, True)

    if verbose == 'ADV' or verbose == 'ALL':
        _show_and_save_samples(
            botk, measure, train_set,
            '\n## SHOW {}-MOST ADVANTAGEOUS EXAMPLES ##\n'.format(num_sample),
            'advantageous', save_path, axis, True)

    if verbose == 'ALL':
        # rank by magnitude for the influential / negligible groups
        argsort_abs = _ranked(np.abs(measure))

        topk_abs = argsort_abs[-1:-num_sample-1:-1]
        botk_abs = argsort_abs[0:num_sample]

        _show_and_save_samples(
            topk_abs, measure, train_set,
            '\n## SHOW {}-MOST INFLUENTIAL EXAMPLES ##\n'.format(num_sample),
            'influential', save_path, axis, False)

        _show_and_save_samples(
            botk_abs, measure, train_set,
            '\n## SHOW {}-MOST NEGLIGIBLE EXAMPLES ##\n'.format(num_sample),
            'negligible', save_path, axis, False)

    return 0

In [4]:
# skc dataset: root directory of the image data
root_dir = '/Data/skc/20180424/original'

# Read the file lists. The training list is read without a sample_size limit
# (the 10000-sample variant is kept commented out); the test list is limited
# to 500 samples of the 'validation1' split.
#trainval_list, anno_dict = dataset.read_data_subset(root_dir, mode='train1', sample_size=10000)
trainval_list, anno_dict = dataset.read_data_subset(root_dir, mode='train1')
test_list, _ = dataset.read_data_subset(root_dir, mode='validation1', sample_size=500)

# Test dataset built from the test file list and the shared annotations.
test_set = dataset.LazyDataset(root_dir, test_list, anno_dict)

# skc dataset: SANITY CHECK (sizes of the test set and the training list)
print(len(test_set), type(test_set))
print(len(trainval_list))

500 <class 'datasets.dataset.LazyDataset'>
55647


In [None]:
# skc network
from models.nn import ResNet_18 as ConvNet

# Read the input dimensions from the first test sample.
# The channel count is stored in `n_channels` rather than `C`: `C` is the
# module alias of cntk (`import cntk as C`), and rebinding it here would break
# any CNTK cell that is (re-)run after this one.
img, lb = test_set.__getitem__(0)
n_channels, H, W = img.shape

hp_d = dict() # hyperparameters for a network
# Tile the stored mean over the spatial dimensions and move the last axis
# first, so the result is laid out like an input image.
mean = np.load('/Data/github/data_analysis/dataset-analysis-new/output/mean_skc.npy')
hp_d['image_mean'] = np.transpose(np.tile(mean,(H,W,1)),(2,0,1))

# Build the network and load the trained weights from the checkpoint.
net = ConvNet((n_channels,H,W), len(anno_dict['classes']), **hp_d)
net.logits.restore('/Data/checkpts/skc/model_fold_1_trainval_ratio_1.0.dnn')

# skc network: SANITY CHECK (predict on the test set and time it)
start_time = time.time()
ys, y_preds, test_score, confusion_matrix = net.predict(test_set, **hp_d)
total_time = time.time() - start_time

print('Test error rate: {}'.format(test_score))
print('Total tack time(sec): {}'.format(total_time))
print('Tact time per image(sec): {}'.format(total_time / len(test_list)))
print('Confusion matrix: \n{}'.format(confusion_matrix))

In [None]:
# DO THIS FOR SEVERAL EXAMPLES
# For each of the first 10 test images: compute the test gradient, derive
# several influence measures over the training set, save them as .npy files
# and save/show the top-ranked training samples for each measure.

# vec v.s. freeze v.s. se

# restore trainval_list, test_list
#file_dir = './compare/result_net_nn_if_nn/train_e_99502'

#trainval_list = list(np.load(file_dir+'/trainval_list.npy'))
train_set = dataset.LazyDataset(root_dir, trainval_list, anno_dict) # non-noisy dataset

#test_list = list(np.load(file_dir+'/test_list.npy'))
test_set = dataset.LazyDataset(root_dir, test_list, anno_dict)

# FIXME: train_set is rebuilt here with the same arguments as above (redundant).
train_set = dataset.LazyDataset(root_dir, trainval_list, anno_dict)
save_dir = './compare/skc/' 
restore_dir = '/Data/checkpts/skc/model_fold_1_trainval_ratio_1.0.dnn'

for idx_test in range(0, 10):
    # Set a single test image

    # # Re-sample a test instance
    # test_list, _ = dataset.read_data_subset(root_dir, mode='validation1', sample_size=100)
    # test_set = dataset.LazyDataset(root_dir, test_list, anno_dict)
    
    # Restore weights (reloaded from the checkpoint at the start of every iteration)
    net.logits.restore(restore_dir)

    params = net.logits.parameters

    # Test sample and the gradient of the loss at that sample w.r.t. the
    # parameters of net.logits.
    name_test = test_list[idx_test]
    img_test, lb_test = test_set.__getitem__(idx_test)
    show_image_from_data(img_test)
    v_test = net.loss.grad({net.X:img_test, net.y:lb_test}, wrt=params)
    
    # Class names of the ground truth and of the network's argmax prediction.
    lb_true = anno_dict['classes'][str(np.argmax(lb_test))]
    lb_pred = anno_dict['classes'][str(np.argmax(net.logits.eval({net.X:img_test})))]
    print('testfile name: ', name_test)
    print('ground truth label: ', lb_true)
    print('network prediction: ', lb_pred)

    # One output folder per test file (file name up to the first '.').
    save_path = os.path.join(save_dir, name_test.split('.')[0])
    
    if not os.path.exists(save_path):
        # make folder
        os.makedirs(save_path)
        
    # Save the reference test image and the file lists used for this run.
    scipy.misc.imsave(save_path+'/test_reference_true_{}_pred_{}.png'.format(lb_true,lb_pred), np.transpose(img_test,(1,2,0)))

    np.save(save_path+'/trainval_list', trainval_list)
    np.save(save_path+'/test_list', test_list)

    # CALCULATE IF WITH FREEZED NETWORK
    # p_logreg = parameters of net.loss that are not parameters of
    # net.d['conv5_2']; v_logreg = test gradient restricted to them.

    params = net.loss.parameters
    p_ftex = net.d['conv5_2'].parameters
    p_logreg = tuple(set(params) - set(p_ftex)) # extract the weights of the last-layer (w,b)
    print(p_logreg)
    v_logreg = net.loss.grad({net.X:img_test, net.y:lb_test}, wrt=p_logreg)

    # Calculate influence functions
    # Each section below: compute the measure, print timing and max/min,
    # save it as .npy, then visualize the top-ranked training samples.

    # CG-FREEZE (1885 sec)
    # inverse HVP by conjugate gradient on the p_logreg parameters
    t1 = time.time()
    ihvp_cg_logreg = get_inverse_hvp_cg(net, net.loss, v_logreg, train_set,**{'damping':0.0, 'maxiter':50})
    IF_cg_logreg = IF_val(net, ihvp_cg_logreg, train_set)
    print('CG_logreg takes {} sec, and its max/min value {}'.format(time.time()-t1, [max(IF_cg_logreg),min(IF_cg_logreg)]))
    np.save(save_path+'/if_cg_logreg.npy', IF_cg_logreg)
    #IF_cg_logreg = np.load(save_path+'/if_cg_logreg.npy')
    visualize_topk_samples(IF_cg_logreg, train_set, num_sample=5, save_path=save_path+'/cg-frz')
    
    # VECTOR-FREEZE (175 sec)
    # the raw test gradient v_logreg is used in place of an inverse HVP
    t1 = time.time()
    IF_v_logreg = IF_val(net, v_logreg, train_set)
    print('V_logreg takes {} sec, and its max/min value {}'.format(time.time()-t1, [max(IF_v_logreg),min(IF_v_logreg)]))
    np.save(save_path+'/if_v_logreg.npy', IF_v_logreg)
    #IF_v_logreg = np.load(save_path+'/if_v_logreg.npy')
    visualize_topk_samples(IF_v_logreg, train_set, num_sample=5, save_path=save_path+'/vec-frz')

    # Vector-FULL (1688 sec)
    # same as above with the gradient w.r.t. all net.logits parameters
    t1 = time.time()
    IF_v = IF_val(net, v_test, train_set)
    print('V takes {} sec, and its max/min value {}'.format(time.time()-t1, [max(IF_v),min(IF_v)]))
    np.save(save_path+'/if_v.npy', IF_v)
    #IF_v = np.load(save_path+'/if_v.npy')
    visualize_topk_samples(IF_v, train_set, num_sample=5, save_path=save_path+'/v')
    
    # VECTOR-FREEZE-cosine-similarity (178 sec)
    # v_logreg against norm-normalised per-sample gradients (cosine=True)
    t1 = time.time()
    IF_v_cos = IF_val(net, v_logreg, train_set, cosine=True)
    print('V_cos takes {} sec, and its max/min value {}'.format(time.time()-t1, [max(IF_v_cos),min(IF_v_cos)]))
    np.save(save_path+'/if_v_cos.npy', IF_v_cos)
    #IF_v_cos = np.load(save_path+'/if_v_cos.npy')
    visualize_topk_samples(IF_v_cos, train_set, num_sample=5, save_path=save_path+'/vec-cos')

#     # CG-FULL (1epoch, more than 3 hours, did it stuck at line search as it happened in ncg?)
#     t1 = time.time()
#     ihvp_cg = get_inverse_hvp_cg(net, net.loss, v_test, train_set,**{'damping':0.1, 'maxiter':100})
#     IF_cg = IF_val(net, ihvp_cg, train_set)
#     print('CG takes {} sec, and its max/min value {}'.format(time.time()-t1, [max(IF_cg),min(IF_cg)]))
#     np.save(save_path+'/if_cg.npy', IF_cg)
#     visualize_topk_samples(IF_cg, train_set, num_sample=5, save_path=save_path+'/cg')
    
#     # SE-FULL (? sec: diverge)
#     t1 = time.time()
#     ihvp_se = get_inverse_hvp_se(net, net.loss, v_test, train_set,**{'scale':1e5, 'damping':0.1, 'batch_size':50, 'recursion_depth':100})
#     IF_se = IF_val(net, ihvp_se, train_set)
#     print('SE takes {} sec, and its max/min value {}'.format(time.time()-t1, [max(IF_se),min(IF_se)]))
#     np.save(save_path+'/if_se.npy', IF_se)
#     visualize_topk_samples(IF_se, train_set, num_sample=5, save_path=save_path+'/se')
    
    # SE-FREEZE (1065 sec -> 11050)
    # inverse HVP by stochastic estimation on the p_logreg parameters;
    # tolerance 0 is passed for the early-stop check
    t1 = time.time()
    ihvp_se_logreg = get_inverse_hvp_se(net, net.loss, v_logreg, train_set,**{'scale':1e3, 'damping':0.1, 'batch_size':50, 'tolerance':0, 'recursion_depth':65})
    IF_se_logreg = IF_val(net, ihvp_se_logreg, train_set)
    print('SE_logreg takes {} sec, and its max/min value {}'.format(time.time()-t1, [max(IF_se_logreg),min(IF_se_logreg)]))
    np.save(save_path+'/if_se_logreg.npy', IF_se_logreg)
    #IF_se_logreg = np.load(save_path+'/if_se_logreg.npy')
    visualize_topk_samples(IF_se_logreg, train_set, num_sample=5, save_path=save_path+'/se-frz')

In [None]:
# calculate IF measure for each samples of each classes
# One validation file per class is picked as test sample; for each of them the
# same influence measures as in the comparison cell are computed and saved.

# WITH SEVERAL METHODOLOGIES

# Full 'validation1' file list (no sample_size limit).
temp_list, _ = dataset.read_data_subset(root_dir, mode='validation1')

print('num_of_samples',len(temp_list))

# restore trainval_list, test_list
#file_dir = './compare/result_net_nn_if_nn/train_e_99502'
#file_dir = './sample/result_net_nn_if_nn/train_B_69574'

#trainval_list = list(np.load(file_dir+'/trainval_list.npy'))
#test_list = list(np.load(file_dir+'/test_list.npy'))

# Group the file names by class: the part of the file name before the first
# '_' is used as key and must be one of the class names in anno_dict.
dic = {ks: [] for ks in anno_dict['classes'].values()}

for tl in temp_list:
    dic[tl.split('_')[0]].append(tl)

# First file of every class (raises IndexError if a class has no file).
sample_list = [dic[ks][0] for ks in dic.keys()]
#sample_list = list(np.load('./sample/result_net_nn_if_nn/train_B_69574/sample_list.npy'))
sample_set = dataset.LazyDataset(root_dir, sample_list, anno_dict)

    
print(len(sample_list), sample_list)

save_dir = './sample/skc/result'
restore_dir = '/Data/checkpts/skc/model_fold_1_trainval_ratio_1.0.dnn'
train_set = dataset.LazyDataset(root_dir, trainval_list, anno_dict)


for idx_test in range(len(sample_list)):
    # Set a single test image

    # Restore weights (reloaded from the checkpoint at the start of every iteration)
    net.logits.restore(restore_dir)

    params = net.logits.parameters

    # Test sample and the gradient of the loss at that sample w.r.t. the
    # parameters of net.logits.
    name_test = sample_list[idx_test]
    img_test, lb_test = sample_set.__getitem__(idx_test)
    show_image_from_data(img_test)
    v_test = net.loss.grad({net.X:img_test, net.y:lb_test}, wrt=params)
    
    # Class names of the ground truth and of the network's argmax prediction.
    lb_true = anno_dict['classes'][str(np.argmax(lb_test))]
    lb_pred = anno_dict['classes'][str(np.argmax(net.logits.eval({net.X:img_test})))]
    print('testfile name: ', name_test)
    print('ground truth label: ', lb_true)
    print('network prediction: ', lb_pred)

    # One output folder per test file (file name up to the first '.').
    save_path = os.path.join(save_dir, name_test.split('.')[0])
    
    if not os.path.exists(save_path):
        # make folder
        os.makedirs(save_path)

    # Save the reference test image and the file lists used for this run.
    scipy.misc.imsave(save_path+'/test_reference_true_{}_pred_{}.png'.format(lb_true,lb_pred), np.transpose(img_test,(1,2,0)))

    np.save(save_path+'/trainval_list', trainval_list)
    np.save(save_path+'/test_list', test_list)
    np.save(save_path+'/temp_list', temp_list)
    np.save(save_path+'/sample_list', sample_list)

    # CALCULATE IF WITH FREEZED NETWORK
    # p_logreg = parameters of net.loss that are not parameters of
    # net.d['conv5_2']; v_logreg = test gradient restricted to them.

    params = net.loss.parameters
    p_ftex = net.d['conv5_2'].parameters
    p_logreg = tuple(set(params) - set(p_ftex)) # extract the weights of the last-layer (w,b)
    print(p_logreg)
    v_logreg = net.loss.grad({net.X:img_test, net.y:lb_test}, wrt=p_logreg)

    # Calculate influence functions
    # ns: number of samples shown per group by visualize_topk_samples
    ns = 10

    # CG-FREEZE (1885 sec)
    # inverse HVP by conjugate gradient on the p_logreg parameters
    t1 = time.time()
    ihvp_cg_logreg = get_inverse_hvp_cg(net, net.loss, v_logreg, train_set,**{'damping':0.0, 'maxiter':50})
    IF_cg_logreg = IF_val(net, ihvp_cg_logreg, train_set)
    print('CG_logreg takes {} sec, and its max/min value {}'.format(time.time()-t1, [max(IF_cg_logreg),min(IF_cg_logreg)]))
    np.save(save_path+'/if_cg_logreg.npy', IF_cg_logreg)
    #IF_cg_logreg = np.load(save_path+'/if_cg_logreg.npy')
    visualize_topk_samples(IF_cg_logreg, train_set, num_sample=ns, save_path=save_path+'/cg-frz')
    
    # VECTOR-FREEZE (175 sec)
    # the raw test gradient v_logreg is used in place of an inverse HVP
    t1 = time.time()
    IF_v_logreg = IF_val(net, v_logreg, train_set)
    print('V_logreg takes {} sec, and its max/min value {}'.format(time.time()-t1, [max(IF_v_logreg),min(IF_v_logreg)]))
    np.save(save_path+'/if_v_logreg.npy', IF_v_logreg)
    #IF_v_logreg = np.load(save_path+'/if_v_logreg.npy')
    visualize_topk_samples(IF_v_logreg, train_set, num_sample=ns, save_path=save_path+'/vec-frz')

    # Vector-FULL (1688 sec)
    # same as above with the gradient w.r.t. all net.logits parameters
    t1 = time.time()
    IF_v = IF_val(net, v_test, train_set)
    print('V takes {} sec, and its max/min value {}'.format(time.time()-t1, [max(IF_v),min(IF_v)]))
    np.save(save_path+'/if_v.npy', IF_v)
    #IF_v = np.load(save_path+'/if_v.npy')
    visualize_topk_samples(IF_v, train_set, num_sample=ns, save_path=save_path+'/v')
    
    # VECTOR-FREEZE-cosine-similarity (178 sec)
    # v_logreg against norm-normalised per-sample gradients (cosine=True)
    t1 = time.time()
    IF_v_cos = IF_val(net, v_logreg, train_set, cosine=True)
    print('V_cos takes {} sec, and its max/min value {}'.format(time.time()-t1, [max(IF_v_cos),min(IF_v_cos)]))
    np.save(save_path+'/if_v_cos.npy', IF_v_cos)
    #IF_v_cos = np.load(save_path+'/if_v_cos.npy')
    visualize_topk_samples(IF_v_cos, train_set, num_sample=ns, save_path=save_path+'/vec-cos')

#     # CG-FULL (1epoch, more than 3 hours, did it stuck at line search as it happened in ncg?)
#     t1 = time.time()
#     ihvp_cg = get_inverse_hvp_cg(net, net.loss, v_test, train_set,**{'damping':0.1, 'maxiter':100})
#     IF_cg = IF_val(net, ihvp_cg, train_set)
#     print('CG takes {} sec, and its max/min value {}'.format(time.time()-t1, [max(IF_cg),min(IF_cg)]))
#     np.save(save_path+'/if_cg.npy', IF_cg)
#     visualize_topk_samples(IF_cg, train_set, num_sample=ns, save_path=save_path+'/cg')
    
#     # SE-FULL (? sec: diverge)
#     t1 = time.time()
#     ihvp_se = get_inverse_hvp_se(net, net.loss, v_test, train_set,**{'scale':1e5, 'damping':0.1, 'batch_size':50, 'recursion_depth':100})
#     IF_se = IF_val(net, ihvp_se, train_set)
#     print('SE takes {} sec, and its max/min value {}'.format(time.time()-t1, [max(IF_se),min(IF_se)]))
#     np.save(save_path+'/if_se.npy', IF_se)
#     visualize_topk_samples(IF_se, train_set, num_sample=ns, save_path=save_path+'/se')
    
    # SE-FREEZE (1065 sec)
    # inverse HVP by stochastic estimation on the p_logreg parameters;
    # tolerance 0 is passed for the early-stop check
    t1 = time.time()
    ihvp_se_logreg = get_inverse_hvp_se(net, net.loss, v_logreg, train_set,**{'scale':1e3, 'damping':0.1, 'batch_size':50, 'tolerance':0, 'recursion_depth':65})
    IF_se_logreg = IF_val(net, ihvp_se_logreg, train_set)
    print('SE_logreg takes {} sec, and its max/min value {}'.format(time.time()-t1, [max(IF_se_logreg),min(IF_se_logreg)]))
    np.save(save_path+'/if_se_logreg.npy', IF_se_logreg)
    #IF_se_logreg = np.load(save_path+'/if_se_logreg.npy')
    visualize_topk_samples(IF_se_logreg, train_set, num_sample=ns, save_path=save_path+'/se-frz')
    

skc dataset

학습시간: 약 7000sec

전체 데이터셋(55647장) 사용 시: SE 65 epoch에 11050 sec (약 3시간) 걸림.
너무 오래 걸려서 10000장으로 줄임.

CG-frz: 2528, 3175 sec
V-frz: 72 sec
V-full: 305 sec
V-cos: 72 sec
SE-frz: 2067 sec (but not yet converged)