In [None]:
import numpy as np
import pandas as pd
# Where the cached dataset arrays live, and where the NN / NNGP comparison
# logs are read from (and where later cells write their results).
dataset_path = './models_all_checkpoints/experiment_1/logs'
logdir = '../log_nngp_vs_nn'

whole_dataset = np.load(f'{dataset_path}/total_dataset.npy')
whole_labels = np.load(f'{dataset_path}/total_labels.npy')

# Read both logs through `logdir` so that inputs and outputs of this notebook
# always come from the same directory.
df_nn = pd.read_csv(f'{logdir}/log_nn/nn.csv')
df_nngp = pd.read_csv(f'{logdir}/log_nngp/nngp.csv')

# The two logs are combined row by row below, so they must list exactly the
# same `differ_idx` values in the same order.
assert len(df_nn) == len(df_nngp), \
    f'nn.csv has {len(df_nn)} rows but nngp.csv has {len(df_nngp)}'
assert (df_nn['differ_idx'] == df_nngp['differ_idx']).all(), \
    'nn.csv and nngp.csv disagree on differ_idx'

differ_indices = list(df_nn['differ_idx'])
diff_nn = list(df_nn['diff'])
diff_nn_std = list(df_nn['diff_std'])
mse_nngp = list(df_nngp['mse'])

import torch
def to_cuda(x):
    '''
    >>> move x onto the GPU when CUDA is available, otherwise return it unchanged
    '''
    return x.cuda() if torch.cuda.is_available() else x

# Text printed when continuous_seq is called with name='h' / name='help'.
h_message = '''
>>> continuous_seq builds a float -> float schedule selected by `name`:
constant: start_v
linear: start_v, slope
exp / exponential: start_v, power, [interval = 1]
jump: start_v, power, min_jump_pt, jump_freq
Every schedule also accepts optional `min` / `max` bounds used to clip the output.
'''

def continuous_seq(*args, **kwargs):
    '''
    >>> return a float to float mapping

    The schedule is configured entirely through keyword arguments
    (positional arguments are accepted but ignored):

    >>> name: 'constant', 'linear', 'exp' / 'exponential', 'jump' (case-insensitive),
        or 'h' / 'help' to print `h_message` and call exit(0)
    >>> min, max: optional bounds, the returned value is clipped to [min, max]
    >>> constant: start_v                                  -> start_v
    >>> linear: start_v, slope                             -> start_v + x * slope
    >>> exp: start_v, power, interval (default 1)          -> start_v * power ** (x / interval)
    >>> jump: start_v, power, min_jump_pt, jump_freq
        -> start_v * power ** (max(x - min_jump_pt + jump_freq, 0) // jump_freq)

    Raises KeyError when `name` or a key required by the chosen schedule is
    missing, and ValueError when `name` is not recognized.
    '''
    name = kwargs['name']
    kind = name.lower()
    max_v = kwargs.get('max', np.inf)
    min_v = kwargs.get('min', -np.inf)

    if kind in ['h', 'help']:
        print(h_message)
        exit(0)
    elif kind in ['constant',]:
        start_v = float(kwargs['start_v'])
        return lambda x: np.clip(start_v, a_min = min_v, a_max = max_v)
    elif kind in ['linear',]:
        start_v = float(kwargs['start_v'])
        slope = float(kwargs['slope'])
        return lambda x: np.clip(start_v + x * slope, a_min = min_v, a_max = max_v)
    # Two separate aliases: a single 'exp, exponential' string could never
    # equal either name, which made this branch unreachable.
    elif kind in ['exp', 'exponential']:
        start_v = float(kwargs['start_v'])
        power = float(kwargs['power'])
        interval = int(kwargs.get('interval', 1))
        return lambda x: np.clip(start_v * power ** (x / float(interval)), a_min = min_v, a_max = max_v)
    elif kind in ['jump',]:
        start_v = float(kwargs['start_v'])
        power = float(kwargs['power'])
        min_jump_pt = int(kwargs['min_jump_pt'])
        jump_freq = int(kwargs['jump_freq'])
        # The exponent stays 0 until x reaches min_jump_pt, then grows by one
        # every jump_freq steps.
        return lambda x: np.clip(start_v * power ** (max(x - min_jump_pt + jump_freq, 0) // jump_freq), a_min = min_v, a_max = max_v)
    else:
        raise ValueError('Unrecognized name: %s'%name)

# Learning-rate schedule handed to continuous_seq: a 'jump' schedule that
# starts at 1e-3 and is multiplied by 0.1 at step 25 and every 10 steps after.
lr_schedule = {
    'name': 'jump',
    'start_v': 0.001,
    'power': 0.1,
    'min_jump_pt': 25,
    'jump_freq': 10,
}
lr_func = continuous_seq(**lr_schedule)

In [None]:
import torch.nn as nn

# TracIn-style self-influence estimate for every point in `differ_indices`:
#   sum over checkpoints of  lr(epoch) * || d loss(point) / d params ||^2
# The checkpoint loop is outermost so each checkpoint is read from disk once
# and reused for all points, instead of being reloaded for every point.
tracein_estimates = [0] * len(differ_indices)
out_path = dataset_path.replace('logs','models')
num_checkpoints = 50  # checkpoints are stored as model-1.pkl ... model-50.pkl
criterion = nn.CrossEntropyLoss()

for epoch in range(1, num_checkpoints + 1):
  # NOTE(review): torch.load unpickles a whole model object, so only trusted
  # checkpoint files should be used here.
  temp_model = torch.load(f'{out_path}/model-{epoch}.pkl')
  # NOTE(review): the model is kept in train() mode as in the original code;
  # confirm this is intended rather than eval() when the network contains
  # dropout or batch-norm layers.
  temp_model.train()
  lr_this_epoch = lr_func(epoch)

  for pos, differ_idx in enumerate(differ_indices):
    # differ_idx appears to be 1-based: this slice keeps the single row at
    # position differ_idx - 1 together with its batch dimension.
    data = whole_dataset[differ_idx - 1: differ_idx]
    labels = whole_labels[differ_idx - 1: differ_idx]

    differ_preds = temp_model(to_cuda(torch.from_numpy(data).float()))
    loss_temp = criterion(differ_preds, to_cuda(torch.LongTensor(labels)))
    loss_temp.backward()

    # Squared L2 norm of the gradient over all parameters; gradients are
    # cleared afterwards so the next point starts from zero.
    grad_vec = 0
    for p in temp_model.parameters():
      if p.grad is None:
        # Parameter did not take part in the forward pass; nothing to add.
        continue
      grad_vec = (p.grad.data **2).sum() + grad_vec
      p.grad.data.zero_()

    tracein_estimates[pos] = tracein_estimates[pos] + grad_vec.cpu().numpy() * lr_this_epoch

print(tracein_estimates)

In [None]:
from torch_influence import LiSSAInfluenceModule
from utils.dataset import *
import torch

import torch.nn.functional as F
from torch_influence import BaseObjective




# Settings passed to the cifar10(...) loader factory.
batch_size = 64
valid_ratio = 0

# Model checkpoints are looked up under the dataset path with 'logs'
# replaced by 'models'.
out_path = dataset_path.replace('logs', 'models')

# Values forwarded to LiSSAInfluenceModule under the same keyword names.
damp = 0.001
repeat = 5
depth = 50  # 2000
scale = 500

class MyObjective(BaseObjective):
    """Cross-entropy objective handed to the influence module.

    Each `batch` is indexed as `batch[0]` (model inputs) and `batch[1]`
    (targets for `F.cross_entropy`).
    """

    def train_outputs(self, model, batch):
        """Return the model outputs for the inputs of a training batch."""
        inputs = batch[0]
        return model(inputs)

    def train_loss_on_outputs(self, outputs, batch):
        """Return the cross-entropy between `outputs` and the batch targets."""
        targets = batch[1]
        # mean reduction required (the default of F.cross_entropy)
        return F.cross_entropy(outputs, targets)

    def train_regularization(self, params):
        """No regularization term is added to the training loss."""
        return 0.0

    # training loss by default taken to be
    # train_loss_on_outputs + train_regularization

    def test_loss(self, model, params, batch):
        """Return the plain cross-entropy of `model` on a test batch."""
        logits = model(batch[0])
        # no regularization in test loss
        return F.cross_entropy(logits, batch[1])


# Build the CIFAR-10 loaders; only the training loader is used below.
train_loader, valid_loader, test_loader, classes = cifar10(
    batch_size=batch_size,
    valid_ratio=valid_ratio,
)

# NOTE(review): torch.load unpickles a whole model object, so only trusted
# checkpoint files should be used here.
model = torch.load(f'{out_path}/model-best.pkl')
device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)

# The training loader is passed as both train and test loader because the
# quantity of interest is the influence of each training point on itself.
module = LiSSAInfluenceModule(
    model=model,
    objective=MyObjective(),
    train_loader=train_loader,
    test_loader=train_loader,
    device=device,
    damp=damp,
    repeat=repeat,
    depth=depth,
    scale=scale,
)

inf_list = []
for differ_idx in differ_indices:
    # differ_idx - 1 is used as both the train index and the test index.
    # NOTE(review): assumes the loader's dataset order matches the order
    # differ_idx refers to - confirm.
    self_idx = differ_idx - 1
    scores = module.influences([self_idx], [self_idx])
    print(scores)
    inf_list.append(scores.item())

print(inf_list)


In [None]:
import pandas as pd

# Gather every per-point score into one table and save it under `logdir`.
influence_columns = {
    'diff_nn': diff_nn,
    'diff_nn_std': diff_nn_std,
    'mse_nngp': mse_nngp,
    'tracin': tracein_estimates,
    'inf_func': inf_list,
}
save_df = pd.DataFrame(influence_columns)
save_df.to_csv(f'{logdir}/compare_influence.csv')

In [None]:



import matplotlib.pyplot as plt
import math
import matplotlib.ticker as mtick
from scipy.stats import pearsonr
import seaborn as sns; 

def _min_max_scale(values):
    """Linearly rescale `values` so that its minimum maps to 0 and its maximum to 1.

    No special handling is done for constant input (max == min), which
    results in a division by zero.
    """
    arr = np.asarray(values, dtype=float)
    lowest, highest = arr.min(), arr.max()
    return (arr - lowest) / (highest - lowest)


def _regplot_with_corr(ax, x, y, label_prefix):
    """Draw a regression scatter of `y` against `x` on `ax`.

    The legend label is `label_prefix` followed by the Pearson r and p-value
    of (x, y); the correlation is computed once and reused for both numbers.
    """
    corr = pearsonr(x, y)
    sns.regplot(
        x=x,
        y=y,
        ax=ax,
        label=f'{label_prefix}(r={corr.statistic:.2f}, p={corr.pvalue:.2g})',
        marker = '+',
        line_kws={"ls":':'},
    )


fig, ax = plt.subplots(figsize = (4,2.8))
ax.set_ylim([-0.05, 1.05])

# Target on the y axis: square root of the NN prediction difference, scaled to [0, 1].
y = _min_max_scale(np.sqrt(diff_nn))

# Each estimator is scaled to [0, 1] as well so all three share one x axis.
_regplot_with_corr(ax, _min_max_scale(inf_list), y, 'Inf fun (Koh & Liang) ')
_regplot_with_corr(ax, _min_max_scale(tracein_estimates), y, 'TracIn (Pruthi et al.) ')
_regplot_with_corr(ax, _min_max_scale(np.sqrt(mse_nngp)), y, r'$\sqrt{Mean\ distance\ LOOD}$')

ax.set_ylabel('NN l2 prediction difference', fontsize = 12)
ax.set_xlabel('Estimate score by various methods', fontsize = 12)
# The legend is anchored above the axes so it does not cover the data.
ax.legend(fontsize = 12, loc = 'upper center', bbox_to_anchor=(0.45, 1.54))
fig.savefig(f'{logdir}/compare_influence.png', bbox_inches = 'tight')