## Experimenting with DkNN

In [1]:
# python stuff

from pathlib import Path as Path
from numpy.random import randint

# Our stuff
from datasets.cifar import Cifar
from models.model_wrap import ModelWrap

# from credibility import get_credibility

# torch stuff
import torch
from torchvision.models import vgg16, VGG16_Weights
from peepholes.peepholes import Peepholes
from peepholes.svd_peepholes import peep_matrices_from_svds as parser_fn
from credibility.DkNN import NearestNeighbor, DkNN


# Pick the compute device.
# The notebook targets the third-from-last GPU of the host. Clamp the index at 0
# so that machines with fewer than three GPUs still get a valid device
# (the unclamped expression would yield e.g. "cuda:-1").
use_cuda = torch.cuda.is_available()
cuda_index = max(torch.cuda.device_count() - 3, 0)
device = torch.device(f"cuda:{cuda_index}" if use_cuda else "cpu")
print(f"Using {device} device")

#--------------------------------
# Dataset
#--------------------------------
# dataset parameters (seed and batch size are reused later for the checkpoint name)
dataset, seed, bs = 'CIFAR100', 29, 64
data_path = '/srv/newpenny/XAI/LM/data/CIFAR100'

# options forwarded to the data loaders through `data_kwargs`
loader_options = {'num_workers': 4, 'pin_memory': True}

ds = Cifar(dataset=dataset, data_path=data_path)
# Kept as the last expression of the cell so its return value is displayed.
ds.load_data(batch_size=bs, data_kwargs=loader_options, seed=seed)

Using cuda:5 device
dataset: CIFAR100
Files already downloaded and verified
Files already downloaded and verified


{'train': <torch.utils.data.dataloader.DataLoader at 0x7f1155fefe60>,
 'val': <torch.utils.data.dataloader.DataLoader at 0x7f11560f1790>,
 'test': <torch.utils.data.dataloader.DataLoader at 0x7f115df98aa0>}

In [3]:
#--------------------------------
# Model
#--------------------------------
pretrained = True
model_dir = '/srv/newpenny/XAI/LM/models'
# File name of the saved weights; it encodes the training configuration.
model_name = (
    f'vgg16_pretrained={pretrained}_dataset={dataset}-'
    f'augmented_policy=CIFAR10_bs={bs}_seed={seed}.pth'
)

nn = vgg16(weights=VGG16_Weights.IMAGENET1K_V1)
# Read the width of the classification head from the network itself instead of
# hard-coding it (4096 for torchvision's VGG16), so it cannot drift from the
# architecture.
in_features = nn.classifier[-1].in_features
num_classes = len(ds.get_classes())
# Replace the final classifier layer so it has one output per dataset class.
nn.classifier[-1] = torch.nn.Linear(in_features, num_classes)
model = ModelWrap(device=device)
model.set_model(model=nn, path=model_dir, name=model_name, verbose=True)

# Target the layer at index 0 of the `classifier` module.
layers_dict = {'classifier': [0]}

model.set_target_layers(target_layers=layers_dict, verbose=True)
print('target layers: ', model.get_target_layers())

# Hooks are asked to save the layer inputs, not the outputs.
direction = {'save_input':True, 'save_output':False}
model.add_hooks(**direction, verbose=False)

# Dry run on a single training image, with a leading batch dimension of size 1.
dry_img, _ = ds._train_ds.dataset[0]
dry_img = dry_img.reshape((1,)+dry_img.shape)
model.dry_run(x=dry_img)

#--------------------------------
# SVDs
#--------------------------------
svds_name = 'svdsBatata'
svds_path = Path.cwd()/'../data/svdsBanana'
model.get_svds(model=model, path=svds_path, name=svds_name, verbose=True)
# Report the shape of the 'Vh' entry stored for every key in model._svds.
for layer_key, layer_svd in model._svds.items():
    print('svd shapes: ', layer_key, layer_svd['Vh'].shape)
#--------------------------------
# Peepholes
#--------------------------------
phs_name = 'peepholes'
phs_dir = Path.cwd()/'../data/peepholes'
peepholes = Peepholes(path=phs_dir, name=phs_name)

loaders = ds.get_dataset_loaders()

# copy dataset to peepholes dataset
peepholes.get_peep_dataset(loaders=loaders, verbose=True)

# collect the activations of the model over the same loaders
peepholes.get_activations(model=model, loaders=loaders, verbose=True)

# build the peepholes from the SVDs stored on the model, parsed by `parser_fn`
peepholes.get_peepholes(
    model=model,
    peep_matrices=model._svds,
    parser=parser_fn,
    verbose=True,
)

  self._checkpoint = torch.load(file, map_location=self.device)



-----------------
checkpoint
-----------------
state_dict keys: 
 odict_keys(['features.0.weight', 'features.0.bias', 'features.2.weight', 'features.2.bias', 'features.5.weight', 'features.5.bias', 'features.7.weight', 'features.7.bias', 'features.10.weight', 'features.10.bias', 'features.12.weight', 'features.12.bias', 'features.14.weight', 'features.14.bias', 'features.17.weight', 'features.17.bias', 'features.19.weight', 'features.19.bias', 'features.21.weight', 'features.21.bias', 'features.24.weight', 'features.24.bias', 'features.26.weight', 'features.26.bias', 'features.28.weight', 'features.28.bias', 'classifier.0.weight', 'classifier.0.bias', 'classifier.3.weight', 'classifier.3.bias', 'classifier.6.weight', 'classifier.6.bias']) 

train_loss 1.4151224618911744
val_loss 0.8791644280883157
train_accuracy 62.295
val_accuracy 74.96000000000001
epoch 59
batch_size 64
lr 0.001
-----------------

target layers:  {'classifier.0': Linear(in_features=25088, out_features=4096, bias=Tru

In [4]:
# Shallow copy of the per-split sample counts kept by `peepholes`, used as the
# batch specification (presumably one batch spanning each split — confirm
# against Peepholes.get_dataloaders).
batch_dict = dict(peepholes._n_samples.items())
kwargs = dict(batch_dict=batch_dict, verbose=True)
ph_dl = peepholes.get_dataloaders(**kwargs)

creating dataloader for:  train
creating dataloader for:  val
creating dataloader for:  test


In [5]:
# Sanity check: display the dataloader created for the 'train' split.
ph_dl['train']

<torch.utils.data.dataloader.DataLoader at 0x7f1156007a10>

# Initialize DkNN

In [6]:
# --- DkNN settings ---
nb_classes = ds.config['num_classes']
neighbors = 75
# per-split values handed to DkNN as `percentage`
percentage = dict(train=5, val=1, test=1)
verbose = True

# --- where the DkNN files are stored ---
dknn_name = 'DkNN'
dknn_path = Path.cwd()/'../data/DkNN'

# --- nearest-neighbour backend settings (passed on as nb_tables / number_bits) ---
number_bits = 17
nb_tables = 200

In [7]:
# Keyword arguments for the DkNN constructor, gathered from the settings above.
kwargs = dict(
    model=model,
    nb_classes=nb_classes,
    neighbors=neighbors,
    ph_dl=ph_dl,
    percentage=percentage,
    seed=seed,
    verbose=verbose,
    path=dknn_path,
    name=dknn_name,
    nearest_neighbor_backend=NearestNeighbor.BACKEND.FALCONN,
    nb_tables=nb_tables,
    number_bits=number_bits,
)

In [12]:
# Instantiate DkNN with the arguments collected in `kwargs`.
# NOTE(review): this cell carries execution count 12, while the calibrate() and
# fprop() cells below carry 9 and 10 — the saved outputs come from out-of-order
# runs; confirm the results with Restart Kernel -> Run All.
dknn = DkNN(**kwargs)

---------- DkNN init

File /home/lorenzocapelli/repos/XAI/src/../data/DkNN/['classifier.0']/train_5/val_1 exists.


In [9]:
# Run the DkNN calibration step on the object built above.
dknn.calibrate()

---------- DkNN calibrate

## Starting calibration of DkNN


In [10]:
# Compute the DkNN scores; with 'all' the log below shows the train, val and
# test splits being processed and saved in turn.
dknn.fprop('all')

---------- DkNN predict


 ---- Getting scores for train

Nonconformity calculated
Saving train to /home/lorenzocapelli/repos/XAI/src/../data/DkNN/['classifier.0']/train_5/val_1/train.

 ---- Getting scores for val

Nonconformity calculated
Saving val to /home/lorenzocapelli/repos/XAI/src/../data/DkNN/['classifier.0']/train_5/val_1/val.

 ---- Getting scores for test

Nonconformity calculated
Saving test to /home/lorenzocapelli/repos/XAI/src/../data/DkNN/['classifier.0']/train_5/val_1/test.


In [13]:
# Display the results container; its repr lists, per split, the fields
# confs, creds, p-value and preds_knn.
dknn.res

TensorDict(
    fields={
        test: TensorDict(
            fields={
                confs: MemoryMappedTensor(shape=torch.Size([10000]), device=cpu, dtype=torch.float32, is_shared=True),
                creds: MemoryMappedTensor(shape=torch.Size([10000]), device=cpu, dtype=torch.float32, is_shared=True),
                p-value: MemoryMappedTensor(shape=torch.Size([10000, 100]), device=cpu, dtype=torch.float32, is_shared=True),
                preds_knn: MemoryMappedTensor(shape=torch.Size([10000]), device=cpu, dtype=torch.int32, is_shared=True)},
            batch_size=torch.Size([10000]),
            device=cpu,
            is_shared=False),
        train: TensorDict(
            fields={
                confs: MemoryMappedTensor(shape=torch.Size([40000]), device=cpu, dtype=torch.float32, is_shared=True),
                creds: MemoryMappedTensor(shape=torch.Size([40000]), device=cpu, dtype=torch.float32, is_shared=True),
                p-value: MemoryMappedTensor(shape=torch.Si