In [1]:
import os
# Restrict this notebook to a single GPU (device index 7 on the host). The
# variable is set before torch is imported so it is already in the environment
# when CUDA gets initialised.
os.environ['CUDA_VISIBLE_DEVICES'] = '7'


from pytorch_lightning import utilities as pl_utils
from pytorch_lightning.trainer.trainer import Trainer
from pytorch_lightning.plugins import DDPPlugin
import torch
import torch.nn as nn
import pathlib
import sys
import numpy as np

# Project code lives in sibling directories; they must be on sys.path before
# the project-local imports below are attempted.
sys.path.append('..')
sys.path.append('../deep-learning-base')
sys.path.append('../partially_inverted_reps')

import plot_helper as plt_hp
import output as out
import architectures as arch
from architectures.callbacks import LightningWrapper
from datasets.data_modules import DATA_MODULES
import datasets.dataset_metadata as dsmd
from partial_loss import PartialInversionLoss, PartialInversionRegularizedLoss
from __init__ import DATA_PATH_IMAGENET, DATA_PATH, SERVER_PROJECT_PATH
from functools import partial
import stir.model.tools.helpers as helpers
import stir
import itertools


In [2]:
# Dataset the models were created for; it is also the first evaluation dataset.
SOURCE_DATASET = 'imagenet'
EVAL_DATASETS = [
    SOURCE_DATASET,
    'cifar10',
    'cifar100',
    'flowers',
    'oxford-iiit-pets',
]

# Model variant name -> checkpoint file handed to arch.create_model.
# NOTE(review): the empty path for 'nonrob' presumably means "use the stock
# pretrained weights" (create_model is called with pretrained=True) - confirm.
CHECKPOINT_PATHS = dict(
    nonrob='',
    robustl2eps3='/NS/robustness_2/work/vnanda/adv-robustness/logs/robust_imagenet/eps3/resnet-50-l2-eps3.ckpt',
)
# Live view of the variant names, in insertion order.
APPENDS = CHECKPOINT_PATHS.keys()
SEED = 2
MODEL = 'resnet50'

# Seeds used to draw independent random neuron subsets for each fraction.
PARTIAL_CHOICE_SEEDS = [1, 2, 3, 4, 5]
# Fractions of the final-layer input features that are kept in a subset.
PARTIAL_FRACTIONS = [
    0.0005, 0.001, 0.002, 0.003, 0.004, 0.005,
    0.01, 0.05,
    0.1, 0.2, 0.3, 0.5, 0.8, 0.9,
    1.,
]
BATCH_SIZE = 50

In [3]:
class MaskedModel(nn.Module):
    """Expose only a chosen subset of a model's latent units.

    The wrapped ``model`` is expected to return an ``(output, latent)`` pair
    when called. ``mask`` is used to index the second axis of ``latent``; the
    first element of the pair is passed through untouched.
    """

    def __init__(self, model, mask):
        super(MaskedModel, self).__init__()
        self.model = model
        self.mask = mask

    def forward(self, x, *args, **kwargs):
        """Run the wrapped model and return ``(output, latent[:, mask])``.

        Any extra positional or keyword arguments are forwarded unchanged to
        the wrapped model.
        """
        predictions, representation = self.model(x, *args, **kwargs)
        selected_units = representation[:, self.mask]
        return predictions, selected_units

In [4]:
def find_chosen_neurons(m1, partial_seeds=None, fractions=None):
    """Draw random subsets of the final layer's input neurons.

    For every ``(seed, fraction)`` pair the global torch RNG is re-seeded with
    ``seed`` and the first ``int(fraction * in_features)`` entries of a random
    permutation of ``range(in_features)`` are kept. Because the seed is reset
    before every draw, the subsets drawn for one seed are prefixes of the same
    permutation.

    Args:
        m1: object whose ``.model`` is an ``nn.Module``; the last entry of
            ``m1.model.named_modules()`` must expose ``in_features``.
        partial_seeds: iterable of integer seeds. Defaults to the notebook
            constant ``PARTIAL_CHOICE_SEEDS``.
        fractions: iterable of fractions. Defaults to the notebook constant
            ``PARTIAL_FRACTIONS``.

    Returns:
        dict mapping each fraction to a list of index tensors, one per seed,
        in the order the seeds were given.

    Side effects:
        Calls ``torch.manual_seed``, so the global torch RNG state is changed
        when this function returns.
    """
    if partial_seeds is None:
        partial_seeds = PARTIAL_CHOICE_SEEDS
    if fractions is None:
        fractions = PARTIAL_FRACTIONS

    # The last registered sub-module does not change between iterations, so it
    # is looked up once instead of once per (seed, fraction) pair.
    _, last_module = list(m1.model.named_modules())[-1]
    in_fts = last_module.in_features

    frac_to_chosen_neurons = {}
    for partial_seed in partial_seeds:
        for frac in fractions:
            num_neurons = int(frac * in_fts)
            # Re-seed right before the draw so the selection depends only on
            # (partial_seed, in_fts), not on earlier RNG consumption.
            torch.manual_seed(partial_seed)
            chosen_neurons = torch.randperm(in_fts)[:num_neurons]
            frac_to_chosen_neurons.setdefault(frac, []).append(chosen_neurons)
    return frac_to_chosen_neurons

In [None]:
# results[i] holds, for EVAL_DATASETS[i], a dict: variant name -> fraction ->
# list of similarity values (one per pair of random neuron subsets).
results = []
for eval_ds in EVAL_DATASETS:
    append_to_frac_ckas = {}
    dm = DATA_MODULES[eval_ds](
        # The data root is chosen from the dataset being evaluated. The
        # previous condition tested SOURCE_DATASET, which is constant, so every
        # evaluation dataset was pointed at the ImageNet directory.
        data_dir=DATA_PATH_IMAGENET if 'imagenet' in eval_ds else DATA_PATH,
        # Test-time transforms are used for both splits.
        transform_train=dsmd.TEST_TRANSFORMS_DEFAULT(224),
        transform_test=dsmd.TEST_TRANSFORMS_DEFAULT(224),
        batch_size=BATCH_SIZE)
    dm.init_remaining_attrs(eval_ds)
    for append in APPENDS:
        m1 = arch.create_model(MODEL, SOURCE_DATASET, pretrained=True,
                               checkpoint_path=CHECKPOINT_PATHS[append], seed=SEED, 
                               callback=partial(LightningWrapper, 
                                                dataset_name=SOURCE_DATASET,
                                                inference_kwargs={'with_latent': True}))
        frac_to_chosen_neurons = find_chosen_neurons(m1)
        frac_to_ckas = {}
        for frac in PARTIAL_FRACTIONS:
            # Compare every unordered pair of subsets drawn for this fraction;
            # both sides wrap the same model and differ only in the mask.
            for mask1, mask2 in itertools.combinations(frac_to_chosen_neurons[frac], 2):
                # NOTE(review): the (dataloader, 1000) tuple is presumably
                # (data source, number of samples) - confirm against stir.STIR.
                stir_score = stir.STIR(MaskedModel(m1, mask1), MaskedModel(m1, mask2), 
                    helpers.InputNormalize(dsmd.STANDARD_MEAN, dsmd.STANDARD_STD), 
                    helpers.InputNormalize(dsmd.STANDARD_MEAN, dsmd.STANDARD_STD),
                    (dm.train_dataloader(), 1000), verbose=False, layer1_num=None, 
                    layer2_num=None, no_opt=True, cka_only=True)
                frac_to_ckas.setdefault(frac, []).append(stir_score.rsm)

        append_to_frac_ckas[append] = frac_to_ckas
    results.append(append_to_frac_ckas)

Global seed set to 2
Global seed set to 0


In [None]:
# Accumulates wiki markup: one section per evaluation dataset and, inside it,
# one sub-section (with a linked plot) per model variant.
plt_str = '== CKA Analysis ==\n\n'

for ds_idx, eval_ds in enumerate(EVAL_DATASETS):
    plt_str += f'=== {eval_ds} ===\n\n'
    for append in APPENDS:
        frac_to_ckas = results[ds_idx][append]
        # The full layer (fraction 1) is drawn as a horizontal reference line;
        # all other fractions form the curve.
        full_cka = frac_to_ckas[1.]
        partial_items = sorted(
            ((frac, ckas) for frac, ckas in frac_to_ckas.items() if frac != 1),
            key=lambda item: item[0])
        x_vals, y_vals = list(zip(*partial_items))
        curve_mean = [np.mean(scores) for scores in y_vals]
        curve_std = [np.std(scores) for scores in y_vals]
        plot_file = plt_hp.line_plot(
            [curve_mean], 'Fraction of neurons', 'CKA', f'Eval On {eval_ds}',
            subfolder=SOURCE_DATASET, filename=f'{MODEL}-{append}-{eval_ds}', extension='png',
            x_vals=x_vals,
            legend_vals=['', 'Full Layer'], vertical_line=None,
            horizontal_lines=[np.mean(full_cka)], horizontal_lines_err=[np.std(full_cka)],
            colors=None, linestyles=['-', '--'],
            y_lims=(0.,1.1), root_dir='.', paper_friendly_plots=False,
            plot_inside=False, legend_location='best', savefig=True, figsize=(10,6),
            marker=[True], results_subfolder_name='cka_analysis',
            grid_spacing=None, y_err=[curve_std], legend_ncol=None)
        wiki_link = plt_hp.get_wiki_link(plot_file, SERVER_PROJECT_PATH, size=1000)
        plt_str += '== {} ==\n\n{}\n\n'.format(append, wiki_link)

In [None]:
# Save the generated wiki markup next to the plots, then upload the plot images.
wiki_dir = f'./results/cka_analysis/{SOURCE_DATASET}'
# Create the target directory explicitly so the write does not depend on an
# earlier cell (the plotting helper) having created it as a side effect.
os.makedirs(wiki_dir, exist_ok=True)
with open(f'{wiki_dir}/wiki_results.txt', 'w') as fp:
    fp.write(plt_str)

# Only files with the '.png' extension are requested for upload.
out.upload_results(['{}/{}/{}'.format(plt_hp.RESULTS_FOLDER_NAME, 'cka_analysis', SOURCE_DATASET)], 
        'results', SERVER_PROJECT_PATH, '.png')