# Imports, Paths and Parameters

In [1]:
import os
from pathlib import Path
import getpass
import numpy as np
import time
import torch
from torch import nn
from tqdm import tqdm
from torch.utils.data import DataLoader
import random
import sys

# Make the current and the parent directory importable so the local `src`
# package resolves when the notebook is run from within the project directory.
sys.path.extend(['.', '..'])

# local
from src.helpers.helpers import get_random_indexes, get_random_classes
from src.model.dino_model import get_dino, ViTWrapper
from src.model.data import *

# Custom imports
import torchattacks
from torchattacks import *
import torch.optim as optim
from torchvision import transforms as pth_transforms
from torchvision.utils import save_image

# Seed the Python, PyTorch and NumPy random number generators with one shared value.
SEED = 42
for _seed_rng in (random.seed, torch.manual_seed, np.random.seed):
    _seed_rng(SEED)
del _seed_rng

# Current OS user (not referenced by the path constants below).
username = getpass.getuser()

# Root directories on the cluster scratch file system.
DATA_PATH = Path('/cluster/scratch/thobauma/dl_data')
MAX_PATH = Path('/cluster/scratch/mmathys/dl_data')

# Path for intermediate outputs
BASE_POSTHOC_PATH = MAX_PATH / 'posthoc-fixed-labels'
# Alternative output root: MAX_PATH / 'posthoc-subset'


# Original dataset
ORI_PATH = DATA_PATH / 'ori_data'
CLASS_SUBSET_PATH = ORI_PATH / 'class_subset.npy'

TR_PATH = ORI_PATH / 'train'
TR_ORI_LABEL_PATH = TR_PATH / 'correct_labels.txt'
TR_ORI_IMAGES_PATH = TR_PATH / 'images'

VAL_PATH = ORI_PATH / 'validation'
VAL_ORI_LABEL_PATH = VAL_PATH / 'correct_labels.txt'
VAL_ORI_IMAGES_PATH = VAL_PATH / 'images'

# DAmageNet
DN_PATH = DATA_PATH / 'damageNet'
DN_LABEL_PATH = DN_PATH / 'val_damagenet.txt'
DN_IMAGES_PATH = DN_PATH / 'images'
DN_POSTHOC_PATH = BASE_POSTHOC_PATH / 'damagenet'
DN_POSTHOC_LABEL_PATH = DN_POSTHOC_PATH / 'labels.csv'

# PGD (adversarial images reuse the original label files)
TR_PGD_PATH = MAX_PATH / 'adversarial_data' / 'pgd_06' / 'train'
TR_PGD_LABEL_PATH = TR_ORI_LABEL_PATH
TR_PGD_IMAGES_PATH = TR_PGD_PATH / 'images'
TR_PGD_POSTHOC_PATH = BASE_POSTHOC_PATH / 'pgd' / 'train'
TR_PGD_POSTHOC_LABEL_PATH = TR_PGD_POSTHOC_PATH / 'labels.csv'

VAL_PGD_PATH = MAX_PATH / 'adversarial_data' / 'pgd_06' / 'validation'
VAL_PGD_LABEL_PATH = VAL_ORI_LABEL_PATH
VAL_PGD_IMAGES_PATH = VAL_PGD_PATH / 'images'
VAL_PGD_POSTHOC_PATH = BASE_POSTHOC_PATH / 'pgd' / 'validation'
VAL_PGD_POSTHOC_LABEL_PATH = VAL_PGD_POSTHOC_PATH / 'labels.csv'

# CW
TR_CW_PATH = MAX_PATH / 'adversarial_data' / 'cw' / 'train'
TR_CW_LABEL_PATH = TR_ORI_LABEL_PATH
TR_CW_IMAGES_PATH = TR_CW_PATH / 'images'
TR_CW_POSTHOC_PATH = BASE_POSTHOC_PATH / 'cw' / 'train'
TR_CW_POSTHOC_LABEL_PATH = TR_CW_POSTHOC_PATH / 'labels.csv'

VAL_CW_PATH = MAX_PATH / 'adversarial_data' / 'cw' / 'validation'
VAL_CW_LABEL_PATH = VAL_ORI_LABEL_PATH
VAL_CW_IMAGES_PATH = VAL_CW_PATH / 'images'
VAL_CW_POSTHOC_PATH = BASE_POSTHOC_PATH / 'cw' / 'validation'
VAL_CW_POSTHOC_LABEL_PATH = VAL_CW_POSTHOC_PATH / 'labels.csv'

# FGSM
TR_FGSM_PATH = MAX_PATH / 'adversarial_data' / 'fgsm_06' / 'train'
TR_FGSM_LABEL_PATH = TR_ORI_LABEL_PATH
TR_FGSM_IMAGES_PATH = TR_FGSM_PATH / 'images'
TR_FGSM_POSTHOC_PATH = BASE_POSTHOC_PATH / 'fgsm' / 'train'
TR_FGSM_POSTHOC_LABEL_PATH = TR_FGSM_POSTHOC_PATH / 'labels.csv'

VAL_FGSM_PATH = MAX_PATH / 'adversarial_data' / 'fgsm_06' / 'validation'
VAL_FGSM_LABEL_PATH = VAL_ORI_LABEL_PATH
VAL_FGSM_IMAGES_PATH = VAL_FGSM_PATH / 'images'
VAL_FGSM_POSTHOC_PATH = BASE_POSTHOC_PATH / 'fgsm' / 'validation'
VAL_FGSM_POSTHOC_LABEL_PATH = VAL_FGSM_POSTHOC_PATH / 'labels.csv'

In [2]:
# Data-loading parameters
INDEX_SUBSET = None
NUM_WORKERS = 0
PIN_MEMORY = True

# CLASS_SUBSET is read by the classifier, label-encoder and label-generation
# cells further down, so it must exist after a fresh "Restart & Run All".
# Keep a value that is already in the namespace (e.g. supplied by a star
# import); otherwise load the saved subset from CLASS_SUBSET_PATH.
if 'CLASS_SUBSET' not in globals():
    CLASS_SUBSET = np.load(CLASS_SUBSET_PATH)

BATCH_SIZE = 80

DEVICE = 'cuda'

In [3]:
#!python ../setup/collect_env.py

In [4]:
# Build the ViT-small/16 model and its linear classifier via the project helper.
model, linear_classifier = get_dino(
    model_name='vit_small',
    patch_size=16,
    n_last_blocks=4,
    avgpool_patchtokens=False,
    device=DEVICE,
)

Please use the `--pretrained_weights` argument to indicate the path of the checkpoint to evaluate.
Since no pretrained weights have been provided, we load the reference pretrained DINO weights.
Model vit_small built.
Embed dim 1536
We load the reference pretrained linear weights from dino_deitsmall16_pretrain/dino_deitsmall16_linearweights.pth.


In [8]:
class LinearClassifier(nn.Module):
    """Linear layer to train on top of frozen features.

    Args:
        dim: Number of input features per sample after flattening.
        num_labels: Number of output logits (classes).
    """

    def __init__(self, dim, num_labels=1000):
        super().__init__()
        self.num_labels = num_labels
        self.linear = nn.Linear(dim, num_labels)
        # Gaussian weights (std 0.01) and zero bias.
        nn.init.normal_(self.linear.weight, mean=0.0, std=0.01)
        nn.init.zeros_(self.linear.bias)

    def forward(self, x):
        """Flatten every sample to a vector and return logits of shape (batch, num_labels)."""
        # reshape (unlike view) also works when `x` is not contiguous in memory,
        # e.g. after a transpose/permute.
        x = x.reshape(x.size(0), -1)

        return self.linear(x)


In [16]:
# Replace the classifier returned by get_dino with a fresh head sized for the
# class subset; the input width is taken from the existing head.
linear_classifier = LinearClassifier(
    linear_classifier.linear.in_features,
    num_labels=len(CLASS_SUBSET),
)

# NOTE(review): the checkpoint directory is named "25_classes" - presumably
# len(CLASS_SUBSET) has to be 25 for the weights to fit; confirm.
CLEAN_CLASSIFIER_PATH = Path(MAX_PATH, 'adversarial_data', 'adv_classifiers', '25_classes', 'clean.pt')

# map_location='cpu' lets the checkpoint deserialize regardless of the device
# it was saved from; load_state_dict then copies the values into the new head.
linear_classifier.load_state_dict(torch.load(CLEAN_CLASSIFIER_PATH, map_location='cpu'))

<All keys matched successfully>

In [19]:
from sklearn import preprocessing

# Encoder fitted on the entries of CLASS_SUBSET; it is handed to the datasets
# built in the label-generation cell.
label_encoder = preprocessing.LabelEncoder()
label_encoder.fit(list(CLASS_SUBSET))

LabelEncoder()

# Load data

In [17]:
# Adversarial image folders per attack as (train, validation).
_ADV_IMAGE_DIRS = {
    'cw': (TR_CW_IMAGES_PATH, VAL_CW_IMAGES_PATH),
    'fgsm_06': (TR_FGSM_IMAGES_PATH, VAL_FGSM_IMAGES_PATH),
    'pgd_06': (TR_PGD_IMAGES_PATH, VAL_PGD_IMAGES_PATH),
}

# Layout: datasets_paths[attack]['b'][split] -> {'label': ..., 'images': ...}
# Every attack is paired with the original label files of the matching split.
datasets_paths = {
    attack: {
        'b': {
            'train': {'label': TR_ORI_LABEL_PATH, 'images': train_images},
            'val': {'label': VAL_ORI_LABEL_PATH, 'images': val_images},
        }
    }
    for attack, (train_images, val_images) in _ADV_IMAGE_DIRS.items()
}

# Wrap model

In [5]:
# Wrap the backbone and the linear classifier in ViTWrapper and move the result to DEVICE.
# NOTE(review): presumably the wrapper keeps a reference to `linear_classifier`, so this
# cell has to run after the cell that rebuilds the classifier - confirm execution order.
model_wrap = ViTWrapper(
    model,
    linear_classifier,
    device=DEVICE,
    n_last_blocks=4,
    avgpool_patchtokens=False,
).to(DEVICE)

In [14]:
# NOTE(review): these locations are hard-coded and lie outside DATA_PATH
# defined in the first cell - confirm they are the intended inputs.
dataset = AdvTrainingImageDataset(
    "/cluster/scratch/thobauma/data/ori/filtered/train/images/",
    "/cluster/scratch/thobauma/data/ori/filtered/train/labels.csv",
    ORIGINAL_TRANSFORM,
    index_subset=None,
)

In [21]:
# Small, unshuffled loader (two samples per batch) over the dataset above.
data_loader = DataLoader(
    dataset,
    batch_size=2,
    num_workers=NUM_WORKERS,
    pin_memory=PIN_MEMORY,
    shuffle=False,
)

In [22]:
# Take only the first batch from the loader; stays None when the loader yields nothing.
data = next(iter(data_loader), None)

In [23]:
data

[tensor([[[[0.3216, 0.3255, 0.3490,  ..., 0.2627, 0.3020, 0.2510],
           [0.3333, 0.3412, 0.3255,  ..., 0.2902, 0.3059, 0.2392],
           [0.3098, 0.3098, 0.3176,  ..., 0.2863, 0.2941, 0.2471],
           ...,
           [0.4706, 0.6314, 0.7608,  ..., 0.4588, 0.3569, 0.4078],
           [0.7725, 0.8549, 0.9216,  ..., 0.5961, 0.5373, 0.5333],
           [0.6941, 0.6235, 0.6039,  ..., 0.7412, 0.6784, 0.6941]],
 
          [[0.3137, 0.3294, 0.3294,  ..., 0.3490, 0.3647, 0.2824],
           [0.3255, 0.3373, 0.3333,  ..., 0.3294, 0.3490, 0.2627],
           [0.3176, 0.3176, 0.3059,  ..., 0.3176, 0.3294, 0.2824],
           ...,
           [0.3882, 0.5294, 0.6706,  ..., 0.3686, 0.2627, 0.2824],
           [0.7294, 0.8196, 0.9216,  ..., 0.5216, 0.4510, 0.3961],
           [0.7020, 0.6471, 0.6431,  ..., 0.6431, 0.5686, 0.5647]],
 
          [[0.3294, 0.3529, 0.3569,  ..., 0.4078, 0.4824, 0.3765],
           [0.3647, 0.3686, 0.3686,  ..., 0.3686, 0.4275, 0.3020],
           [0.3333, 0.33

In [30]:
# Forward the first element of the sample batch through `model` without tracking gradients.
# NOTE(review): this calls the object returned by get_dino directly rather than
# `model_wrap` - confirm its output is what the argmax in the next cell should read.
with torch.no_grad():
    sample_images = data[0].to('cuda')
    output = model(sample_images)

In [31]:
output.argmax(1)

tensor([245, 304], device='cuda:0')

# Generate labels

In [32]:
# For every attack and split: classify the adversarial images with `model_wrap`,
# report the accuracy against the labels delivered by the dataset, and store the
# predicted label of each image in `new_labels.csv` next to the images folder.
# NOTE(review): `pd` is not imported explicitly in this notebook - presumably it
# arrives through `from src.model.data import *`; confirm.
with torch.no_grad():
    for attack, attack_paths in datasets_paths.items():
        for d in ['train', 'val']:
            split_paths = attack_paths['b'][d]

            dataset = AdvTrainingImageDataset(
                split_paths['images'],
                split_paths['label'],
                ONLY_NORMALIZE_TRANSFORM,
                CLASS_SUBSET,
                index_subset=None,
                label_encoder=label_encoder,
            )
            data_loader = DataLoader(dataset, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS, pin_memory=PIN_MEMORY, shuffle=False)
            total = len(dataset)

            # Sibling of the images directory, e.g. .../cw/train/new_labels.csv
            STORE_PATH = str(Path(split_paths['images']).parent / 'new_labels.csv')
            print(STORE_PATH)
            adv_labels = {}
            correct = 0

            for images, labels, img_names in tqdm(data_loader):

                labels = labels.to(DEVICE)
                images = images.to(DEVICE)

                outputs = model_wrap(images)

                # Index of the largest logit per sample.
                _, pre = torch.max(outputs, 1)

                # .item() keeps the running count a plain Python int.
                correct += (pre == labels).sum().item()

                # One device-to-host copy per batch instead of one per image.
                adv_labels.update(zip(img_names, pre.cpu().numpy()))

            # Report the split name (not the dataset object) and guard against an empty dataset.
            accuracy = 100 * correct / total if total else float('nan')
            print(f'Accuracy against {attack} {d}: {accuracy:.2f} %')

            df = pd.DataFrame.from_dict(adv_labels, orient='index')
            df.to_csv(STORE_PATH, sep=" ", header=False)

  0%|          | 0/403 [00:00<?, ?it/s]

/cluster/scratch/mmathys/dl_data/adversarial_data/cw/train/new_labels.csv


100%|██████████| 403/403 [02:32<00:00,  2.65it/s]
  0%|          | 0/16 [00:00<?, ?it/s]

Accuracy against cw <src.model.data.AdvTrainingImageDataset object at 0x2b69f352e430>: 95.25 %
/cluster/scratch/mmathys/dl_data/adversarial_data/cw/validation/new_labels.csv


100%|██████████| 16/16 [00:06<00:00,  2.67it/s]


Accuracy against cw <src.model.data.AdvTrainingImageDataset object at 0x2b69f352ef70>: 93.68 %


  0%|          | 0/403 [00:00<?, ?it/s]

/cluster/scratch/mmathys/dl_data/adversarial_data/fgsm_06/train/new_labels.csv


100%|██████████| 403/403 [02:36<00:00,  2.58it/s]
  0%|          | 0/16 [00:00<?, ?it/s]

Accuracy against fgsm_06 <src.model.data.AdvTrainingImageDataset object at 0x2b69f352ecd0>: 12.85 %
/cluster/scratch/mmathys/dl_data/adversarial_data/fgsm_06/validation/new_labels.csv


100%|██████████| 16/16 [00:06<00:00,  2.63it/s]


Accuracy against fgsm_06 <src.model.data.AdvTrainingImageDataset object at 0x2b6a2b94b100>: 14.64 %


  0%|          | 0/403 [00:00<?, ?it/s]

/cluster/scratch/mmathys/dl_data/adversarial_data/pgd_06/train/new_labels.csv


100%|██████████| 403/403 [02:35<00:00,  2.59it/s]
  0%|          | 0/16 [00:00<?, ?it/s]

Accuracy against pgd_06 <src.model.data.AdvTrainingImageDataset object at 0x2b6a3616eb50>: 0.00 %
/cluster/scratch/mmathys/dl_data/adversarial_data/pgd_06/validation/new_labels.csv


100%|██████████| 16/16 [00:06<00:00,  2.63it/s]

Accuracy against pgd_06 <src.model.data.AdvTrainingImageDataset object at 0x2b6a2b94b550>: 0.00 %



