In [1]:
# This extension reloads external Python files
import os
from pathlib import Path
import getpass
import numpy as np
import pandas as pd
import time
import math
import torch
from torch import nn
from tqdm import tqdm
import random
import sys
from torch.utils.data import random_split
from matplotlib import pyplot as plt

# allow imports when running script from within project dir
[sys.path.append(i) for i in ['.', '..']]

# local
from src.helpers.helpers import get_random_indexes, get_random_classes
from src.model.dino_model import get_dino
from src.model.data import create_loader, adv_dataset, ORIGINAL_TRANSFORM, ONLY_NORMALIZE_TRANSFORM

# seed
SEED = 42
random.seed(SEED)
torch.manual_seed(SEED)
np.random.seed(SEED)

username = getpass.getuser()
DATA_PATH = Path('/','cluster', 'scratch', 'thobauma', 'dl_data')
MAX_PATH = Path('/','cluster', 'scratch', 'mmathys', 'dl_data')
# Path for intermediate outputs
BASE_POSTHOC_PATH = Path(MAX_PATH, 'posthoc/')

# Original Dataset
ORI_PATH = Path(DATA_PATH, 'ori_data/')
CLASS_SUBSET_PATH = Path(ORI_PATH, 'class_subset.npy')

TR_PATH = Path(ORI_PATH, 'train/')
TR_ORI_LABEL_PATH = Path(TR_PATH,'correct_labels.txt')
TR_ORI_IMAGES_PATH = Path(TR_PATH,'images')

VAL_PATH = Path(ORI_PATH, 'validation/')
VAL_ORI_LABEL_PATH = Path(VAL_PATH,'correct_labels.txt')
VAL_ORI_IMAGES_PATH = Path(VAL_PATH,'images')

# DAmageNet
#DN_PATH = Path(DATA_PATH, 'damageNet')
#DN_LABEL_PATH = Path(DN_PATH, 'val_damagenet.txt')
#DN_IMAGES_PATH = Path(DN_PATH, 'images')
#DN_POSTHOC_PATH = Path(BASE_POSTHOC_PATH, 'damagenet')
#DN_POSTHOC_LABEL_PATH = Path(DN_POSTHOC_PATH, 'labels.csv')

# PGD
TR_PGD_PATH = Path(MAX_PATH, 'adversarial_data/pgd_06/train')
TR_PGD_LABEL_PATH = TR_ORI_LABEL_PATH
TR_PGD_IMAGES_PATH = Path(TR_PGD_PATH, 'images')
TR_PGD_POSTHOC_PATH = Path(BASE_POSTHOC_PATH, 'pgd/train/')
TR_PGD_POSTHOC_LABEL_PATH = Path(TR_PGD_POSTHOC_PATH, 'labels.csv')

VAL_PGD_PATH = Path(MAX_PATH, 'adversarial_data/pgd_06/validation')
VAL_PGD_LABEL_PATH = VAL_ORI_LABEL_PATH
VAL_PGD_IMAGES_PATH = Path(VAL_PGD_PATH, 'images')
VAL_PGD_POSTHOC_PATH = Path(BASE_POSTHOC_PATH, 'pgd/validation/')
VAL_PGD_POSTHOC_LABEL_PATH = Path(VAL_PGD_POSTHOC_PATH, 'labels.csv')

# CW
TR_CW_PATH = Path(MAX_PATH, 'adversarial_data/cw/train')
TR_CW_LABEL_PATH = TR_ORI_LABEL_PATH
TR_CW_IMAGES_PATH = Path(TR_CW_PATH, 'images')
TR_CW_POSTHOC_PATH = Path(BASE_POSTHOC_PATH, 'cw/train/')
TR_CW_POSTHOC_LABEL_PATH = Path(TR_CW_POSTHOC_PATH, 'labels.csv')

VAL_CW_PATH = Path(MAX_PATH, 'adversarial_data/cw/validation')
VAL_CW_LABEL_PATH = VAL_ORI_LABEL_PATH
VAL_CW_IMAGES_PATH = Path(VAL_CW_PATH, 'images')
VAL_CW_POSTHOC_PATH = Path(BASE_POSTHOC_PATH, 'cw/validation/')
VAL_CW_POSTHOC_LABEL_PATH = Path(VAL_CW_POSTHOC_PATH, 'labels.csv')

# FGSM
TR_FGSM_PATH = Path(MAX_PATH, 'adversarial_data/fgsm_06/train')
TR_FGSM_LABEL_PATH = TR_ORI_LABEL_PATH
TR_FGSM_IMAGES_PATH = Path(TR_FGSM_PATH, 'images')
TR_FGSM_POSTHOC_PATH = Path(BASE_POSTHOC_PATH, 'fgsm/train/')
TR_FGSM_POSTHOC_LABEL_PATH = Path(TR_FGSM_POSTHOC_PATH, 'labels.csv')

VAL_FGSM_PATH = Path(MAX_PATH, 'adversarial_data/fgsm_06/validation')
VAL_FGSM_LABEL_PATH = VAL_ORI_LABEL_PATH
VAL_FGSM_IMAGES_PATH = Path(VAL_FGSM_PATH, 'images')
VAL_FGSM_POSTHOC_PATH = Path(BASE_POSTHOC_PATH, 'fgsm/validation/')
VAL_FGSM_POSTHOC_LABEL_PATH = Path(VAL_FGSM_POSTHOC_PATH, 'labels.csv')

CLASS_SUBSET = np.load(CLASS_SUBSET_PATH) # for train

In [2]:
!nvidia-smi -l

Mon Jan  3 10:52:59 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.80.02    Driver Version: 450.80.02    CUDA Version: 11.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  GeForce RTX 208...  Off  | 00000000:DA:00.0 Off |                  N/A |
|  0%   30C    P8     1W / 250W |   1160MiB / 11019MiB |      0%   E. Process |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [19]:
cw_train_labels = pd.read_csv(TR_CW_POSTHOC_LABEL_PATH)
cw_val_labels = pd.read_csv(VAL_CW_POSTHOC_LABEL_PATH)
fgsm_train_labels = pd.read_csv(TR_FGSM_POSTHOC_LABEL_PATH)
fgsm_val_labels = pd.read_csv(VAL_FGSM_POSTHOC_LABEL_PATH)
pgd_train_labels = pd.read_csv(TR_PGD_POSTHOC_LABEL_PATH)
pgd_val_labels = pd.read_csv(VAL_PGD_POSTHOC_LABEL_PATH)

FileNotFoundError: [Errno 2] No such file or directory: '/cluster/scratch/mmathys/dl_data/posthoc/cw/validation/labels.csv'

In [20]:
labels = {'train':{'cw_train_labels':cw_train_labels,
           'fgsm_train_labels':fgsm_train_labels,
           'pgd_train_labels':pgd_train_labels},
          'val':{
'cw_val_labels': cw_val_labels,

'fgsm_val_labels':fgsm_val_labels,

'pgd_val_labels':pgd_val_labels}}

In [22]:
or_train_label = pd.read_csv(TR_ORI_LABEL_PATH, sep=' ', header=None)
or_train_label.columns=['file', 'or_label']
or_train_label['name'] = or_train_label['file'].str.split('.').str[0]

or_val_label = pd.read_csv(VAL_ORI_LABEL_PATH, sep=' ', header=None)
or_val_label.columns=['file', 'or_label']
or_val_label['name'] = or_val_label['file'].str.split('.').str[0]

In [33]:
or_train_label['or_label'].value_counts()

517    2600
639    2600
361    2600
744    2600
341    1300
       ... 
152     772
268     755
167     754
175     738
165     732
Name: or_label, Length: 996, dtype: int64

In [31]:
min(or_train_label['or_label'].value_counts())

732

In [14]:
or_train_label = or_train_label[or_train_label['or_label'].isin(CLASS_SUBSET)]
or_val_label = or_val_label[or_val_label['or_label'].isin(CLASS_SUBSET)]

In [17]:
(len(or_val_label)+len(or_train_label))*4

133724

In [5]:
for n, d in labels['train'].items():
    d = pd.merge(d, or_train_label, how='left', on='name')
    labels['train'][n] = d

for n, d in labels['val'].items():
    d = pd.merge(d, or_val_label, how='left', on='name')
    labels['val'][n] = d

In [6]:
labels['train']['cw_train_labels']

Unnamed: 0,path,name,label,file,or_label
0,org/n02101388_1842,n02101388_1842,0,n02101388_1842.JPEG,215
1,adv/n02101388_1842,n02101388_1842,1,n02101388_1842.JPEG,215
2,org/n02487347_9500,n02487347_9500,0,n02487347_9500.JPEG,373
3,adv/n02487347_9500,n02487347_9500,1,n02487347_9500.JPEG,373
4,org/n04120489_4237,n04120489_4237,0,n04120489_4237.JPEG,770
...,...,...,...,...,...
6439,adv/n01910747_13176,n01910747_13176,1,n01910747_13176.JPEG,107
6440,org/n02114712_17500,n02114712_17500,0,n02114712_17500.JPEG,271
6441,adv/n02114712_17500,n02114712_17500,1,n02114712_17500.JPEG,271
6442,org/n04485082_19712,n04485082_19712,0,n04485082_19712.JPEG,872


In [9]:
labels['val']['cw_val_labels']['or_label'][labels['val']['cw_val_labels']['or_label'].isin(CLASS_SUBSET)].value_counts()

0       809
1       809
2        23
3        23
4       173
       ... 
9995    587
9996    732
9997    732
9998    752
9999    752
Name: or_label, Length: 10000, dtype: int64

In [68]:
labels['val']['cw_val_labels']['or_label'][labels['val']['cw_val_labels']['or_label'].isin(CLASS_SUBSET)]

46      215
47      215
154     131
155     131
326     770
       ... 
9895    215
9944    131
9945    131
9972     72
9973     72
Name: or_label, Length: 222, dtype: int64

In [58]:
or_train_label = or_train_label[or_train_label['or_label'].isin(CLASS_SUBSET)]

In [77]:
or_train_label.reset_index(inplace=True)

In [78]:
or_train_label[or_train_label['name']=='n04485082_19712']

Unnamed: 0,index,file,or_label,name
32173,1280950,n04485082_19712.JPEG,872,n04485082_19712


In [73]:
or_train_label.iloc[:10000]['or_label'].value_counts()

122    438
436    436
770    427
309    424
331    423
271    417
459    413
72     410
344    409
88     406
215    404
467    401
100    401
861    399
615    399
103    397
21     396
492    394
701    394
131    392
373    384
107    371
664    368
662    362
872    335
Name: or_label, dtype: int64

In [65]:
labels['val']['cw_val_labels']['or_label'][labels['val']['cw_val_labels']['or_label'].isin(CLASS_SUBSET)]

46      215
47      215
154     131
155     131
326     770
       ... 
9895    215
9944    131
9945    131
9972     72
9973     72
Name: or_label, Length: 222, dtype: int64

In [None]:
from pathlib import Path

DATA_PATH = Path('/','cluster', 'scratch', 'thobauma', 'dl_data')
MAX_PATH = Path('/','cluster', 'scratch', 'mmathys', 'dl_data')

class path_dict(dict):
    def __init__(self, name:str = None, train=True, val=True, data_path=DATA_PATH, max_path=MAX_PATH):
        super().__init__()
        self.name = name
        self.train = train
        self.val = val
        self.data_path = data_path
        self.max_path=max_path
        self.create_blueprint()

    def create_blueprint(self):
        self.__setattr__(self.name) ='b':{
                    'train':{
                        'label': None,
                        'images': None
                    },
                    'val':
                    {
                        'label':None,
                        'images':None
                    }
                },
                'p':{
                    'train':{
                        'label':None,
                        'images':None
                    },
                    'val':
                    {
                        'label':None,
                        'images':None
                    }
                }
        

def create_blueprint(path):
    return {'b':{
                    'train':{
                        'label': None,
                        'images': None
                    },
                    'val':
                    {
                        'label':None,
                        'images':None
                    }
                },
                'p':{
                    'train':{
                        'label':None,
                        'images':None
                    },
                    'val':
                    {
                        'label':None,
                        'images':None
                    }
                }
    }
def create_data_paths(datasets):
    data_paths = defaultdict(create_blueprint)
    for ds in datasets:
        b = {
            'train':
            {
                'label': ,
                'images': None
            },
            'val':
            {
                'label':None,
                'images':None
            }
        }
    return data_paths

data_paths = defaultdict(create_blueprint)
data_paths['cw']