# Posthoc Classifier

## Install, Paths and Parameters

In [32]:
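# Imports. NOTE: no autoreload extension is actually loaded in this cell, so edits to external Python files require a kernel restart (or importlib.reload).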
import os
from pathlib import Path
import getpass
import numpy as np
import time
import torch
from torch import nn
from tqdm import tqdm
import random
import sys

# Allow project-local imports when the notebook runs from within the project
# dir. A plain loop replaces the side-effect-only list comprehension, and the
# membership check keeps sys.path from growing each time the cell is re-run.
for _extra_path in ('.', '..'):
    if _extra_path not in sys.path:
        sys.path.append(_extra_path)

# local
from src.helpers.helpers import get_random_indexes, get_random_classes
from src.model.dino_model import get_dino
from src.model.data import create_loader, adv_dataset
from src.model.eval import validate_network

# Fix the seed of every RNG used in this notebook (NumPy, PyTorch, stdlib)
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)
random.seed(SEED)

# NOTE(review): `username` is looked up but the data root below hard-codes
# 'thobauma' — confirm which of the two is intended.
username = getpass.getuser()
DATA_PATH = Path('/cluster/scratch/thobauma/dl_data')

# DamageNet: dataset root, label file and image directory
DN_PATH = DATA_PATH / 'damageNet'
DN_LABEL_PATH = DN_PATH / 'val_damagenet.txt'
DN_IMAGES_PATH = DN_PATH / 'images'

# Original validation data: root, label file and image directory
ORI_PATH = DATA_PATH / 'ori_data' / 'validation'
ORI_LABEL_PATH = ORI_PATH / 'correct_labels.txt'
ORI_IMAGES_PATH = ORI_PATH / 'images'

In [33]:
# Subset selection passed to create_loader.
# If CLASS_SUBSET is specified, INDEX_SUBSET will be ignored. Set CLASS_SUBSET=None if you want to use indexes.
# The helper calls run first (NOTE(review): they presumably draw from the seeded RNGs,
# so removing them could change later random state — confirm before deleting).
INDEX_SUBSET = get_random_indexes()
CLASS_SUBSET = get_random_classes()
# Both selections are then overridden with None; comment out the matching line
# below to actually use a subset.
# NOTE(review): presumably None means "no restriction" — confirm in create_loader.
INDEX_SUBSET = None
CLASS_SUBSET = None

# Batch size handed to create_loader
BATCH_SIZE = 1

# Device name string
DEVICE = 'cuda'

In [3]:
#!python $HOME/deeplearning/setup/collect_env.py

## Import DINO
Official repo: https://github.com/facebookresearch/dino

In [29]:
# Build the DINO backbone and its linear classifier through the project helper.
# Per the recorded cell output, calling it without arguments loads the reference
# pretrained DINO weights (vit_small) and the reference pretrained linear weights.
model, linear_classifier = get_dino()

Please use the `--pretrained_weights` argument to indicate the path of the checkpoint to evaluate.
Since no pretrained weights have been provided, we load the reference pretrained DINO weights.
Model vit_small built.
We load the reference pretrained linear weights.


## Load data

In [34]:
# Loader for the original validation images and labels; the subset selectors and
# batch size configured above are passed straight through to create_loader.
ori_loader = create_loader(ORI_IMAGES_PATH, ORI_LABEL_PATH, INDEX_SUBSET, CLASS_SUBSET, BATCH_SIZE)

In [35]:
# Loader for the DamageNet images and labels, built with the same subset
# selectors and batch size as the original-data loader.
dn_loader = create_loader(DN_IMAGES_PATH, DN_LABEL_PATH, INDEX_SUBSET, CLASS_SUBSET, BATCH_SIZE)

## Adversarial sample generation

In [36]:
def generate_model_output(inp, n=4):
    """Run one sample through the global `model` and return its features.

    Args:
        inp: A single, un-batched input tensor; a leading dimension is added
            here before the forward pass.
        n: Forwarded unchanged to `model.get_intermediate_layers`.

    Returns:
        A tensor made by taking index 0 along dim 1 of every intermediate
        output (presumably the [CLS] token — confirm against the DINO model)
        and concatenating the results along the last dimension.
    """
    # Use the notebook-wide DEVICE setting rather than a hard-coded "cuda"
    # string, so the target device is configured in exactly one place.
    inp = inp.to(DEVICE)
    # add one dimension to input image (get_intermediate_layers expects it)
    inp = inp.unsqueeze(dim=0)
    intermediate_output = model.get_intermediate_layers(inp, n)
    return torch.cat([x[:, 0] for x in intermediate_output], dim=-1)

In [38]:
class AdvTupleIterator:
    """Iterator over (identifier, original features, adversarial features).

    Each step pulls one triple from `adv_dataset(...)` and replaces the two
    samples with the output of `generate_model_output`.
    """

    def __init__(self, ori_loader, dn_loader, model, linear_classifier):
        # The source is consumed with next(), so adv_dataset must hand back an
        # iterator. NOTE: `model` is only forwarded to adv_dataset here;
        # generate_model_output uses the module-level `model` instead.
        self.samples = adv_dataset(ori_loader, dn_loader, model, linear_classifier)

    def __iter__(self):
        return self

    def __next__(self):
        sample_id, original, adversarial = next(self.samples)
        # Original is processed before adversarial, in that order.
        return (
            sample_id,
            generate_model_output(original),
            generate_model_output(adversarial),
        )

In [None]:
# Smoke test: pull a fixed number of tuples from the iterator and show progress

total = 10
samples = AdvTupleIterator(ori_loader, dn_loader, model, linear_classifier)

for i in range(total):
    num, org, adv = next(samples)
    # "\r" returns to the start of the line so the counter is updated in place
    print(f"\rtuple {i+1}/{total} ({num})", end="", flush=True)

## Posthoc Classifier

In [13]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [11]:
# Simple Binary Classifier Network
class SimpleBC(nn.Module):
    """Three-layer fully connected binary classifier.

    Layer widths are input_shape -> 64 -> 32 -> 1, with ReLU after the first
    two layers and a sigmoid on the final one, so `forward` returns values in
    [0, 1] with a trailing dimension of size 1.
    """

    def __init__(self, input_shape):
        super().__init__()
        # Layers are created in fc1, fc2, fc3 order so parameter
        # initialisation consumes the RNG in the same sequence as before.
        self.fc1 = nn.Linear(in_features=input_shape, out_features=64)
        self.fc2 = nn.Linear(in_features=64, out_features=32)
        self.fc3 = nn.Linear(in_features=32, out_features=1)

    def forward(self, x):
        hidden = self.fc1(x).relu()
        hidden = self.fc2(hidden).relu()
        return self.fc3(hidden).sigmoid()

In [14]:
from tqdm import tqdm

# Hyperparameters
EPOCHS = 10

# NOTE(review): `train_loader` and `test_loader` are not defined in any visible
# cell (the recorded run of this cell stopped with a NameError). They must be
# created before this cell is run; both are iterated as (inputs, labels) batches.

# Initialise network
# NOTE(review): 1536 must match the width of the feature vectors fed in —
# presumably 4 concatenated 384-dim ViT-S outputs; confirm.
net = SimpleBC(1536)

# Select device
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
net.to(device)

# define loss, optimizer, and scheduler
criterion = nn.BCELoss()
# optimizer = optim.Adam(net.parameters(), lr=0.001)
optimizer = optim.Adagrad(net.parameters(), lr=0.01, lr_decay=1e-08, weight_decay=0)
# scheduler = MultiStepLR(optimizer, milestones=[30,80], gamma=0.1)

# Per-epoch history of the mean training loss / accuracy
losses = []
accur = []

# Train network
pbar = tqdm(range(EPOCHS))
for epoch in pbar:  # loop over the dataset multiple times

    # Metrics: running sums plus their per-batch means
    train_running_loss = 0.0
    train_running_loss_mean = 0.0
    train_acc = 0.0
    train_acc_mean = 0.0
    test_running_loss = 0.0
    test_running_loss_mean = 0.0
    test_acc = 0.0
    test_acc_mean = 0.0
    test_batches = 0

    # Back to training mode (the evaluation pass below switches to eval mode)
    net.train()
    for i, (inputs, labels) in enumerate(train_loader, start=0):
        inputs = inputs.to(device)
        # Flatten labels so they line up with the flattened outputs; without
        # this, (B, 1) labels would broadcast to (B, B) in np.equal below.
        labels = labels.to(device).float().reshape(-1)

        # Forward Pass
        outputs = net(inputs).float().reshape(-1)

        # Backpropagation
        optimizer.zero_grad()  # Reset the gradient
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # loss train
        train_running_loss += loss.item()
        train_running_loss_mean = train_running_loss / (i + 1)

        # accuracy train: reuse the outputs the loss was computed on instead
        # of a second, gradient-tracking forward pass
        predicted = outputs.detach().cpu().numpy().round()
        train_acc += np.equal(predicted, labels.detach().cpu().numpy()).mean()
        train_acc_mean = train_acc / (i + 1)

    # Evaluation pass: no gradients, module in eval mode
    net.eval()
    with torch.no_grad():
        for inputs, labels in test_loader:
            try:
                # write the batch to the device
                inputs = inputs.to(device)
                labels = labels.to(device).float().reshape(-1)

                # Forward Pass
                outputs = net(inputs).float().reshape(-1)

                # loss / accuracy for this batch
                batch_loss = criterion(outputs, labels).item()
                predicted = outputs.cpu().numpy().round()
                batch_acc = np.equal(labels.cpu().numpy(), predicted).mean()
            except (RuntimeError, ValueError) as e:
                # Best effort: report the failing batch and keep evaluating.
                # It is left out of the averages so it does not skew them.
                print("Error: {}".format(e))
                continue

            # Accumulate over batches (previously the accuracy was overwritten
            # by the last batch and the loss was reported as a raw sum).
            test_batches += 1
            test_running_loss += batch_loss
            test_acc += batch_acc
            test_running_loss_mean = test_running_loss / test_batches
            test_acc_mean = test_acc / test_batches

    losses.append(train_running_loss_mean)
    accur.append(train_acc_mean)
    pbar.set_description("Ep: {}\t Tr. Loss: {:.4f}\t Tr. Acc: {:.4f}\t T. Loss: {:.4f}\t T. Acc: {:.4f}".format(
        epoch,
        train_running_loss_mean,
        train_acc_mean,
        test_running_loss_mean,
        test_acc_mean))

print('Finished Training')

  0%|          | 0/10 [00:00<?, ?it/s]


NameError: name 'train_loader' is not defined