In [1]:
import json
import sys
from functools import partial
import copy
from torch.autograd import Variable

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim.lr_scheduler as lr_scheduler
import tqdm
import wandb
from ignite.contrib.handlers import wandb_logger
from ignite.engine import (Engine, Events, create_supervised_evaluator,
                           create_supervised_trainer)
from ignite.handlers import ModelCheckpoint
from ignite.handlers.param_scheduler import LRScheduler
from ignite.metrics import Accuracy, Loss
from scipy.io.arff import loadarff
from sklearn.model_selection import train_test_split
from torch import nn
from torch.functional import F
from torch.utils.data import DataLoader, Dataset, SubsetRandomSampler

sys.path.append('../')
from attacks.deepfool import deepfool
from src.datasets import FordDataset
from src.models import TransformerClassification
from src.utils import build_optimizer, str2torch

In [2]:
# Read the experiment configuration from its JSON file.
with open('../configs/transformer_87.json') as config_file:
    config = json.load(config_file)

# Swap the serialized optimizer entry for whatever str2torch maps it to
# (presumably the matching torch optimizer object - confirm in src.utils).
config['train']['optimizer'] = str2torch(config['train']['optimizer'])

In [3]:
# FordA test split, served in its stored order in batches of 64.
test_path = "../data/FordA/FordA_TEST.arff"
test_dataset = FordDataset(test_path, config['data'])
test_dataloader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)

In [4]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = TransformerClassification(config).to(device)

# map_location remaps every tensor stored in the checkpoint onto `device`, so a
# checkpoint written from a CUDA run still loads on a CPU-only machine.
weights = torch.load('../models/trans_2outp_87.pth', map_location=device)
model.load_state_dict(weights)

<All keys matched successfully>

In [5]:
def validation_step(engine, batch):
    """Run one evaluation batch through the global ``model``.

    :param engine: the engine invoking this step (not used here).
    :param batch: indexable pair; ``batch[0]`` holds the inputs and
        ``batch[1]`` the targets.
    :return: tuple ``(model output, targets)``, both moved to ``device``.
    """
    model.eval()
    with torch.no_grad():
        inputs = batch[0].to(device)
        targets = batch[1].to(device)
        outputs = model(inputs)
    return outputs, targets

def _logits_to_labels(output):
    """Map the step output ``(scores, targets)`` to ``(argmax over dim 1, targets)``."""
    return torch.argmax(output[0], dim=1), output[1]


# Engine that calls validation_step for every batch it is given.
test_evaluator = Engine(validation_step)

# Attach metrics to the evaluators
metrics = {'accuracy': Accuracy(output_transform=_logits_to_labels)}
for name, metric in metrics.items():
    metric.attach(test_evaluator, name)

In [6]:
# Engine.run returns the engine's state, which carries the attached metrics.
final_state = test_evaluator.run(test_dataloader)
final_state.metrics['accuracy']

  attn_output = scaled_dot_product_attention(q, k, v, attn_mask, dropout_p, is_causal)


0.8616477272727273

In [7]:
def deepfool(signal, model, num_classes=10, overshoot=0.02, max_iter=50, device='cpu'):
    """
       DeepFool attack on a single sample: iteratively push the input across the
       nearest (linearised) decision boundary until the predicted class changes.

       NOTE: this definition shadows any ``deepfool`` imported earlier in the notebook.

       :param signal: input tensor holding ONE sample including its batch axis
           (only row 0 of the model output is read).
       :param model: network (input: signals, output: values of activation **BEFORE** softmax).
           The caller is responsible for putting it in eval mode.
       :param num_classes: number of top-scoring classes to test against (default = 10).
           It is clamped to the number of logits the model actually produces.
       :param overshoot: factor by which the accumulated perturbation is enlarged
           ((1 + overshoot) * r_tot) so the sample ends up past the boundary (default = 0.02).
       :param max_iter: maximum number of iterations for deepfool (default = 50)
       :param device: kept for backward compatibility only; the perturbation is always
           placed on ``signal.device``, the only device on which it can be added to ``signal``.
       :return: tuple ``(r_tot, loop_i, label, k_i, pert_signal)``: the total perturbation
           (numpy array, already scaled by 1 + overshoot), the number of iterations run,
           the original predicted label, the label predicted for the perturbed signal
           (equal to ``label`` when the attack did not succeed within ``max_iter``)
           and the perturbed signal tensor.
       :raises ValueError: if fewer than two candidate classes are available, because
           there is then no other class to push the sample towards.
    """

    # The initial prediction only ranks the classes, so no autograd graph is needed.
    with torch.no_grad():
        f_signal = model(signal).detach().cpu().numpy().flatten()
    I = f_signal.argsort()[::-1]

    # Never ask for more candidate classes than the model has logits; indexing
    # I[k] would otherwise raise IndexError (e.g. default 10 on a 2-class model).
    num_classes = min(num_classes, len(I))
    if num_classes < 2:
        raise ValueError("deepfool needs at least two candidate classes")

    I = I[0:num_classes]
    label = I[0]

    input_shape = tuple(signal.shape)
    pert_signal = signal.detach().clone()
    w = np.zeros(input_shape)
    r_tot = np.zeros(input_shape)

    loop_i = 0

    x = signal.detach().clone().requires_grad_(True)
    fs = model(x)
    k_i = label

    while k_i == label and loop_i < max_iter:

        pert = np.inf
        # autograd.grad differentiates w.r.t. x only, so the model parameters'
        # .grad buffers are left untouched (unlike .backward()).
        grad_orig = torch.autograd.grad(
            fs[0, int(I[0])], x, retain_graph=True
        )[0].cpu().numpy().copy()

        for k in range(1, num_classes):
            cur_grad = torch.autograd.grad(
                fs[0, int(I[k])], x, retain_graph=True
            )[0].cpu().numpy().copy()

            # set new w_k and new f_k
            w_k = cur_grad - grad_orig
            f_k = (fs[0, int(I[k])] - fs[0, int(I[0])]).detach().cpu().numpy()

            # distance to the linearised boundary between class I[k] and the original class
            pert_k = abs(f_k) / (np.linalg.norm(w_k.flatten()) + 1e-10)

            # determine which w_k to use
            if pert_k < pert:
                pert = pert_k
                w = w_k

        # compute r_i and r_tot
        # Added 1e-4 for numerical stability
        r_i = (pert + 1e-4) * w / (np.linalg.norm(w) + 1e-10)
        r_tot = np.float32(r_tot + r_i)

        pert_signal = signal + (1 + overshoot) * torch.from_numpy(r_tot).to(signal.device)

        # Fresh leaf tensor for the next linearisation step.
        x = pert_signal.detach().requires_grad_(True)
        fs = model(x)
        k_i = np.argmax(fs.detach().cpu().numpy().flatten())

        loop_i += 1

    r_tot = (1 + overshoot) * r_tot

    return r_tot, loop_i, label, k_i, pert_signal

In [13]:
# DeepFool differentiates through the model, so any train-only behaviour
# (e.g. dropout, if the model has it) must be switched off. Do it here instead
# of relying on the evaluation cell having been run first.
model.eval()

# Number of DeepFool iterations used for each test sample.
iters = []
for i in tqdm.tqdm(range(len(test_dataset))):
    test_sample = torch.from_numpy(test_dataset[i][0]).unsqueeze(0).to(device)
    r_tot, loop_i, label, k_i, pert_image = deepfool(
        test_sample, model, num_classes=2, overshoot=0.2, max_iter=30, device=device
    )
    iters.append(loop_i)

100%|██████████| 10560/10560 [04:13<00:00, 41.73it/s]


In [16]:
# Distinct per-sample iteration counts and how many samples produced each of them.
unique, counts = np.unique(iters, return_counts=True)

In [17]:
# Display the distinct iteration counts next to their frequencies.
unique, counts

(array([ 1,  2,  3,  4,  5,  6,  7,  8, 30]),
 array([4770, 3299, 1125,  269,   73,   19,    8,    2,  995], dtype=int64))

In [52]:
# Mean and standard deviation of the per-sample iteration counts.
# Every entry of `iters` is included, also the runs that stopped at the iteration cap.
np.mean(iters), np.std(iters)

(4.869223484848485, 8.161662027340961)

In [1]:
import torch

In [2]:
# Load a saved checkpoint for inspection.
# NOTE(review): this is an absolute, user-specific Windows path, so the cell only
# runs on the original author's machine - consider a path relative to the repository.
weights = torch.load(r'C:\Users\ptmeg\OneDrive\Документы\Skoltech\Term3\ML\ts_robustness\wandb\run-20240311_215216-udqdkcx2\saved_models\best_model_5_accuracy=0.8653.pt')

In [3]:
# Display the loaded checkpoint object in full.
# NOTE(review): this prints every stored tensor; a summary of names and shapes
# would keep the notebook output readable.
weights

OrderedDict([('embedding_layer.weight',
              tensor([[[-0.6251,  0.3863,  0.2842]],
              
                      [[ 0.1170, -0.4436,  0.1889]],
              
                      [[ 0.2122, -0.4185,  0.0942]],
              
                      [[-0.2163, -0.4803, -0.1294]],
              
                      [[ 0.3517, -0.1285,  0.0312]],
              
                      [[-0.3919, -0.3706,  0.1335]],
              
                      [[ 0.4829,  0.2424, -0.2752]],
              
                      [[-0.3305,  0.0268, -0.2034]],
              
                      [[-0.0890,  0.4211,  0.3742]],
              
                      [[-0.2234, -0.0377,  0.4886]],
              
                      [[ 0.1856,  0.4476, -0.0331]],
              
                      [[ 0.3207, -0.0376, -0.4554]],
              
                      [[-0.1824, -0.2432,  0.2769]],
              
                      [[-0.3588, -0.2687,  0.1051]],
              
        