<a href="https://colab.research.google.com/github/mostafa-ja/mal_adv3/blob/main/adverserial_attacks_functions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split
from scipy import sparse
import gdown
import numpy as np
import matplotlib.pyplot as plt
import pickle
import torch.nn.functional as F
import random

In [None]:
def round_x(x, round_threshold=0.5):
    """
    Binarize a tensor by comparing each entry against a threshold.

    :param x: input tensor
    :param round_threshold: cut-off (scalar or broadcastable tensor); entries
        greater than or equal to it become 1, all others become 0
    :return: float tensor of 0s and 1s
    """
    at_or_above = x >= round_threshold
    binarized = at_or_above.float()
    return binarized

def get_x0(x, initial_rounding_threshold=0.5, is_sample=False):
    """
    Helper function to pick the starting point of the inner maximizer.

    When sampling is requested, a random binary mask is OR-ed into x, so every
    bit that is set in x stays set and only extra bits may be switched on.

    :param x: input tensor (expected to hold 0s and 1s)
    :param initial_rounding_threshold: threshold applied to the uniform random
        draw; a random entry becomes 1 when it is >= this value
    :param is_sample: flag to sample randomly from the feasible area
    :return: x itself when is_sample is False, otherwise a new float tensor of
        0s and 1s that dominates x element-wise
    """
    if not is_sample:
        return x

    # Draw the noise on x's device so the bitwise OR below never mixes devices.
    random_mask = torch.rand(x.size(), device=x.device) >= initial_rounding_threshold
    return (random_mask.byte() | x.byte()).float()

def or_float_tensors(x_1, x_2):
    """
    Element-wise logical OR of two float tensors holding 0s and 1s.

    Both operands are cast to byte, combined bitwise, and cast back to float.

    :param x_1: tensor one
    :param x_2: tensor two
    :return: float tensor of 0s and 1s
    """
    left_bits = x_1.byte()
    right_bits = x_2.byte()
    combined = left_bits | right_bits
    return combined.float()


def xor_float_tensors(x_1, x_2):
    """
    Element-wise XOR of two float tensors holding 0s and 1s.

    Both operands are cast to byte, combined bitwise, and cast back to float.
    The function is meant for tensors of exact 0s and 1s only: the byte cast
    drops the fractional part, so 0.0 and 0.1 both become 0 while 1.1
    becomes 1.

    :param x_1: tensor one
    :param x_2: tensor two
    :return: float tensor of 0s and 1s.
    """
    left_bits = x_1.byte()
    right_bits = x_2.byte()
    flipped = left_bits ^ right_bits
    return flipped.float()

def get_loss(x,y,model):
    """
    Compute the per-sample cross-entropy loss and a misclassification mask.

    :param x: input batch passed to the model
    :param y: ground truth labels, either shape (N,) or (N, 1)
    :param model: callable returning class scores of shape (N, num_classes)
    :return: tuple (loss, done); loss has shape (N,), and done is a bool tensor
        of shape (N,) that is True where the top-1 prediction differs from the
        label
    """
    criterion = nn.CrossEntropyLoss(reduction='none')
    labels = y.view(-1).long()
    outputs = model(x)
    loss = criterion(outputs, labels)
    _, predicted = torch.topk(outputs, k=1)
    # Compare flattened tensors: comparing the (N, 1) predictions against 1-D
    # labels would broadcast to an (N, N) matrix, and squeezing an (N, 1) mask
    # collapses to a 0-d tensor when N == 1.
    done = predicted.view(-1) != labels

    return loss, done



In [None]:
def dfgsm_k(x, y, model, k=25, epsilon=0.02, alpha=1., initial_rounding_threshold=0.5, round_threshold=0.5, random=False, is_report_loss_diff=True, is_sample=False, device="cpu"):
    """
    FGSM^k followed by a rounding step (deterministic by default).

    :param x: feature vector
    :param y: ground truth labels, shape (N,) or (N, 1)
    :param model: neural network model
    :param k: number of steps
    :param epsilon: update value in each direction
    :param alpha: scale of the random rounding thresholds (used only when random is True)
    :param initial_rounding_threshold: threshold parameter for rounding the initial x_next
    :param round_threshold: threshold parameter for the final rounding
    :param random: if True, draw a per-entry rounding threshold uniformly from [0, alpha)
    :param is_report_loss_diff: flag to print the attack effectiveness
    :param is_sample: flag to sample the starting point randomly from the feasible area
    :param device: unused; all tensors follow x.device (kept for interface compatibility)
    :return: the adversarial version of x according to dfgsm_k (tensor)
    """
    model.eval()

    criterion = nn.CrossEntropyLoss(reduction='none')
    labels = y.view(-1).long()

    # The natural loss is only compared against later, so no graph is needed.
    with torch.no_grad():
        loss_natural = criterion(model(x), labels)

    # Initialize starting point
    x_next = get_x0(x.clone(), initial_rounding_threshold, is_sample)

    # Multi-step signed-gradient ascent inside the [0, 1] box
    for _ in range(k):
        x_var = x_next.clone().detach().requires_grad_(True)
        loss = criterion(model(x_var), labels)

        grad_vars = torch.autograd.grad(loss.mean(), x_var)
        x_next = x_next + epsilon * torch.sign(grad_vars[0].data)

        # Projection
        x_next = torch.clamp(x_next, min=0., max=1.)

    # Rounding step
    if random:
        round_threshold = (torch.rand(x_next.size()) * alpha).to(device=x.device)
    x_next = round_x(x_next, round_threshold=round_threshold)

    # Feasible projection: bits that are set in x must stay set
    x_next = or_float_tensors(x_next, x)

    # A single forward pass serves both the adversarial loss and the report.
    with torch.no_grad():
        outputs = model(x_next)
        loss_adv = criterion(outputs, labels)

    if is_report_loss_diff:
        _, predicted = torch.topk(outputs, k=1)
        # Flatten both sides so 1-D labels do not broadcast to an (N, N) mask.
        done = predicted.view(-1) != labels
        print(f"dFGSM: attack effectiveness {done.sum().item() / x.size()[0] * 100:.3f}%.")

    # Fall back to the natural sample wherever the attack lowered the loss
    replace_flag = loss_adv < loss_natural
    x_next[replace_flag] = x[replace_flag]

    return x_next


In [None]:
def bga_k(x, y, model, k=25, alpha=1., is_report_loss_diff=True, use_sample=False):
    """
    Multi-step bit gradient ascent

    :param x: feature vector
    :param y: ground truth labels, shape (N,) or (N, 1)
    :param model: neural network model
    :param k: number of steps
    :param alpha: hyperparameter for controlling updates
    :param is_report_loss_diff: flag to print the attack effectiveness
    :param use_sample: flag to sample the starting point randomly from the feasible area
    :return: the adversarial version of x according to bga_k (tensor); for each
        sample, the visited point with the highest loss
    """
    model.eval()

    criterion = nn.CrossEntropyLoss(reduction='none')
    labels = y.view(-1).long()

    # The natural loss only seeds the "worst so far" bookkeeping.
    with torch.no_grad():
        loss_natural = criterion(model(x), labels)

    # Initialize worst loss and corresponding adversarial samples
    loss_worst = loss_natural.clone()
    x_worst = x.clone()

    # Book-keeping
    sqrt_m = (torch.sqrt(torch.tensor([x.size()[1]], dtype=torch.float))).to(x.device)

    loss = None
    x_var = None
    x_next = None
    for t in range(k):
        if t == 0:
            # Starting point. The flag used to be forwarded positionally to
            # get_x0, where it landed in the threshold slot and was ignored;
            # sampling is therefore done here directly.
            if use_sample:
                random_bits = (torch.rand(x.size(), device=x.device) >= 0.5).float()
                x_next = or_float_tensors(random_bits, x)
            else:
                x_next = x
        else:
            # Gradient of the previous forward pass
            grad_vars = torch.autograd.grad(loss.mean(), x_var)
            grad_data = grad_vars[0].data

            # Flip a bit when its flip-direction gradient, scaled by sqrt(m),
            # reaches alpha times the per-sample L2 norm of the gradient
            # (torch.norm(grad_data, 2, 1): L2 norm along dimension 1).
            x_update = (sqrt_m * (1. - 2. * x_next) * grad_data >= (alpha * torch.norm(grad_data, 2, 1).unsqueeze(1))).float()

            # Find the next sample with projection to the feasible set
            x_next = xor_float_tensors(x_update, x_next)
            x_next = or_float_tensors(x_next, x)

        # Forward pass
        x_var = x_next.clone().detach().requires_grad_(True)
        loss = criterion(model(x_var), labels)

        # Update worst loss and adversarial samples
        replace_flag = (loss.data > loss_worst)
        loss_worst[replace_flag] = loss.data[replace_flag]
        x_worst[replace_flag] = x_next[replace_flag]

    if is_report_loss_diff:
        with torch.no_grad():
            outputs = model(x_worst)
        _, predicted = torch.topk(outputs, k=1)
        # Flatten both sides so 1-D labels do not broadcast to an (N, N) mask.
        done = predicted.view(-1) != labels
        print(f"bga_k: attack effectiveness {done.sum().item() / x.size()[0] * 100:.3f}%.")

    return x_worst


In [None]:
def bca_k(x, y, model, k=25, is_report_loss_diff=True, use_sample=False):
    """
    Multi-step bit coordinate ascent

    :param x: (tensor) feature vector
    :param y: ground truth labels, shape (N,) or (N, 1)
    :param model: nn model
    :param k: num of steps
    :param is_report_loss_diff: flag to print the attack effectiveness
    :param use_sample: flag to sample the starting point randomly from the feasible area
    :return: the adversarial version of x according to bca_k (tensor); for each
        sample, the visited point with the highest loss
    """
    model.eval()

    criterion = nn.CrossEntropyLoss(reduction='none')
    labels = y.view(-1).long()

    # The natural loss only seeds the "worst so far" bookkeeping.
    with torch.no_grad():
        loss_natural = criterion(model(x), labels)

    # keeping worst loss
    loss_worst = loss_natural.clone()
    x_worst = x.clone()

    # multi-step with gradients
    loss = None
    x_var = None
    x_next = None
    for t in range(k):
        if t == 0:
            # Starting point. The flag used to be forwarded positionally to
            # get_x0, where it landed in the threshold slot and was ignored;
            # sampling is therefore done here directly.
            if use_sample:
                random_bits = (torch.rand(x.size(), device=x.device) >= 0.5).float()
                x_next = or_float_tensors(random_bits, x)
            else:
                x_next = x
        else:
            # gradient of the previous forward pass
            grad_vars = torch.autograd.grad(loss.mean(), x_var)
            grad_data = grad_vars[0].data

            # Only bits that are currently unset are candidates; masking (rather
            # than multiplying by 1 - 2*x_next) keeps negative gradients on set
            # bits from turning into positive scores.
            aug_grad = (x_next < 0.5) * grad_data
            val, _ = torch.topk(aug_grad, 1)
            x_update = (aug_grad >= val.expand_as(aug_grad)).float()

            # find the next sample with projection to the feasible set
            x_next = xor_float_tensors(x_update, x_next)
            x_next = or_float_tensors(x_next, x)

        # forward pass
        x_var = x_next.clone().detach().requires_grad_(True)
        loss = criterion(model(x_var), labels)

        # update worst loss and adversarial samples
        replace_flag = (loss.data > loss_worst)
        loss_worst[replace_flag] = loss.data[replace_flag]
        x_worst[replace_flag] = x_next[replace_flag]

    if is_report_loss_diff:
        with torch.no_grad():
            outputs = model(x_worst)
        _, predicted = torch.topk(outputs, k=1)
        # Flatten both sides so 1-D labels do not broadcast to an (N, N) mask.
        done = predicted.view(-1) != labels
        print(f"bca_k: attack effectiveness {done.sum().item() / x.size()[0] * 100:.3f}%.")

    return x_worst

In [None]:
def grosse_k(x, y, model, k=25, is_report_loss_diff=True, use_sample=False):
    """
    Multi-step attack that, at every step, sets the unset bit with the largest
    positive gradient of the model's first output (column 0).

    Unlike bca_k, the gradient is taken of the first output score rather than
    of the loss, and a bit is only set when its gradient exceeds a small
    epsilon.

    :param x: (tensor) feature vector
    :param y: ground truth labels, shape (N,) or (N, 1)
    :param model: nn model; output column 0 is treated as the benign score
    :param k: num of steps
    :param is_report_loss_diff: flag to print the attack effectiveness
    :param use_sample: flag to sample the starting point randomly from the feasible area
    :return: the adversarial version of x according to grosse_k (tensor); for
        each sample, the visited point with the highest loss
    """
    epsilon = 1e-10 # gradients at or below this value do not trigger an update

    model.eval()

    criterion = nn.CrossEntropyLoss(reduction='none')
    labels = y.view(-1).long()

    # The natural loss only seeds the "worst so far" bookkeeping.
    with torch.no_grad():
        loss_natural = criterion(model(x), labels)

    # keeping worst loss
    loss_worst = loss_natural.clone()
    x_worst = x.clone()

    # multi-step with gradients
    output = None
    x_var = None
    x_next = None
    for t in range(k):
        if t == 0:
            # Starting point. The flag used to be forwarded positionally to
            # get_x0, where it landed in the threshold slot and was ignored;
            # sampling is therefore done here directly.
            if use_sample:
                random_bits = (torch.rand(x.size(), device=x.device) >= 0.5).float()
                x_next = or_float_tensors(random_bits, x)
            else:
                x_next = x
        else:
            # output has shape (batch_size, 2); only the first output (benign)
            # is differentiated
            grad_vars = torch.autograd.grad(output[:, 0].mean(), x_var)
            grad_data = grad_vars[0].data

            # Only currently unset bits are candidates; pick the largest one and
            # drop it again if its gradient is not strictly above epsilon.
            aug_grad = (x_next < 0.5) * grad_data
            val, _ = torch.topk(aug_grad, 1)
            x_update = ((aug_grad >= val.expand_as(aug_grad)).float()) * (aug_grad > epsilon)

            # find the next sample with projection to the feasible set
            x_next = xor_float_tensors(x_update, x_next)
            x_next = or_float_tensors(x_next, x)

        # forward pass
        x_var = x_next.clone().detach().requires_grad_(True)
        output = model(x_var)
        loss = criterion(output, labels)

        # update worst loss and adversarial samples
        replace_flag = (loss.data > loss_worst)
        loss_worst[replace_flag] = loss.data[replace_flag]
        x_worst[replace_flag] = x_next[replace_flag]

    if is_report_loss_diff:
        with torch.no_grad():
            outputs = model(x_worst)
        _, predicted = torch.topk(outputs, k=1)
        # Flatten both sides so 1-D labels do not broadcast to an (N, N) mask.
        done = predicted.view(-1) != labels
        print(f"grosse_k: attack effectiveness {done.sum().item() / x.size()[0] * 100:.3f}%.")

    return x_worst

In [None]:
def pgd(x, y, model, k=25, step_length=0.02, norm='linf', initial_rounding_threshold=0.5, round_threshold=0.5, random=False, is_report_loss_diff=True, is_sample=False):
    """
    Projected Gradient Descent (PGD) adversarial attack with a final rounding step.

    Only features that are unset in x (value < 0.5) receive gradient, so the
    attack can add features but never remove them.

    :param x: Feature vector
    :param y: Ground truth labels, shape (N,) or (N, 1)
    :param model: Neural network model
    :param k: Number of steps
    :param step_length: Step size for each iteration
    :param norm: Norm used for perturbation ('linf', 'l2', 'l2_2' or 'l1')
    :param initial_rounding_threshold: Threshold parameter for rounding the initial x_next
    :param round_threshold: Threshold parameter for rounding
    :param random: Flag to generate random per-entry rounding thresholds
    :param is_report_loss_diff: Flag to print the attack effectiveness
    :param is_sample: Flag to sample the starting point randomly from the feasible area
    :return: The adversarial version of x (tensor)
    :raises ValueError: if norm is not one of the supported values
    """
    model.eval()

    criterion = nn.CrossEntropyLoss(reduction='none')
    labels = y.view(-1).long()

    # The natural loss is only compared against later, so no graph is needed.
    with torch.no_grad():
        loss_natural = criterion(model(x), labels)

    # Initialize starting point
    x_next = get_x0(x.clone(), initial_rounding_threshold, is_sample)

    # Upper bound used to cap normalized gradients at 1.
    one = torch.tensor(1., dtype=x.dtype, device=x.device)

    # Multi-step PGD
    for _ in range(k):
        # Forward pass
        x_var = x_next.clone().detach().requires_grad_(True)
        loss = criterion(model(x_var), labels)

        # Gradient, restricted to features that are unset in the original x
        grad_vars = torch.autograd.grad(loss.mean(), x_var)
        gradients = grad_vars[0].data * (x < 0.5)

        if norm == 'linf':
            perturbation = torch.sign(gradients)
        elif norm == 'l2_2':
            # Normalize by the per-sample maximum gradient instead of the L2 norm.
            max_grad, _ = gradients.max(dim=1, keepdim=True)
            perturbation = torch.minimum(one, gradients / max_grad)
        elif norm == 'l2':
            l2norm = torch.linalg.norm(gradients, dim=-1, keepdim=True)
            perturbation = torch.minimum(one, gradients / l2norm)
            # A zero norm yields 0/0 (nan) or x/0 (inf); map them to fixed steps.
            perturbation[torch.isnan(perturbation)] = 0.
            perturbation[torch.isinf(perturbation)] = 1.
        elif norm == 'l1':
            # Ignore the gradient of features that have already been switched on.
            gradients = gradients * (x_next < 0.5)
            val, _ = torch.topk(gradients, 1)
            perturbation = (gradients >= val.expand_as(gradients)).float()

            # Stop perturbing the examples that already evade the victim.
            with torch.no_grad():
                outputs = model(x_next)
            _, predicted = torch.topk(outputs, k=1)
            # Flatten both sides so 1-D labels do not broadcast to an (N, N) mask.
            done = predicted.view(-1) != labels
            if torch.all(done):
                break
            perturbation[done] = 0.
        else:
            raise ValueError("Expect 'l1' or 'l2' or 'linf' norm.")

        # Update x_next and project back into the [0, 1] box
        x_next = torch.clamp(x_next + perturbation * step_length, min=0., max=1.)

    # Rounding step
    if random:
        # Draw the thresholds on x's device to avoid a CPU/GPU mismatch.
        round_threshold = torch.rand(x_next.size(), device=x.device)
    x_next = round_x(x_next, round_threshold=round_threshold)

    # Feasible projection: bits that are set in x must stay set
    x_next = or_float_tensors(x_next, x)

    # A single forward pass serves both the adversarial loss and the report.
    with torch.no_grad():
        outputs = model(x_next)
        loss_adv = criterion(outputs, labels)

    if is_report_loss_diff:
        _, predicted = torch.topk(outputs, k=1)
        done = predicted.view(-1) != labels
        print(f"PGD {norm}: Attack effectiveness {done.sum().item() / x.size()[0] * 100:.3f}%.")

    # Fall back to the natural sample wherever the attack lowered the loss
    replace_flag = loss_adv < loss_natural
    x_next[replace_flag] = x[replace_flag]

    return x_next




```
def mimic_attack_effectiveness_optimized(test_loader, model, seed, trials=1000, device="cuda:0"):
    """
    Calculates the effectiveness of the mimic attack on the given model.

    Every malicious sample (label 1) is combined with `trials` randomly chosen
    benign samples (label 0) by element-wise addition clamped to [0, 1]. A
    malicious sample counts as a successful attack when at least one of its
    combinations is not classified as label 1.

    Args:
        test_loader: A PyTorch dataloader yielding (features, labels) batches.
        model: The PyTorch model to be attacked.
        seed: The random seed for reproducibility; it is incremented before
            every batch that contains malicious samples.
        trials: The number of random benign samples to use (default: 1000);
            capped at the number of benign samples available.
        device: The device to use for computations (default: "cuda:0"). There
            is no automatic CPU fallback.

    Returns:
        The effectiveness of the mimic attack as a percentage (float), or 0 if
        the loader contains no malicious samples.
    """
    torch.manual_seed(seed)
    model.eval()

    successful_attacks = 0
    total_malicious_samples = 0

    # Collect all benign samples once. Labels are flattened with reshape(-1)
    # rather than squeeze(): squeezing a batch of one yields a 0-d mask, which
    # would add a dimension to the indexed result.
    benign_samples = [x_batch[y_batch.reshape(-1) == 0] for x_batch, y_batch in test_loader]
    ben_x = torch.cat(benign_samples, dim=0).to(device)
    del benign_samples

    trials = min(trials, len(ben_x))

    for x_batch, y_batch in test_loader:
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        malicious_samples = x_batch[y_batch.reshape(-1) == 1]
        n_malicious = malicious_samples.shape[0]
        if n_malicious == 0:
            continue

        # (n_malicious, trials, features) view for broadcasting
        malicious_samples_expanded = malicious_samples.unsqueeze(1).expand(-1, trials, -1)

        # Fresh, reproducible choice of benign samples for this batch
        seed += 1
        torch.manual_seed(seed)
        indices = torch.randperm(len(ben_x), device=device)[:trials]
        trial_vectors_expanded = ben_x[indices].unsqueeze(0)

        # Perform the mimic attack and update counters
        modified_x = torch.clamp(malicious_samples_expanded + trial_vectors_expanded, min=0., max=1.)
        # Only the predictions are needed, so skip building the autograd graph.
        with torch.no_grad():
            _, done = get_loss(modified_x.view(-1, modified_x.shape[-1]), torch.ones(trials * n_malicious, 1, device=device), model)
        successful_attacks += (done.view(n_malicious, trials).sum(dim=1) > 0).sum().item()
        total_malicious_samples += n_malicious

    # Calculate and print attack effectiveness
    attack_effectiveness = (successful_attacks / total_malicious_samples) * 100 if total_malicious_samples > 0 else 0
    print(f"Mimic attack effectiveness: {attack_effectiveness:.3f}%.")

    return attack_effectiveness

```



In [None]:
def mimicry(ben_x, malicious_samples, model_DNN, trials=30, seed=230, is_report_loss_diff=False):
    """
    Perform a mimicry attack.

    Each malicious sample is combined with `trials` randomly chosen benign
    samples (element-wise sum clamped to [0, 1]). For every malicious sample
    the combination with the highest loss is returned, with combinations that
    are misclassified always ranked above those that are not.

    Args:
    - ben_x (torch.Tensor): Benign samples tensor.
    - malicious_samples (torch.Tensor): Malicious samples tensor.
    - model_DNN (torch.nn.Module): PyTorch model used for the attack.
    - trials (int): Number of trials for the attack (capped at len(ben_x)).
    - seed (int): Random seed for reproducibility.
    - is_report_loss_diff (bool): Flag to indicate whether to report attack effectiveness.

    Returns:
    - adv_x (torch.Tensor): Adversarial examples tensor, or None when there
      are no malicious samples.
    """
    # Ensure trials do not exceed the length of ben_x
    trials = min(trials, len(ben_x))

    n_samples = len(malicious_samples)
    if n_samples == 0:
        print("No malicious samples found.")
        return None

    # (n_samples, trials, features) view for broadcasting
    malicious_samples_expanded = malicious_samples.unsqueeze(1).expand(-1, trials, -1)

    # Reproducible choice of benign samples
    torch.manual_seed(seed)
    picked = torch.randperm(len(ben_x), device=ben_x.device)[:trials]
    trial_vectors_expanded = ben_x[picked].unsqueeze(0)

    # Perform the mimic attack
    pertbx = torch.clamp(malicious_samples_expanded + trial_vectors_expanded, min=0., max=1.)

    # Only loss values and predictions are needed; skip the autograd graph,
    # which would otherwise span n_samples * trials forward passes.
    with torch.no_grad():
        loss, done = get_loss(pertbx.view(-1, pertbx.shape[-1]), torch.ones(n_samples * trials, 1, device=ben_x.device), model_DNN)

    # Lift successful attacks above every unsuccessful one
    max_v = loss.max()
    loss[done] += max_v

    loss = loss.view(n_samples, trials)
    done = done.view(n_samples, trials)

    if is_report_loss_diff:
        n_done = torch.any(done, dim=-1).sum()
        print(f"Mimicry*{trials}: Attack effectiveness {n_done / n_samples * 100:.3f}%.")

    # Pick, per sample, the trial with the highest (adjusted) loss
    _, best_trial = loss.max(dim=-1)
    rows = torch.arange(n_samples, device=pertbx.device)
    adv_x = pertbx[rows, best_trial]

    return adv_x




In [None]:
def PGD_Max(x,y, model, attack_list = ['linf', 'l2', 'l1'],steps_max=5, is_sample = False, varepsilon = 1e-20):
    """
    PGD_Max adversarial attack.

    Runs every PGD variant in attack_list on the samples that have not stopped
    yet and keeps, per sample, the candidate with the highest loss. Candidates
    that evade the model take precedence when at least one exists. A sample
    stops once it evades the model or its loss changes by less than varepsilon.

    Args:
        x: Input data tensor (shape: [samples, features])
        y: Ground truth labels tensor (shape: [samples] or [samples, 1])
        model: Neural network model
        attack_list: List of norms for attacks (default: ['linf', 'l2', 'l1'])
        steps_max: Maximum number of steps (default: 5)
        is_sample: Flag to sample randomly from the feasible area (default: False)
        varepsilon: Tolerance for stopping condition (default: 1e-20)

    Returns:
        Adversarial version of input data (tensor)

    Raises:
        ValueError: if attack_list contains a norm other than 'l1', 'l2', 'linf'.
    """
    norm_params = {
        'l1': {'k': 50, 'step_length': 1.0},
        'l2': {'k': 200, 'step_length': 0.05},
        'linf': {'k': 500, 'step_length': 0.002}
    }
    n_attacks = len(attack_list)

    model.eval()
    with torch.no_grad():
        loss, done = get_loss(x,y,model) #shape:[samples],[samples]

    pre_loss = loss
    n = x.shape[0]
    adv_x = x.detach().clone()
    # Keep the mask on x's device so masked assignments never mix devices.
    stop_flag = torch.zeros(n, dtype=torch.bool, device=x.device) #[samples]

    for _ in range(steps_max):
        num_remaining = (~stop_flag).sum().item()
        print('number of remaining samples : ',num_remaining )
        if num_remaining <= 0:
            break

        remaining_label = y[~stop_flag]
        remaining_x = adv_x[~stop_flag]
        pertbx = []

        for norm in attack_list:
            if norm not in norm_params:
                raise ValueError("Expected 'l1' or 'l2' or 'linf' norm.")
            perturbation = pgd(remaining_x, remaining_label, model, norm=norm, is_sample=is_sample, **norm_params[norm])
            print("the number of added features : ", (perturbation.sum() - remaining_x.sum())/len(remaining_x))
            pertbx.append(perturbation)

        # list of n_attacks tensors (num_remaining, features)
        #   -> one tensor (n_attacks * num_remaining, features)
        pertbx = torch.vstack(pertbx)

        with torch.no_grad():
            remaining_label_ext = torch.cat([remaining_label] * n_attacks)
            loss, done = get_loss(pertbx, remaining_label_ext, model)
            loss = loss.reshape(n_attacks, num_remaining).permute(1, 0) #(num_remaining, n_attacks)
            done = done.reshape(n_attacks, num_remaining).permute(1, 0) #(num_remaining, n_attacks)

            success_flag = torch.any(done, dim=-1) #(num_remaining)
            # If at least one attack evades the victim, select among the
            # successful ones by maximum loss; if none does, every perturbed
            # example remains a candidate.
            done[~success_flag] = 1
            loss = (loss * done.to(torch.float)) + torch.min(loss) * (~done).to(torch.float)
            pertbx = pertbx.reshape(n_attacks, num_remaining, x.shape[1]).permute([1, 0, 2]) #(num_remaining, n_attacks, features)
            _, indices = loss.max(dim=-1)
            rows = torch.arange(num_remaining, device=x.device)
            adv_x[~stop_flag] = pertbx[rows, indices]
            a_loss = loss[rows, indices]
            pre_stop_flag = stop_flag.clone()
            stop_flag[~stop_flag] = (torch.abs(pre_loss[~stop_flag] - a_loss) < varepsilon) | success_flag
            pre_loss[~pre_stop_flag] = a_loss

    return adv_x

In [None]:
def PGD_Max2(x,y, model, attack_list = ['linf', 'l2', 'l1'],steps_max=5, is_sample = False, varepsilon = 1e-20):
    """
    PGD_Max2 adversarial attack.

    Same scheme as PGD_Max, but candidates that evade the model are favoured
    by adding the maximum loss of the round to their own loss before the
    per-sample maximum is taken.

    Args:
        x: Input data tensor (shape: [samples, features])
        y: Ground truth labels tensor (shape: [samples] or [samples, 1])
        model: Neural network model
        attack_list: List of norms for attacks (default: ['linf', 'l2', 'l1'])
        steps_max: Maximum number of steps (default: 5)
        is_sample: Flag to sample randomly from the feasible area (default: False)
        varepsilon: Tolerance for stopping condition (default: 1e-20)

    Returns:
        Adversarial version of input data (tensor)

    Raises:
        ValueError: if attack_list contains a norm other than 'l1', 'l2', 'linf'.
    """
    norm_params = {
        'l1': {'k': 50, 'step_length': 1.0},
        'l2': {'k': 200, 'step_length': 0.05},
        'linf': {'k': 500, 'step_length': 0.002}
    }
    n_attacks = len(attack_list)

    model.eval()
    with torch.no_grad():
        loss, done = get_loss(x,y,model) #shape:[samples],[samples]

    pre_loss = loss
    n = x.shape[0]
    adv_x = x.detach().clone()
    # Keep the mask on x's device so masked assignments never mix devices.
    stop_flag = torch.zeros(n, dtype=torch.bool, device=x.device) #[samples]

    for _ in range(steps_max):
        num_remaining = (~stop_flag).sum().item()
        print('number of remaining samples : ',num_remaining )
        if num_remaining <= 0:
            break

        remaining_label = y[~stop_flag]
        remaining_x = adv_x[~stop_flag]
        pertbx = []

        for norm in attack_list:
            if norm not in norm_params:
                raise ValueError("Expected 'l1' or 'l2' or 'linf' norm.")
            perturbation = pgd(remaining_x, remaining_label, model, norm=norm, is_sample=is_sample, **norm_params[norm])
            print("the number of added features : ", (perturbation.sum() - remaining_x.sum())/len(remaining_x))
            pertbx.append(perturbation)

        # list of n_attacks tensors (num_remaining, features)
        #   -> one tensor (n_attacks * num_remaining, features)
        pertbx = torch.vstack(pertbx)

        with torch.no_grad():
            remaining_label_ext = torch.cat([remaining_label] * n_attacks)
            loss, done = get_loss(pertbx, remaining_label_ext, model)

            # Lift every successful candidate above all unsuccessful ones; when
            # no attack succeeds for a sample, all its candidates stay eligible.
            max_v = loss.amax()
            loss[done] += max_v

            loss = loss.reshape(n_attacks, num_remaining).permute(1, 0) #(num_remaining, n_attacks)
            done = done.reshape(n_attacks, num_remaining).permute(1, 0) #(num_remaining, n_attacks)

            success_flag = torch.any(done, dim=-1) #(num_remaining)

            pertbx = pertbx.reshape(n_attacks, num_remaining, x.shape[1]).permute([1, 0, 2]) #(num_remaining, n_attacks, features)
            _, indices = loss.max(dim=-1)
            rows = torch.arange(num_remaining, device=x.device)
            adv_x[~stop_flag] = pertbx[rows, indices]
            a_loss = loss[rows, indices]
            pre_stop_flag = stop_flag.clone()
            stop_flag[~stop_flag] = (torch.abs(pre_loss[~stop_flag] - a_loss) < varepsilon) | success_flag
            pre_loss[~pre_stop_flag] = a_loss

    return adv_x

In [None]:
def pgd_step(x, y, model, norm, k, step_length):
    """
    Projected Gradient Descent (PGD) adversarial attack for stepwise use.

    Only features that are unset in x (value < 0.5) receive gradient. The
    result is not rounded, but it is never smaller than x in any entry.

    :param x: Feature vector
    :param y: Ground truth labels, shape (N,) or (N, 1)
    :param model: Neural network model
    :param norm: Norm used for perturbation ('linf', 'l2' or 'l1')
    :param k: Number of steps
    :param step_length: Step size for each iteration
    :return: The adversarial version of x (tensor)(not rounded)
    :raises ValueError: if norm is not one of the supported values
    """
    model.eval()

    criterion = nn.CrossEntropyLoss(reduction='none')
    labels = y.view(-1).long()

    # Upper bound used to cap normalized gradients at 1.
    one = torch.tensor(1., dtype=x.dtype, device=x.device)

    # Initialize starting point
    x_next = x.clone()

    # Multi-step PGD
    for _ in range(k):
        # Forward pass
        x_var = x_next.clone().detach().requires_grad_(True)
        loss = criterion(model(x_var), labels)

        # Gradient, restricted to features that are unset in the original x
        grad_vars = torch.autograd.grad(loss.mean(), x_var)
        gradients = grad_vars[0].data * (x < 0.5)

        if norm == 'linf':
            perturbation = torch.sign(gradients)
        elif norm == 'l2':
            l2norm = torch.linalg.norm(gradients, dim=-1, keepdim=True)
            perturbation = torch.minimum(one, gradients / l2norm)
            # A zero norm yields 0/0 (nan) or x/0 (inf); map them to fixed steps.
            perturbation[torch.isnan(perturbation)] = 0.
            perturbation[torch.isinf(perturbation)] = 1.
        elif norm == 'l1':
            # Ignore the gradient of features that have already been switched on.
            gradients = gradients * (x_next < 0.5)
            val, _ = torch.topk(gradients, 1)
            perturbation = (gradients >= val.expand_as(gradients)).float()

            # Stop perturbing the examples that already evade the victim.
            with torch.no_grad():
                outputs = model(x_next)
            _, predicted = torch.topk(outputs, k=1)
            # Flatten both sides: comparing (N, 1) predictions with 1-D labels
            # would broadcast to an (N, N) mask that cannot index the batch.
            done = predicted.view(-1) != labels
            if torch.all(done):
                break
            perturbation[done] = 0.
        else:
            raise ValueError("Expect 'l1' or 'l2' or 'linf' norm.")

        # Update x_next and project back into the [0, 1] box
        x_next = torch.clamp(x_next + perturbation * step_length, min=0., max=1.)

    # Remove negative perturbations. A bitwise OR cannot be used here because
    # fractional values such as 0.2 must be kept.
    x_adv = torch.maximum(x_next, x)
    return x_adv

In [None]:
def StepwiseMax(
    x,
    label,
    model,
    attack_list=["linf", "l2", "l1"],
    step_lengths={"l1": 1.0, "l2": 0.05, "linf": 0.002},
    steps=100,
    step_check = 1,
    random_start=False,
    round_threshold=0.5,
    is_attacker=False,
    is_score_round = False
):
  """
    Stepwise max attack (mixture of pgd-l1, pgd-l2, pgd-linf).

    The total number of steps is split into rounds. In every round each norm in
    attack_list runs pgd_step on the samples that do not yet evade the model,
    and the best-scoring candidate per sample is kept as the start of the next
    round.

    Args:
        x: Input data tensor (shape: [batch_size, feature_dim])
        label: Ground truth labels tensor (shape: [batch_size])
        model: Victim model
        attack_list: List of attack norms (default: ["linf", "l2", "l1"])
        step_lengths: Dictionary mapping norm to its step length (default: {"l1": 1.0, "l2": 0.05, "linf": 0.002});
            a norm missing from the dictionary falls back to the "l1" entry
        steps: Maximum number of iterations (default: 100)
        step_check: Currently ignored; the value is overwritten at the start of the body
        random_start: Use random starting point (default: False)
        round_threshold: Threshold for rounding real scalars (default: 0.5)
        is_attacker: Play the role of attacker (default: False); candidates are then
            scored and stored in rounded form and the result is rounded
        is_score_round: Score candidates on their rounded version even when
            is_attacker is False (default: False)

    Returns:
        Adversarial examples tensor (same shape as x)
  """

  model.eval()

  # NOTE(review): the step_check argument is discarded here. The round length is
  # 1 for the attacker and a random choice from step_checks otherwise.
  step_check = 1
  if not is_attacker:
      step_checks = [1, 10, 25, 50]
      step_check = random.choice(step_checks)

  print(f"Step check: {step_check}")
  # Split `steps` into rounds of step_check iterations plus a shorter final
  # round for the remainder.
  mini_steps = [step_check] * (steps // step_check)
  if steps % step_check != 0:
      mini_steps.append(steps % step_check)

  n, red_n = x.shape
  adv_x = x.detach().clone()
  pert_x_cont = None  # un-rounded winners of the previous round
  prev_done = None    # evasion mask of the previous round
  for i, mini_step in enumerate(mini_steps):
      with torch.no_grad():
          if i == 0 :
              adv_x = get_x0(adv_x, initial_rounding_threshold=round_threshold, is_sample=random_start)

          # done marks the samples the model currently misclassifies
          _, done = get_loss(adv_x, label, model)
      if torch.all(done):
          break
      if i == 0:
          adv_x[~done] = x[~done]  # recompute the perturbation under other penalty factors
          prev_done = done
      else:
          # pert_x_cont holds one row per sample that was not done in the
          # previous round; done[~prev_done] re-indexes the current mask into
          # that subset. Presumably this restores the un-rounded values when
          # adv_x was stored rounded for the attacker -- TODO confirm.
          adv_x[~done] = pert_x_cont[~done[~prev_done]]
          prev_done = done

      num_sample_red = torch.sum(~done).item()
      pertbx = []
      for norm in attack_list:
          # Unknown norms silently use the "l1" step length here; pgd_step
          # itself raises ValueError for a norm it does not support.
          step_length = step_lengths.get(norm, step_lengths["l1"])
          perturbation = pgd_step(adv_x[~done], label[~done], model, norm, mini_step, step_length)
          #print("the number of added features(not rounded) ", norm,": ", perturbation.sum()/len(adv_x[~done]) - adv_x[~done].sum()/len(adv_x[~done]))
          #print("the number of added features(rounded) ", norm," : ",(round_x(perturbation, round_threshold).sum() - round_x(adv_x[~done], round_threshold).sum())/len(adv_x[~done]))
          pertbx.append(perturbation)
      with torch.no_grad():
          # (n_attacks * num_sample_red, features), grouped attack by attack
          pertbx = torch.vstack(pertbx)

          n_attacks = len(attack_list)
          label_ext = torch.cat([label[~done]] * n_attacks)

          # Score the continuous candidates unless the attacker role or
          # is_score_round asks for scoring on the rounded version.
          if (not is_attacker) and (not is_score_round):
              scores, _done = get_loss(pertbx, label_ext,model)
          else:
              scores, _done = get_loss(round_x(pertbx, round_threshold), label_ext, model)
          # Add the largest score to every evading candidate so that it
          # outranks all non-evading ones in the per-sample maximum below.
          max_v = scores.amax() if scores.amax() > 0 else 0.
          scores[_done] += max_v

          # Regroup to (num_sample_red, n_attacks, ...) and keep the
          # best-scoring attack for each sample.
          pertbx = pertbx.reshape(n_attacks, num_sample_red, red_n).permute([1, 0, 2])
          scores = scores.reshape(n_attacks, num_sample_red).permute(1, 0)
          _2, s_idx = scores.max(dim=-1)
          pert_x_cont = pertbx[torch.arange(num_sample_red), s_idx]
          adv_x[~done] = pert_x_cont if not is_attacker else round_x(pert_x_cont, round_threshold)

  # NOTE(review): i is unbound when mini_steps is empty (steps == 0), in which
  # case this line raises NameError.
  print(i)
  if is_attacker:
      adv_x = round_x(adv_x, round_threshold)
  with torch.no_grad():
      _, done = get_loss(adv_x, label, model)
      print(f"step-wise max: attack effectiveness {done.sum().item() / done.size()[0] * 100:.3f}%.")

  return adv_x

In [None]:
def StepwiseMax2(
    x,
    label,
    model,
    attack_list=["linf", "l2", "l1"],
    step_lengths={"l1": 1.0, "l2": 0.05, "linf": 0.002},
    steps=100,
    step_check = 1,
    random_start=False,
    round_threshold=0.5,
    is_attacker=False,
    is_score_round = False
):
    """
    Stepwise max attack (mixture of pgd-l1, pgd-l2, pgd-linf).

    The budget of ``steps`` iterations is cut into chunks of ``step_check``
    iterations. After every chunk each norm in ``attack_list`` proposes a
    candidate through ``pgd_step`` and, per sample, the candidate with the
    highest score from ``get_loss`` is kept. ``pgd_step`` and ``get_loss``
    are defined elsewhere in this file; ``get_loss`` is used here as returning
    ``(per-sample score, boolean mask)`` where the mask marks samples that
    need no further perturbation.

    Args:
        x: Input data tensor (shape: [batch_size, feature_dim])
        label: Ground truth labels tensor (shape: [batch_size])
        model: Victim model (switched to eval mode)
        attack_list: List of attack norms (default: ["linf", "l2", "l1"])
        step_lengths: Dictionary mapping norm to its step length; must contain
            "l1", which is the fallback for norms missing from the dictionary
        steps: Maximum number of iterations (default: 100)
        step_check: Number of PGD iterations between two candidate
            selections. Honoured only when ``is_attacker`` is True; otherwise
            it is replaced by a random choice from [1, 10, 25, 50]. Must be
            >= 1 when it is used.
        random_start: Use random starting point (default: False)
        round_threshold: Threshold for rounding real scalars (default: 0.5)
        is_attacker: Play the role of attacker: success is checked on rounded
            samples and the returned tensor is rounded (default: False)
        is_score_round: Score the candidates on their rounded version
            (default: False)

    Returns:
        Adversarial examples tensor (same shape as x)

    Raises:
        ValueError: if ``is_attacker`` is True and ``step_check`` < 1.
    """

    model.eval()

    if not is_attacker:
        # Vary the selection interval from call to call.
        step_check = random.choice([1, 10, 25, 50])
    elif step_check < 1:
        raise ValueError(f"step_check must be >= 1, got {step_check}")

    print(f"Step check: {step_check}")
    full_chunks, remainder = divmod(steps, step_check)
    mini_steps = [step_check] * full_chunks
    if remainder != 0:
        mini_steps.append(remainder)

    _, red_n = x.shape
    n_attacks = len(attack_list)
    adv_x = x.detach().clone()
    pert_x_cont = None
    prev_done = None
    last_iter = None  # stays None when no mini-step is scheduled (e.g. steps=0)
    for i, mini_step in enumerate(mini_steps):
        last_iter = i
        with torch.no_grad():
            if i == 0:
                adv_x = get_x0(adv_x, initial_rounding_threshold=round_threshold, is_sample=random_start)

            # The attacker is judged on the discrete (rounded) sample.
            probe = round_x(adv_x, round_threshold) if is_attacker else adv_x
            _, done = get_loss(probe, label, model)
        if torch.all(done):
            break

        active = ~done
        if i == 0:
            adv_x[active] = x[active]  # recompute the perturbation under other penalty factors
        else:
            # pert_x_cont only holds the rows that were active in the previous
            # round, so the current mask is re-indexed relative to those rows.
            adv_x[active] = pert_x_cont[active[~prev_done]]
        prev_done = done

        num_sample_red = torch.sum(active).item()
        print('len(adv_x[~done]) : ', num_sample_red)

        pertbx = []
        for norm in attack_list:
            step_length = step_lengths.get(norm, step_lengths["l1"])
            perturbation = pgd_step(adv_x[active], label[active], model, norm, mini_step, step_length)
            pertbx.append(perturbation)

        with torch.no_grad():
            # Candidates are stacked norm-major: rows [k*m, (k+1)*m) belong to norm k.
            pertbx = torch.vstack(pertbx)
            label_ext = torch.cat([label[active]] * n_attacks)

            if is_score_round:
                scores, _done = get_loss(round_x(pertbx, round_threshold), label_ext, model)
            elif is_attacker:
                # Rank by the continuous score but detect success on the rounded sample.
                scores, _ = get_loss(pertbx, label_ext, model)
                _, _done = get_loss(round_x(pertbx, round_threshold), label_ext, model)
            else:
                scores, _done = get_loss(pertbx, label_ext, model)

            # Lift successful candidates above every unsuccessful one.
            top_score = scores.amax()
            max_v = top_score if top_score > 0 else 0.
            scores[_done] += max_v

            pertbx = pertbx.reshape(n_attacks, num_sample_red, red_n).permute([1, 0, 2])
            scores = scores.reshape(n_attacks, num_sample_red).permute(1, 0)
            _, s_idx = scores.max(dim=-1)
            pert_x_cont = pertbx[torch.arange(num_sample_red), s_idx]
            adv_x[active] = pert_x_cont

    if last_iter is not None:
        print(last_iter)
    if is_attacker:
        adv_x = round_x(adv_x, round_threshold)
    with torch.no_grad():
        _, done = get_loss(adv_x, label, model)
        print(f"step-wise max: attack effectiveness {done.sum().item() / done.size()[0] * 100:.3f}%.")

    return adv_x

In [None]:
def pgd_one_step(x, y, model, step_lengths):
    """
    Compute one PGD step under the linf, l2 and l1 norms from a single gradient.

    The cross-entropy loss of ``model`` is differentiated once with respect to
    ``x``; the gradient is zeroed on entries of ``x`` that are >= 0.5 and then
    turned into three candidate updates, one per norm.

    :param x: Feature tensor to perturb (float, last dimension = features)
    :param y: Ground truth labels (flattened and cast to long)
    :param model: Neural network model (switched to eval mode)
    :param step_lengths: Dict mapping "linf", "l2", "l1" to a step size; "l1"
                         must be present because it is the fallback value
    :return: List ``[x_linf, x_l2, x_l1]`` of candidates (not rounded), each
             clamped to [0, 1] and never smaller than ``x`` element-wise
    """
    model.eval()

    criterion = nn.CrossEntropyLoss(reduction='none')

    # Starting point of the step
    x_next = x.clone()

    # Forward pass
    x_var = x_next.clone().detach().requires_grad_(True)
    y_model = model(x_var)
    loss = criterion(y_model, y.view(-1).long())

    # Gradient of the mean loss, restricted to entries that are still "off"
    grad_vars = torch.autograd.grad(loss.mean(), x_var)
    gradients = grad_vars[0].detach() * (x < 0.5)

    pertbx = []

    # norm = linf
    step_length = step_lengths.get("linf", step_lengths["l1"])
    x_next_linf = torch.clamp(x_next + torch.sign(gradients) * step_length, min=0., max=1.)
    # Drop negative perturbations. An OR cannot be used here because
    # fractional values such as 0.2 have to be preserved.
    pertbx.append(torch.maximum(x_next_linf, x))

    # norm = l2
    step_length = step_lengths.get("l2", step_lengths["l1"])
    l2norm = torch.linalg.norm(gradients, dim=-1, keepdim=True)
    perturbation_l2 = torch.minimum(torch.tensor(1., dtype=x.dtype, device=x.device), gradients / l2norm)
    # A zero-norm row yields 0/0 = NaN; treat it as "no direction".
    perturbation_l2[torch.isnan(perturbation_l2)] = 0.
    perturbation_l2[torch.isinf(perturbation_l2)] = 1.
    x_next_l2 = torch.clamp(x_next + perturbation_l2 * step_length, min=0., max=1.)
    pertbx.append(torch.maximum(x_next_l2, x))

    # norm = l1
    step_length = step_lengths["l1"]
    # Ignore the gradient of entries that are already switched on.
    gradients_l1 = gradients * (x_next < 0.5)
    val, _ = torch.topk(gradients_l1, 1)
    # Only move along the top-1 coordinate, and only if it actually increases
    # the loss. Without the guard an all-zero gradient row would switch on
    # every feature. The 1e-10 threshold mirrors pgd_one_step2.
    perturbation_l1 = (gradients_l1 >= val) & (val > 1e-10)
    x_next_l1 = torch.clamp(x_next + perturbation_l1 * step_length, min=0., max=1.)
    pertbx.append(torch.maximum(x_next_l1, x))

    return pertbx

In [None]:
def StepwiseMax_onestep(
    x,
    label,
    model,
    attack_list=["linf", "l2", "l1"],
    step_lengths={"l1": 1.0, "l2": 0.05, "linf": 0.002},
    steps=100,
    random_start=False,
    round_threshold=0.5,
):
    """
    Stepwise max attack (mixture of pgd-l1, pgd-l2, pgd-linf), one PGD step per round.

    Every round ``pgd_one_step`` produces one candidate per norm; the
    candidates of the norms named in ``attack_list`` compete and, per sample,
    the one with the highest score from ``get_loss`` is kept. Success is
    always checked on the rounded sample and the result is returned rounded.
    ``get_loss`` is defined elsewhere in this file and is used here as
    returning ``(per-sample score, boolean mask of finished samples)``.

    Args:
        x: Input data tensor (shape: [batch_size, feature_dim])
        label: Ground truth labels tensor (shape: [batch_size])
        model: Victim model (switched to eval mode)
        attack_list: Norms allowed to compete; each entry must be one of
            "linf", "l2", "l1" (default: ["linf", "l2", "l1"])
        step_lengths: Dictionary mapping norm to its step length (default: {"l1": 1.0, "l2": 0.05, "linf": 0.002})
        steps: Maximum number of iterations (default: 100)
        random_start: Use random starting point (default: False)
        round_threshold: Threshold for rounding real scalars (default: 0.5)

    Returns:
        Adversarial examples tensor (same shape as x), rounded to 0/1

    Raises:
        ValueError: if ``attack_list`` is empty or names an unsupported norm.
    """

    model.eval()

    # Order in which pgd_one_step returns its candidates.
    norm_order = ("linf", "l2", "l1")
    unknown = [norm for norm in attack_list if norm not in norm_order]
    if unknown or len(attack_list) == 0:
        raise ValueError(
            f"attack_list must be a non-empty selection of {norm_order}, got {attack_list}"
        )
    n_attacks = len(attack_list)

    _, red_n = x.shape
    adv_x = x.detach().clone()
    pert_x_cont = None
    prev_done = None
    last_step = None  # stays None when the loop body never runs (steps <= 0)
    for step in range(steps):
        last_step = step
        with torch.no_grad():
            if step == 0:
                adv_x = get_x0(adv_x, initial_rounding_threshold=round_threshold, is_sample=random_start)

            _, done = get_loss(round_x(adv_x, round_threshold), label, model)

        if torch.all(done):
            break

        active = ~done
        if step == 0:
            adv_x[active] = x[active]  # recompute the perturbation under other penalty factors
        else:
            # pert_x_cont only holds the rows that were active in the previous
            # round, so the current mask is re-indexed relative to those rows.
            adv_x[active] = pert_x_cont[active[~prev_done]]
        prev_done = done

        num_sample_red = torch.sum(active).item()
        print('len(adv_x[~done]) : ', num_sample_red)

        candidates = dict(zip(norm_order, pgd_one_step(adv_x[active], label[active], model, step_lengths)))
        # Keep only the requested norms so the stacked tensor really has
        # n_attacks * num_sample_red rows, as the reshape below expects.
        pertbx = [candidates[norm] for norm in attack_list]

        with torch.no_grad():
            pertbx = torch.vstack(pertbx)
            label_ext = torch.cat([label[active]] * n_attacks)

            # Rank by the continuous score but detect success on the rounded sample.
            scores, _ = get_loss(pertbx, label_ext, model)
            _, _done = get_loss(round_x(pertbx, round_threshold), label_ext, model)

            # Lift successful candidates above every unsuccessful one.
            top_score = scores.amax()
            max_v = top_score if top_score > 0 else 0.
            scores[_done] += max_v

            pertbx = pertbx.reshape(n_attacks, num_sample_red, red_n).permute([1, 0, 2])
            scores = scores.reshape(n_attacks, num_sample_red).permute(1, 0)
            _, s_idx = scores.max(dim=-1)
            pert_x_cont = pertbx[torch.arange(num_sample_red), s_idx]
            adv_x[active] = pert_x_cont

    if last_step is not None:
        print(last_step)
    adv_x = round_x(adv_x, round_threshold)
    with torch.no_grad():
        _, done = get_loss(adv_x, label, model)
        print(f"step-wise max: attack effectiveness {done.sum().item() / done.size()[0] * 100:.3f}%.")

    return adv_x

In [None]:
def around_x(x, std_deviation=0.1, random_start=False):
    """
    Helper function to randomly initialize the inner maximizer algorithm.

    Adds non-negative noise to ``x``, so entries are only ever increased,
    and clamps the result to [0, 1].

    :param x: input tensor
    :param std_deviation: scale applied to the absolute value of standard
                          normal samples
    :param random_start: when False, ``x`` is returned unchanged (same object)
    :return: ``x`` itself, or a new tensor ``clamp(x + |N(0, 1)| * std_deviation, 0, 1)``
    """
    if not random_start:
        return x

    # Sample on x's device so the addition below also works for tensors that
    # do not live on the default (CPU) device.
    noise = torch.randn(x.size(), device=x.device).abs() * std_deviation

    return torch.clamp(x + noise, min=0., max=1.)


In [None]:
def pgd_one_step2(x, y, model, step_lengths,x_initial):
    """
    Compute one PGD step under the linf, l2 and l1 norms from a single gradient.

    Same idea as ``pgd_one_step`` but the feasibility constraints are taken
    from ``x_initial`` instead of the current iterate: the gradient is zeroed
    where ``x_initial`` is >= 0.5, and no candidate may fall below
    ``x_initial``.

    :param x: Current iterate to perturb (float, last dimension = features)
    :param y: Ground truth labels (flattened and cast to long)
    :param model: Neural network model (switched to eval mode)
    :param step_lengths: Dict mapping "linf", "l2", "l1" to a step size; "l1"
                         must be present because it is the fallback value
    :param x_initial: Reference sample, same shape as ``x``
    :return: List ``[x_linf, x_l2, x_l1]`` of candidates (not rounded), each
             clamped to [0, 1] and never smaller than ``x_initial`` element-wise
    """
    model.eval()

    criterion = nn.CrossEntropyLoss(reduction='none')

    # Starting point of the step
    x_next = x.clone()

    # Forward pass
    x_var = x_next.clone().detach().requires_grad_(True)
    y_model = model(x_var)
    loss = criterion(y_model, y.view(-1).long())

    # Gradient of the mean loss, restricted to entries that are "off" in x_initial
    grad_vars = torch.autograd.grad(loss.mean(), x_var)
    gradients = grad_vars[0].detach() * (x_initial < 0.5)

    pertbx = []

    # norm = linf
    step_length = step_lengths.get("linf", step_lengths["l1"])
    x_next_linf = torch.clamp(x_next + torch.sign(gradients) * step_length, min=0., max=1.)
    # Drop negative perturbations. An OR cannot be used here because
    # fractional values such as 0.2 have to be preserved.
    pertbx.append(torch.maximum(x_next_linf, x_initial))

    # norm = l2
    step_length = step_lengths.get("l2", step_lengths["l1"])
    l2norm = torch.linalg.norm(gradients, dim=-1, keepdim=True)
    perturbation_l2 = torch.minimum(torch.tensor(1., dtype=x.dtype, device=x.device), gradients / l2norm)
    # A zero-norm row yields 0/0 = NaN; treat it as "no direction".
    perturbation_l2[torch.isnan(perturbation_l2)] = 0.
    perturbation_l2[torch.isinf(perturbation_l2)] = 1.
    x_next_l2 = torch.clamp(x_next + perturbation_l2 * step_length, min=0., max=1.)
    pertbx.append(torch.maximum(x_next_l2, x_initial))

    # norm = l1
    step_length = step_lengths["l1"]
    # Ignore the gradient of entries that have already been raised to >= 0.5.
    gradients_l1 = gradients * (x_next < 0.5)
    val, _ = torch.topk(gradients_l1, 1)

    # Compare against the *masked* gradient: the threshold comes from
    # gradients_l1, so comparing the unmasked gradient would also select
    # already-raised entries whose gradient exceeds the top-1 of the rest.
    # The (val > 1e-10) guard skips rows without a loss-increasing direction.
    perturbation_l1 = (gradients_l1 >= val) & (val > 1e-10)

    x_next_l1 = torch.clamp(x_next + perturbation_l1 * step_length, min=0., max=1.)
    pertbx.append(torch.maximum(x_next_l1, x_initial))

    return pertbx

In [None]:
def StepwiseMax_onestep2(
    x,
    label,
    model,
    attack_list=["linf", "l2", "l1"],
    step_lengths={"l1": 1.0, "l2": 0.05, "linf": 0.002},
    steps=100,
    random_start=False,
    round_threshold=0.5,
):
    """
    Stepwise max attack (mixture of pgd-l1, pgd-l2, pgd-linf), one PGD step per round.

    Every round ``pgd_one_step2`` produces one candidate per norm, constrained
    by the original sample ``x``; the candidates of the norms named in
    ``attack_list`` compete and, per sample, the one with the highest score
    from ``get_loss`` is kept. Success is always checked on the rounded sample
    and the result is returned rounded. ``get_loss`` is defined elsewhere in
    this file and is used here as returning
    ``(per-sample score, boolean mask of finished samples)``.

    Args:
        x: Input data tensor (shape: [batch_size, feature_dim])
        label: Ground truth labels tensor (shape: [batch_size])
        model: Victim model (switched to eval mode)
        attack_list: Norms allowed to compete; each entry must be one of
            "linf", "l2", "l1" (default: ["linf", "l2", "l1"])
        step_lengths: Dictionary mapping norm to its step length (default: {"l1": 1.0, "l2": 0.05, "linf": 0.002})
        steps: Maximum number of iterations (default: 100)
        random_start: Start from ``around_x`` noise with std_deviation 0.1 (default: False)
        round_threshold: Threshold for rounding real scalars (default: 0.5)

    Returns:
        Adversarial examples tensor (same shape as x), rounded to 0/1

    Raises:
        ValueError: if ``attack_list`` is empty or names an unsupported norm.
    """

    model.eval()

    # Order in which pgd_one_step2 returns its candidates.
    norm_order = ("linf", "l2", "l1")
    unknown = [norm for norm in attack_list if norm not in norm_order]
    if unknown or len(attack_list) == 0:
        raise ValueError(
            f"attack_list must be a non-empty selection of {norm_order}, got {attack_list}"
        )
    n_attacks = len(attack_list)

    _, red_n = x.shape
    adv_x = x.detach().clone()
    pert_x_cont = None
    prev_done = None
    for step in range(steps):
        with torch.no_grad():
            if step == 0:
                adv_x = around_x(adv_x, std_deviation=0.1, random_start=random_start)

            _, done = get_loss(round_x(adv_x, round_threshold), label, model)

        if torch.all(done):
            break

        active = ~done
        if step == 0:
            adv_x[active] = x[active]  # recompute the perturbation under other penalty factors
        else:
            # pert_x_cont only holds the rows that were active in the previous
            # round, so the current mask is re-indexed relative to those rows.
            adv_x[active] = pert_x_cont[active[~prev_done]]
        prev_done = done

        num_sample_red = torch.sum(active).item()

        candidates = dict(
            zip(norm_order, pgd_one_step2(adv_x[active], label[active], model, step_lengths, x[active]))
        )
        # Keep only the requested norms so the stacked tensor really has
        # n_attacks * num_sample_red rows, as the reshape below expects.
        pertbx = [candidates[norm] for norm in attack_list]

        with torch.no_grad():
            pertbx = torch.vstack(pertbx)
            label_ext = torch.cat([label[active]] * n_attacks)

            # Rank by the continuous score but detect success on the rounded sample.
            scores, _ = get_loss(pertbx, label_ext, model)
            _, _done = get_loss(round_x(pertbx, round_threshold), label_ext, model)

            # Lift successful candidates above every unsuccessful one.
            top_score = scores.amax()
            max_v = top_score if top_score > 0 else 0.
            scores[_done] += max_v

            pertbx = pertbx.reshape(n_attacks, num_sample_red, red_n).permute([1, 0, 2])
            scores = scores.reshape(n_attacks, num_sample_red).permute(1, 0)
            _, s_idx = scores.max(dim=-1)
            pert_x_cont = pertbx[torch.arange(num_sample_red), s_idx]
            adv_x[active] = pert_x_cont

    adv_x = round_x(adv_x, round_threshold)
    with torch.no_grad():
        _, done = get_loss(adv_x, label, model)
        print(f"step-wise max: attack effectiveness {done.sum().item() / done.size()[0] * 100:.3f}%.")

    return adv_x