<a href="https://colab.research.google.com/github/mostafa-ja/mal_adv3/blob/main/3_adverserial_attacks.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split
from scipy import sparse
import gdown
import numpy as np
import matplotlib.pyplot as plt
import pickle
import torch.nn.functional as F

In [3]:
# Download the feature matrix from Google Drive into /content/.
# Per the saved cell output the file lands as /content/X_redefined_sparse_matrix.npz.
download_link = 'https://drive.google.com/uc?id=12iud4h19CZst4exbr3U2A9iDxBYvZ5U_'
output_filepath = '/content/'
gdown.download(download_link, output_filepath)

Downloading...
From (original): https://drive.google.com/uc?id=12iud4h19CZst4exbr3U2A9iDxBYvZ5U_
From (redirected): https://drive.google.com/uc?id=12iud4h19CZst4exbr3U2A9iDxBYvZ5U_&confirm=t&uuid=2cb9bb58-3da5-4697-ba66-610e9a01509b
To: /content/X_redefined_sparse_matrix.npz
100%|██████████| 2.31M/2.31M [00:00<00:00, 110MB/s]


'/content/X_redefined_sparse_matrix.npz'

In [4]:
# Download the label tensor from Google Drive into /content/.
# Per the saved cell output the file lands as /content/labels.pt.
download_link = 'https://drive.google.com/uc?id=1IhrcT3jHqlPrw2KvQ5vJkBgozxcJ1cJm'
output_filepath = '/content/'
gdown.download(download_link, output_filepath)

Downloading...
From: https://drive.google.com/uc?id=1IhrcT3jHqlPrw2KvQ5vJkBgozxcJ1cJm
To: /content/labels.pt
100%|██████████| 517k/517k [00:00<00:00, 87.8MB/s]


'/content/labels.pt'

In [20]:
# Download the trained classifier weights from Google Drive into /content/.
# Per the saved cell output the file lands as /content/DNN_params.pth.
download_link = 'https://drive.google.com/uc?id=13o5n06UpMDOhtk4u7B_RBSWa3kiiGXFs'
output_filepath = '/content/'
gdown.download(download_link, output_filepath)

Downloading...
From: https://drive.google.com/uc?id=13o5n06UpMDOhtk4u7B_RBSWa3kiiGXFs
To: /content/DNN_params.pth
100%|██████████| 8.17M/8.17M [00:00<00:00, 109MB/s]


'/content/DNN_params.pth'

In [6]:
# Download the feature vocabulary from Google Drive into /content/.
# Per the saved cell output the file lands as /content/vocab.pkl.
download_link = 'https://drive.google.com/uc?id=1PxFOLBnQAlX-EOsqkhGCSd1T3ykAD0-4'
output_filepath = '/content/'
gdown.download(download_link, output_filepath)

Downloading...
From: https://drive.google.com/uc?id=1PxFOLBnQAlX-EOsqkhGCSd1T3ykAD0-4
To: /content/vocab.pkl
100%|██████████| 9.18M/9.18M [00:00<00:00, 92.6MB/s]


'/content/vocab.pkl'

In [7]:
# Load the vocabulary dictionary from the downloaded pickle file.
# NOTE(review): pickle.load can execute arbitrary code while unpickling; this file
# was fetched from a remote link, so only run this cell if the source is trusted.
# The relative path presumably relies on the working directory being /content/ — confirm.
with open('vocab.pkl', 'rb') as f:
    vocab = pickle.load(f)

# Preview the first six (key, value) pairs; the loop stops after index 5.
for i, (key, value) in enumerate(vocab.items()):
    print((key, value))
    if i >= 5:
        break

('android/media/mediaplayer->start', 141045)
('android/app/activity->setcontentview', 140900)
('android/os/vibrator->cancel', 141093)
('android.permission.vibrate', 140720)
('android.hardware.touchscreen', 137091)
('android.intent.action.main', 138335)


In [8]:
# Load dataset
# The features are read back as a SciPy sparse matrix; the labels as a saved torch object.
X_redefined = sparse.load_npz("X_redefined_sparse_matrix.npz")
# NOTE(review): torch.load unpickles the file — only load files from a trusted source.
labels_tensor = torch.load('labels.pt')

In [9]:
# Split data into train, validation, and test sets with stratified sampling.
# First hold out 20% as the test set, then hold out 20% of the remainder as validation
# (i.e. roughly 64% train / 16% validation / 20% test). random_state=42 fixes both splits.
X_train_val, X_test, labels_train_val, labels_test = train_test_split(X_redefined, labels_tensor, test_size=0.2, stratify=labels_tensor, random_state=42)
X_train, X_val, labels_train, labels_val = train_test_split(X_train_val, labels_train_val, test_size=0.2, stratify=labels_train_val, random_state=42)

# Combine features and labels into datasets.
# .toarray() densifies each sparse split before it is converted to a float32 tensor.
train_dataset = TensorDataset(torch.tensor(X_train.toarray(), dtype=torch.float32), labels_train)
val_dataset = TensorDataset(torch.tensor(X_val.toarray(), dtype=torch.float32), labels_val)
test_dataset = TensorDataset(torch.tensor(X_test.toarray(), dtype=torch.float32), labels_test)


In [10]:
# Define the DataLoader for training, validation, and test sets.
# Only the training loader shuffles; validation and test keep a fixed order.
# NOTE: batch_size is a notebook-level name that later evaluation cells rebind.
batch_size = 256
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [21]:
import torch
import torch.nn as nn

class MalwareDetectionModel(nn.Module):
    """
    Feed-forward classifier: two hidden Linear -> ReLU -> Dropout stages followed by
    a Linear output layer and a log-softmax over the class dimension.

    :param input_size: number of input features
    :param hidden_1_size: width of the first hidden layer
    :param hidden_2_size: width of the second hidden layer
    :param num_labels: number of output classes
    :param dropout_prob: dropout probability used after each hidden layer
    """

    def __init__(self, input_size=10000, hidden_1_size=200, hidden_2_size=200, num_labels=2, dropout_prob=0.6):
        super().__init__()

        # Remember the hyper-parameters on the instance.
        self.input_size, self.hidden_1_size, self.hidden_2_size = input_size, hidden_1_size, hidden_2_size
        self.num_labels, self.dropout_prob = num_labels, dropout_prob

        # First hidden stage
        self.fc1 = nn.Linear(input_size, hidden_1_size)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(dropout_prob)

        # Second hidden stage
        self.fc2 = nn.Linear(hidden_1_size, hidden_2_size)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(dropout_prob)

        # Output stage: class scores normalised to log-probabilities along dim 1
        self.fc3 = nn.Linear(hidden_2_size, num_labels)
        self.log_softmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        """Return log-probabilities over the classes for a batch of feature vectors."""
        hidden = self.dropout1(self.relu1(self.fc1(x)))
        hidden = self.dropout2(self.relu2(self.fc2(hidden)))
        return self.log_softmax(self.fc3(hidden))


In [22]:
# Create an instance of the model with its default layer sizes
# (these must match the shapes stored in the checkpoint loaded below).
model = MalwareDetectionModel()

# Load the trained parameters from the downloaded checkpoint.
# NOTE(review): torch.load unpickles the file — only load checkpoints from a trusted source.
# This cell does not call model.eval(); the attack functions below do so before using the model.
model.load_state_dict(torch.load('DNN_params.pth'))

<All keys matched successfully>

In [14]:
# Take the first batch of the (unshuffled) test loader as a fixed sample for the experiments below.
X,y = next(iter(test_loader))
print(X.shape)
print(y.shape)

torch.Size([256, 10000])
torch.Size([256, 1])




```
delta = torch.zeros_like(X, requires_grad=True)
for t in range(25):
    loss = nn.CrossEntropyLoss()(model(X + delta), y.view(-1).long())
    loss.backward()
    gradients = delta.grad.detach().sign() * (X < 0.5)
    delta.data = (delta + 0.02*delta.grad.detach().sign()).clamp(0.,1.)
    print(delta.data[0])
    delta.grad.zero_()
```



In [55]:
def round_x(x, round_threshold=0.5):
    """
    Binarise a tensor by thresholding.
    Entries greater than or equal to the threshold become 1.0, all others 0.0.
    :param x: input tensor
    :param round_threshold: threshold (a scalar, or a tensor broadcastable to x)
    :return: a float tensor of 0s and 1s
    """
    at_or_above = torch.ge(x, round_threshold)
    return at_or_above.to(torch.float32)

def get_x0(x, initial_rounding_threshold=0.5, is_sample=False):
    """
    Helper function to pick the starting point of the inner maximizer.

    When sampling is enabled, random bits are switched on in addition to the
    bits already set in x, so no feature present in x is ever removed.

    :param x: input tensor (expected to hold 0s and 1s)
    :param initial_rounding_threshold: a uniform random draw in [0, 1) must be
        greater than or equal to this value for the corresponding bit to be set
    :param is_sample: flag to sample randomly from the feasible area
    :return: x itself when is_sample is False, otherwise a new float tensor of
        0s and 1s equal to (random bits OR x)
    """
    if not is_sample:
        return x

    # Draw the noise on the same device as x so the OR below never mixes devices.
    noise = torch.rand(x.size(), device=x.device)
    # Threshold with the parameter that is actually passed in (the previous body
    # referenced an undefined name here and raised NameError).
    random_bits = noise >= initial_rounding_threshold
    return (random_bits.byte() | x.byte()).float()

def or_float_tensors(x_1, x_2):
    """
    Element-wise OR of two float tensors.
    Both inputs are cast to uint8, OR-ed bitwise, and the result is cast back to float.
    Intended for tensors holding 0s and 1s.
    :param x_1: tensor one
    :param x_2: tensor two
    :return: float tensor of 0s and 1s
    """
    left_bits = x_1.to(torch.uint8)
    right_bits = x_2.to(torch.uint8)
    return torch.bitwise_or(left_bits, right_bits).to(torch.float32)


def xor_float_tensors(x_1, x_2):
    """
    Element-wise XOR of two float tensors.
    Both inputs are cast to uint8 (the fractional part is dropped, e.g. 0.1 ==> 0
    and 1.1 ==> 1), XOR-ed bitwise, and the result is cast back to float.

    The function is meant for tensors of 0s and 1s, where it maps
    (0, 0) and (1, 1) to 0 and (0, 1) and (1, 0) to 1. Values outside the
    uint8 range are not a supported input.

    :param x_1: tensor one
    :param x_2: tensor two
    :return: float tensor of 0s and 1s.
    """
    left_bits = x_1.to(torch.uint8)
    right_bits = x_2.to(torch.uint8)
    return torch.bitwise_xor(left_bits, right_bits).to(torch.float32)

def get_loss(model, adv_x, label):
    """
    Evaluate a binary classifier on (possibly perturbed) inputs.

    The model output is fed to BCELoss directly, so `model` must return
    probabilities in [0, 1] with the same shape as `label`.
    NOTE(review): MalwareDetectionModel.forward in this notebook returns
    log-softmax scores over two classes, which does not meet that requirement.

    Parameters:
    - model: callable victim model returning probabilities
    - adv_x: torch.FloatTensor, the adversarially perturbed input samples
    - label: torch.FloatTensor of 0s and 1s, ground truth labels

    Returns:
    - loss_no_reduction: torch.FloatTensor, element-wise binary cross-entropy
    - done: torch.BoolTensor, True where the thresholded prediction differs from the label
    """
    probabilities = model(adv_x)

    bce = nn.BCELoss(reduction='none')
    loss_no_reduction = bce(probabilities, label)

    # Hard decision: a probability of exactly 0.5 counts as class 1.
    hard_prediction = torch.ge(probabilities, 0.5).float()
    done = torch.ne(hard_prediction, label).squeeze()

    return loss_no_reduction, done



```
x_1 = torch.tensor([1.0, 1.0,  1.0, 1.0, 1.0, 0.0,  0.0, 0.2])
x_2 = torch.tensor([0.0, 0.6, -0.2, 0.9, 1.9, 1.0, -0.2, 0.3])
result = or_float_tensors(x_1, x_2)
             tensor([1., 1., 1., 1., 1., 1., 0., 0.])

```



In [156]:
def dfgsm_k(x, y, model, k=25, epsilon=0.02, alpha=1., initial_rounding_threshold=0.5, round_threshold=0.5, random=False, is_report_loss_diff=True, is_sample=False):
    """
    FGSM^k followed by rounding to a binary, feature-preserving sample.

    Runs k signed-gradient ascent steps on the cross-entropy loss inside the
    [0, 1] box, rounds the result to 0/1, and ORs it with x so that every
    feature set in x stays set.

    :param x: feature vectors, one row per sample
    :param y: ground truth labels (flattened with view(-1) before use)
    :param model: neural network model (switched to eval mode by this call)
    :param k: number of steps
    :param epsilon: update value in each direction
    :param alpha: scale of the random rounding thresholds; only used when
        random is True, where thresholds are drawn uniformly from [0, alpha)
    :param initial_rounding_threshold: threshold forwarded to get_x0 for the starting point
    :param round_threshold: threshold for the deterministic rounding (ignored when random is True)
    :param random: if True, use a per-element random threshold instead of round_threshold
    :param is_report_loss_diff: flag to print the attack effectiveness
    :param is_sample: flag to sample the starting point randomly from the feasible area
    :return: the adversarial version of x according to dfgsm_k (tensor)
    """
    model.eval()

    criterion = nn.CrossEntropyLoss(reduction='none')
    target = y.view(-1).long()

    # Natural loss is only used for a comparison, so no autograd graph is needed.
    with torch.no_grad():
        loss_natural = criterion(model(x), target)

    # Initialize starting point
    x_next = get_x0(x.clone(), initial_rounding_threshold, is_sample)

    # Multi-step signed-gradient ascent
    for _ in range(k):
        x_var = x_next.clone().detach().requires_grad_(True)
        loss = criterion(model(x_var), target)
        grad, = torch.autograd.grad(loss.mean(), x_var)

        # Step, then project back onto the [0, 1] box.
        x_next = torch.clamp(x_next + epsilon * torch.sign(grad), min=0., max=1.)

    # Rounding step
    if random:
        # Thresholds are created on the same device as the sample being rounded.
        round_threshold = torch.rand(x_next.size(), device=x_next.device) * alpha
    x_next = round_x(x_next, round_threshold=round_threshold)

    # Feasible projection: never drop a feature that is present in x.
    x_next = or_float_tensors(x_next, x)

    # One forward pass serves both the adversarial loss and the report.
    with torch.no_grad():
        outputs = model(x_next)
        loss_adv = criterion(outputs, target)

    if is_report_loss_diff:
        # Compare against the flattened labels so the count is correct for
        # labels of shape (N,) as well as (N, 1).
        misclassified = outputs.argmax(dim=1) != target
        print(f"rFGSM: attack effectiveness {misclassified.sum().item() / x.size()[0] * 100:.3f}%.")

    # Fall back to the natural sample wherever the attack LOWERED the loss.
    replace_flag = loss_adv < loss_natural
    x_next[replace_flag] = x[replace_flag]

    return x_next


In [157]:
# Run the attack with its default settings (k=25, epsilon=0.02, deterministic rounding) on the sampled batch.
x_adv = dfgsm_k(X,y,model)

rFGSM: attack effectiveness 100.000%.


In [74]:
# Same call as the previous cell. NOTE(review): the "Natural loss ..." line in the saved output
# appears to come from the print statement that is now commented out inside dfgsm_k.
x_adv = dfgsm_k(X,y,model)

Natural loss: 0.0060, Adversarial loss: 2221.2881, Difference: 2221.2820
rFGSM: attack effectiveness 100.000%.


In [75]:
# Mean number of active features per sample, before and after the attack.
# Divide by each tensor's own row count instead of the notebook-level `batch_size`,
# which other cells rebind and which need not match the batch currently held in X.
print(X.sum() / X.size(0))
print(x_adv.sum() / x_adv.size(0))

tensor(26.6289)
tensor(3986.6562)


In [57]:
# Same attack, but rounded with per-element random thresholds (random=True, default alpha=1).
x_adv = dfgsm_k(X,y,model,random=True)

Natural loss: 0.0060, Adversarial loss: 1290.9233, Difference: 1290.9174
rFGSM: attack effectiveness 100.000%.


In [59]:
# Mean number of active features per sample, before and after the attack.
# Divide by each tensor's own row count instead of the notebook-level `batch_size`,
# which other cells rebind and which need not match the batch currently held in X.
print(X.sum() / X.size(0))
print(x_adv.sum() / x_adv.size(0))

tensor(26.6289)
tensor(2741.3125)


In [64]:
# Single-step variant (k=1) with random rounding thresholds.
x_adv = dfgsm_k(X,y,model,k=1,random=True)


Natural loss: 0.0060, Adversarial loss: 38.6829, Difference: 38.6769
rFGSM: attack effectiveness 100.000%.


In [65]:
# Mean number of active features per sample, before and after the attack.
# Divide by each tensor's own row count instead of the notebook-level `batch_size`,
# which other cells rebind and which need not match the batch currently held in X.
print(X.sum() / X.size(0))
print(x_adv.sum() / x_adv.size(0))

tensor(26.6289)
tensor(120.8047)


In [149]:
# Sweep the random-threshold scale alpha for dfgsm_k and report, for each value,
# the attack effectiveness (printed by dfgsm_k) and the mean number of active
# features per sample before and after the attack.
for alpha in range(1, 52, 10):
  print('alpha = ',alpha)
  x_adv = dfgsm_k(X, y, model, k=20, epsilon=0.02, alpha=alpha,random=True)
  # Use the tensors' own row counts: the notebook-level `batch_size` is rebound
  # by the evaluation loops and need not match the batch currently held in X.
  print(X.sum() / X.size(0))
  print(x_adv.sum() / x_adv.size(0))
  print('***************************')

alpha =  1
rFGSM: attack effectiveness 100.000%.
tensor(28.0148)
tensor(2189.4138)
***************************
alpha =  11
rFGSM: attack effectiveness 100.000%.
tensor(28.0148)
tensor(222.7241)
***************************
alpha =  21
rFGSM: attack effectiveness 100.000%.
tensor(28.0148)
tensor(132.2562)
***************************
alpha =  31
rFGSM: attack effectiveness 99.507%.
tensor(28.0148)
tensor(98.3202)
***************************
alpha =  41
rFGSM: attack effectiveness 94.581%.
tensor(28.0148)
tensor(80.3054)
***************************
alpha =  51
rFGSM: attack effectiveness 91.626%.
tensor(28.0148)
tensor(70.1970)
***************************


In [88]:
# Evaluate dfgsm_k (k=20, epsilon=0.0005, random rounding) over the whole test set.
total_samples = 0
total_attack_success = 0
total_features_x = 0
total_features_x_adv = 0

for X, y in test_loader:
    # NOTE: this rebinds the notebook-level `batch_size`; it is kept because other
    # cells in this notebook divide by `batch_size` after this loop has run.
    batch_size = X.size(0)
    x_adv = dfgsm_k(X, y, model, k=20, epsilon=0.0005, random=True,is_report_loss_diff=False)

    # Accumulate the number of active features (means are computed after the loop)
    total_features_x += X.sum().item()
    total_features_x_adv += x_adv.sum().item()

    # Count misclassified adversarial samples in this batch.
    # No gradients are needed here, and comparing against the flattened labels
    # keeps the count correct for labels of shape (N,) as well as (N, 1).
    with torch.no_grad():
        outputs_adv = model(x_adv)
    predicted_adv = outputs_adv.argmax(dim=1)
    total_attack_success += torch.sum(predicted_adv != y.view(-1)).item()
    total_samples += batch_size

# Compute mean number of features
mean_features_x = total_features_x / total_samples
mean_features_x_adv = total_features_x_adv / total_samples

# Compute mean attack effectiveness (percentage of misclassified samples)
mean_attack_effectiveness = (total_attack_success / total_samples) * 100

# Print results
print('Mean number of features (X):', mean_features_x)
print('Mean number of features (X_adv):', mean_features_x_adv)
print('Mean attack effectiveness:', mean_attack_effectiveness)


Mean number of features (X): 27.5364492500872
Mean number of features (X_adv): 76.25450529008255
Mean attack effectiveness: 98.64356857729722


In [151]:
# Evaluate dfgsm_k (k=20, epsilon=0.02, alpha=40, random rounding) over the whole test set.
total_samples = 0
total_attack_success = 0
total_features_x = 0
total_features_x_adv = 0

for X, y in test_loader:
    # NOTE: this rebinds the notebook-level `batch_size`; it is kept because other
    # cells in this notebook divide by `batch_size` after this loop has run.
    batch_size = X.size(0)
    x_adv = dfgsm_k(X, y, model, k=20, epsilon=0.02, alpha=40,random=True,is_report_loss_diff=False)

    # Accumulate the number of active features (means are computed after the loop)
    total_features_x += X.sum().item()
    total_features_x_adv += x_adv.sum().item()

    # Count misclassified adversarial samples in this batch.
    # No gradients are needed here, and comparing against the flattened labels
    # keeps the count correct for labels of shape (N,) as well as (N, 1).
    with torch.no_grad():
        outputs_adv = model(x_adv)
    predicted_adv = outputs_adv.argmax(dim=1)
    total_attack_success += torch.sum(predicted_adv != y.view(-1)).item()
    total_samples += batch_size

# Compute mean number of features
mean_features_x = total_features_x / total_samples
mean_features_x_adv = total_features_x_adv / total_samples

# Compute mean attack effectiveness (percentage of misclassified samples)
mean_attack_effectiveness = (total_attack_success / total_samples) * 100

# Print results
print('Mean number of features (X):', mean_features_x)
print('Mean number of features (X_adv):', mean_features_x_adv)
print('Mean attack effectiveness:', mean_attack_effectiveness)

Mean number of features (X): 27.5364492500872
Mean number of features (X_adv): 81.65845056776344
Mean attack effectiveness: 96.50428244777738


In [164]:
def dfgsm_k2(x, y, model, k=25, epsilon=0.02, alpha=1., initial_rounding_threshold=0.5, round_threshold=0.5, random=False, is_report_loss_diff=True, is_sample=False):
    """
    Add-only variant of FGSM^k followed by rounding to a binary sample.

    Identical in structure to dfgsm_k, except that each step only moves along
    coordinates whose gradient sign is positive (the sign is clamped at 0), so
    no coordinate is ever decreased during the ascent. The result is rounded
    to 0/1 and ORed with x so that every feature set in x stays set.

    :param x: feature vectors, one row per sample
    :param y: ground truth labels (flattened with view(-1) before use)
    :param model: neural network model (switched to eval mode by this call)
    :param k: number of steps
    :param epsilon: update value applied to coordinates with positive gradient
    :param alpha: scale of the random rounding thresholds; only used when
        random is True, where thresholds are drawn uniformly from [0, alpha)
    :param initial_rounding_threshold: threshold forwarded to get_x0 for the starting point
    :param round_threshold: threshold for the deterministic rounding (ignored when random is True)
    :param random: if True, use a per-element random threshold instead of round_threshold
    :param is_report_loss_diff: flag to print the attack effectiveness
    :param is_sample: flag to sample the starting point randomly from the feasible area
    :return: the adversarial version of x according to dfgsm_k2 (tensor)
    """
    model.eval()

    criterion = nn.CrossEntropyLoss(reduction='none')
    target = y.view(-1).long()

    # Natural loss is only used for a comparison, so no autograd graph is needed.
    with torch.no_grad():
        loss_natural = criterion(model(x), target)

    # Initialize starting point
    x_next = get_x0(x.clone(), initial_rounding_threshold, is_sample)

    # Multi-step ascent restricted to increasing coordinates
    for _ in range(k):
        x_var = x_next.clone().detach().requires_grad_(True)
        loss = criterion(model(x_var), target)
        grad, = torch.autograd.grad(loss.mean(), x_var)

        # sign in {-1, 0, 1} clamped at 0 -> {0, 1}: negative directions are dropped.
        add_only_direction = torch.sign(grad).clamp(min=0)

        # Step, then project back onto the [0, 1] box.
        x_next = torch.clamp(x_next + epsilon * add_only_direction, min=0., max=1.)

    # Rounding step
    if random:
        # Thresholds are created on the same device as the sample being rounded.
        round_threshold = torch.rand(x_next.size(), device=x_next.device) * alpha
    x_next = round_x(x_next, round_threshold=round_threshold)

    # Feasible projection: never drop a feature that is present in x.
    x_next = or_float_tensors(x_next, x)

    # One forward pass serves both the adversarial loss and the report.
    with torch.no_grad():
        outputs = model(x_next)
        loss_adv = criterion(outputs, target)

    if is_report_loss_diff:
        # Compare against the flattened labels so the count is correct for
        # labels of shape (N,) as well as (N, 1).
        misclassified = outputs.argmax(dim=1) != target
        print(f"rFGSM: attack effectiveness {misclassified.sum().item() / x.size()[0] * 100:.3f}%.")

    # Fall back to the natural sample wherever the attack LOWERED the loss.
    replace_flag = loss_adv < loss_natural
    x_next[replace_flag] = x[replace_flag]

    return x_next

In [165]:
# Smoke-test dfgsm_k2 on a single sample (a batch of size one) with default settings.
x_adv = dfgsm_k2(X[:1],y[:1],model)

rFGSM: attack effectiveness 100.000%.


In [166]:
# Sweep the random-threshold scale alpha for dfgsm_k2 and report, for each value,
# the attack effectiveness (printed by dfgsm_k2) and the mean number of active
# features per sample before and after the attack.
for alpha in range(1, 52, 10):
  print('alpha = ',alpha)
  x_adv = dfgsm_k2(X, y, model, k=20, epsilon=0.02, alpha=alpha,random=True)
  # Use the tensors' own row counts: the notebook-level `batch_size` is rebound
  # by the evaluation loops and need not match the batch currently held in X.
  print(X.sum() / X.size(0))
  print(x_adv.sum() / x_adv.size(0))
  print('***************************')

alpha =  1
rFGSM: attack effectiveness 100.000%.
tensor(28.0148)
tensor(2246.3103)
***************************
alpha =  11
rFGSM: attack effectiveness 100.000%.
tensor(28.0148)
tensor(228.2020)
***************************
alpha =  21
rFGSM: attack effectiveness 100.000%.
tensor(28.0148)
tensor(132.3547)
***************************
alpha =  31
rFGSM: attack effectiveness 99.507%.
tensor(28.0148)
tensor(98.7537)
***************************
alpha =  41
rFGSM: attack effectiveness 98.030%.
tensor(28.0148)
tensor(82.3054)
***************************
alpha =  51
rFGSM: attack effectiveness 88.177%.
tensor(28.0148)
tensor(71.4335)
***************************


In [167]:
# Evaluate dfgsm_k2 (k=20, epsilon=0.02, alpha=40, random rounding) over the whole test set.
total_samples = 0
total_attack_success = 0
total_features_x = 0
total_features_x_adv = 0

for X, y in test_loader:
    # NOTE: this rebinds the notebook-level `batch_size`; it is kept because other
    # cells in this notebook divide by `batch_size` after this loop has run.
    batch_size = X.size(0)
    x_adv = dfgsm_k2(X, y, model, k=20, epsilon=0.02, alpha=40,random=True,is_report_loss_diff=False)

    # Accumulate the number of active features (means are computed after the loop)
    total_features_x += X.sum().item()
    total_features_x_adv += x_adv.sum().item()

    # Count misclassified adversarial samples in this batch.
    # No gradients are needed here, and comparing against the flattened labels
    # keeps the count correct for labels of shape (N,) as well as (N, 1).
    with torch.no_grad():
        outputs_adv = model(x_adv)
    predicted_adv = outputs_adv.argmax(dim=1)
    total_attack_success += torch.sum(predicted_adv != y.view(-1)).item()
    total_samples += batch_size

# Compute mean number of features
mean_features_x = total_features_x / total_samples
mean_features_x_adv = total_features_x_adv / total_samples

# Compute mean attack effectiveness (percentage of misclassified samples)
mean_attack_effectiveness = (total_attack_success / total_samples) * 100

# Print results
print('Mean number of features (X):', mean_features_x)
print('Mean number of features (X_adv):', mean_features_x_adv)
print('Mean attack effectiveness:', mean_attack_effectiveness)

Mean number of features (X): 27.5364492500872
Mean number of features (X_adv): 82.96155485796226
Mean attack effectiveness: 96.48878037437507


.clone(): This method returns a new tensor with its own copy of the data (same shape, dtype and device). It does not copy the `.grad` attribute. The operation is differentiable: if the original tensor is part of a computation graph and requires gradients, the clone stays in that graph, and gradients flowing into the clone propagate back to the original. In-place changes to the clone's data do not affect the original's data, and vice versa. To get an independent copy that is cut off from the graph, combine it with detach, e.g. `x.clone().detach()`.

.copy(): PyTorch tensors do not have a `.copy()` method (that name belongs to the NumPy array API). The closest PyTorch operations are `.detach()` and `.copy_()`. `.detach()` returns a tensor that is removed from the computation graph and does not require gradients, even if the original did; it shares storage with the original, so in-place changes to either tensor's data are visible in the other. `.copy_(src)` copies the values of `src` into an existing tensor in place.



In [132]:
def bga_k(x, y, model, k=25, alpha=1., is_report_loss_diff=True, use_sample=False):
    """
    Multi-step bit gradient ascent.

    Starting from x (optionally with random extra bits switched on), each step
    flips the bits whose signed gradient contribution is large relative to the
    gradient norm, then ORs the result with x so no feature of x is removed.
    The sample with the highest loss seen for each row is returned.

    :param x: feature vectors, one row per sample (expected to hold 0s and 1s)
    :param y: ground truth labels (flattened with view(-1) before use)
    :param model: neural network model (switched to eval mode by this call)
    :param k: number of forward evaluations (k - 1 bit-flip updates are applied)
    :param alpha: hyperparameter for controlling updates; larger values flip fewer bits
    :param is_report_loss_diff: flag to print the attack effectiveness
    :param use_sample: flag to sample the starting point randomly from the feasible area
    :return: the adversarial version of x according to bga_k (tensor)
    """
    model.eval()

    criterion = nn.CrossEntropyLoss(reduction='none')
    target = y.view(-1).long()

    # Natural loss only seeds the book-keeping below, so no autograd graph is needed.
    with torch.no_grad():
        loss_natural = criterion(model(x), target)

    # Initialize worst loss and corresponding adversarial samples
    loss_worst = loss_natural.clone()
    x_worst = x.clone()

    # Book-keeping: sqrt of the number of features, as a plain Python float so it
    # works regardless of the device x lives on.
    sqrt_m = x.size()[1] ** 0.5

    # Initialize starting point. The flag was previously passed positionally into
    # get_x0's threshold slot, so sampling was never actually enabled.
    if use_sample:
        random_bits = round_x(torch.rand(x.size(), device=x.device))
        x_next = or_float_tensors(random_bits, x)
    else:
        x_next = x

    # Multi-step with gradients
    for t in range(k):
        # Forward pass
        x_var = x_next.clone().detach().requires_grad_(True)
        loss = criterion(model(x_var), target)
        loss_value = loss.detach()

        # Update worst loss and adversarial samples
        replace_flag = loss_value > loss_worst
        loss_worst[replace_flag] = loss_value[replace_flag]
        x_worst[replace_flag] = x_next[replace_flag]

        # The last evaluation needs no further update.
        if t == k - 1:
            break

        # Compute gradient
        grad, = torch.autograd.grad(loss.mean(), x_var)
        grad_norm = grad.norm(p=2, dim=1, keepdim=True)

        # Compute the updates: (1 - 2x) is +1 for unset bits and -1 for set bits,
        # so a bit is flipped when flipping it moves along the gradient strongly enough.
        x_update = (sqrt_m * (1. - 2. * x_next) * grad >= alpha * grad_norm).float()

        # Find the next sample with projection to the feasible set
        x_next = xor_float_tensors(x_update, x_next)
        x_next = or_float_tensors(x_next, x)

    if is_report_loss_diff:
        with torch.no_grad():
            outputs = model(x_worst)
        # Compare against the flattened labels so the count is correct for
        # labels of shape (N,) as well as (N, 1).
        misclassified = outputs.argmax(dim=1) != target
        print(f"BGA: attack effectiveness {misclassified.sum().item() / x.size()[0] * 100:.3f}%.")

    return x_worst


In [133]:
# Run bit gradient ascent with default alpha on the current batch and report the
# mean number of active features per sample before and after the attack.
x_adv = bga_k(X, y, model, k=25, is_report_loss_diff=True, use_sample=False)
# Use the tensors' own row counts: the notebook-level `batch_size` is rebound
# by the evaluation loops and need not match the batch currently held in X.
print(X.sum() / X.size(0))
print(x_adv.sum() / x_adv.size(0))

rFGSM: attack effectiveness 100.000%.
tensor(28.0148)
tensor(2289.4089)


In [135]:
# Sweep alpha for bga_k and report, for each value, the attack effectiveness
# (printed by bga_k) and the mean number of active features per sample before
# and after the attack.
for alpha in range(1,6):
  print('alpha = ',alpha)
  x_adv = bga_k(X, y, model, k=25, alpha=alpha, is_report_loss_diff=True, use_sample=False)
  # Use the tensors' own row counts: the notebook-level `batch_size` is rebound
  # by the evaluation loops and need not match the batch currently held in X.
  print(X.sum() / X.size(0))
  print(x_adv.sum() / x_adv.size(0))
  print('***************************')

alpha =  1
rFGSM: attack effectiveness 100.000%.
tensor(28.0148)
tensor(2289.4089)
***************************
alpha =  2
rFGSM: attack effectiveness 100.000%.
tensor(28.0148)
tensor(577.0690)
***************************
alpha =  3
rFGSM: attack effectiveness 100.000%.
tensor(28.0148)
tensor(185.2315)
***************************
alpha =  4
rFGSM: attack effectiveness 95.567%.
tensor(28.0148)
tensor(71.5714)
***************************
alpha =  5
rFGSM: attack effectiveness 1.478%.
tensor(28.0148)
tensor(28.0887)
***************************
