<a href="https://colab.research.google.com/github/mostafa-ja/mal_adv3/blob/main/4_adverserial_attacks_drebin.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

# Every remote artifact this notebook fetches from the project's GitHub repository.
download_links = [
    # Feature matrices (read later as the train / validation / test splits)
    'https://github.com/mostafa-ja/mal_adv3/raw/main/drebin/sparse_matrix_0.npz',
    'https://github.com/mostafa-ja/mal_adv3/raw/main/drebin/sparse_matrix_1.npz',
    'https://github.com/mostafa-ja/mal_adv3/raw/main/drebin/sparse_matrix_2.npz',
    # Label vectors matching the three splits above
    'https://github.com/mostafa-ja/mal_adv3/raw/main/drebin/sparse_matrix_y0.npz',
    'https://github.com/mostafa-ja/mal_adv3/raw/main/drebin/sparse_matrix_y1.npz',
    'https://github.com/mostafa-ja/mal_adv3/raw/main/drebin/sparse_matrix_y2.npz',
    # Weights of the baseline DNN
    'https://github.com/mostafa-ja/mal_adv3/raw/main/drebin/model_DNN_drebin_best.pth',
    # Attack implementations imported by the notebook
    'https://github.com/mostafa-ja/mal_adv3/raw/main/data/adverserial_attacks_functions.py',
    # Weights of an adversarially trained model
    'https://github.com/mostafa-ja/mal_adv3/raw/main/drebin/model_AT_rFGSM_weightedLoss.pth',
]

In [None]:
import gdown

# Directory that receives every downloaded file (the Colab working directory).
output_filepath = '/content/'

for link in download_links:
    gdown.download(url=link, output=output_filepath)


Downloading...
From: https://github.com/mostafa-ja/mal_adv3/raw/main/drebin/sparse_matrix_0.npz
To: /content/sparse_matrix_0.npz
100%|██████████| 461k/461k [00:00<00:00, 6.60MB/s]
Downloading...
From: https://github.com/mostafa-ja/mal_adv3/raw/main/drebin/sparse_matrix_1.npz
To: /content/sparse_matrix_1.npz
100%|██████████| 148k/148k [00:00<00:00, 3.40MB/s]
Downloading...
From: https://github.com/mostafa-ja/mal_adv3/raw/main/drebin/sparse_matrix_2.npz
To: /content/sparse_matrix_2.npz
100%|██████████| 150k/150k [00:00<00:00, 2.56MB/s]
Downloading...
From: https://github.com/mostafa-ja/mal_adv3/raw/main/drebin/sparse_matrix_y0.npz
To: /content/sparse_matrix_y0.npz
100%|██████████| 5.79k/5.79k [00:00<00:00, 14.8MB/s]
Downloading...
From: https://github.com/mostafa-ja/mal_adv3/raw/main/drebin/sparse_matrix_y1.npz
To: /content/sparse_matrix_y1.npz
100%|██████████| 2.64k/2.64k [00:00<00:00, 10.5MB/s]
Downloading...
From: https://github.com/mostafa-ja/mal_adv3/raw/main/drebin/sparse_matrix_y2

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score,balanced_accuracy_score
import numpy as np
from scipy import sparse
import matplotlib.pyplot as plt
import pickle
import random
import time

from adverserial_attacks_functions import *

# Seed PyTorch's global random number generator.
torch.manual_seed(0)

# Device configuration: use CUDA when it is available, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)

cuda


In [None]:

# Load dataset
def _load_int8(npz_path, as_column=False):
    """Read a SciPy sparse matrix from `npz_path`, densify it and return it as an int8 tensor.

    With `as_column=True` the dense array is reshaped to a single column first.
    """
    dense = sparse.load_npz(npz_path).toarray()
    if as_column:
        dense = dense.reshape((-1, 1))
    return torch.tensor(dense, dtype=torch.int8)


# Feature matrices for the three splits
X_train = _load_int8("/content/sparse_matrix_0.npz")
X_val = _load_int8("/content/sparse_matrix_1.npz")
X_test = _load_int8("/content/sparse_matrix_2.npz")

# Labels, one row per sample
y_train = _load_int8("/content/sparse_matrix_y0.npz", as_column=True)
y_val = _load_int8("/content/sparse_matrix_y1.npz", as_column=True)
y_test = _load_int8("/content/sparse_matrix_y2.npz", as_column=True)

print("Shapes:")
print("x_train:", X_train.shape)
print("x_val:", X_val.shape)
print("x_test:", X_test.shape)
print("y_train:", y_train.shape)
print("y_val:", y_val.shape)
print("y_test:", y_test.shape)

Shapes:
x_train: torch.Size([28683, 10000])
x_val: torch.Size([9562, 10000])
x_test: torch.Size([9562, 10000])
y_train: torch.Size([28683, 1])
y_val: torch.Size([9562, 1])
y_test: torch.Size([9562, 1])


In [None]:
# Class balance of the training labels (label 0 counted as benign, label 1 as malicious)
n_ben = int((y_train.squeeze() == 0).sum())
n_mal = int((y_train.squeeze() == 1).sum())
print('the proportion of malwares : ', n_mal/(n_mal+n_ben))

batch_size = 128

# Pair each feature matrix with its labels
train_dataset, val_dataset, test_dataset = (
    TensorDataset(features, labels)
    for features, labels in ((X_train, y_train), (X_val, y_val), (X_test, y_test))
)

# Only the training loader shuffles; validation and test keep their stored order
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader, test_loader = (
    DataLoader(dataset, batch_size=batch_size, shuffle=False)
    for dataset in (val_dataset, test_dataset)
)

# Drop the notebook-level names that are no longer needed; the loaders keep their own references
del train_dataset, val_dataset, test_dataset, y_train, y_val, y_test


the proportion of malwares :  0.11386535578565701


In [None]:
class MalwareDetectionModel(nn.Module):
    """Three-layer fully connected classifier.

    Data flow: fc1 -> ReLU -> fc2 -> ReLU -> dropout -> fc3.
    `forward` returns the raw output of fc3 (one logit per label); no softmax is applied.
    """

    def __init__(self, input_size=10000, hidden_1_size=200, hidden_2_size=200, num_labels=2, dropout_prob=0.6):
        super().__init__()

        # Keep the hyper-parameters on the instance.
        self.input_size, self.hidden_1_size, self.hidden_2_size = input_size, hidden_1_size, hidden_2_size
        self.num_labels, self.dropout_prob = num_labels, dropout_prob

        # Layers, created in the order they are applied. Dropout sits only after the second hidden layer.
        self.fc1 = nn.Linear(input_size, hidden_1_size)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(hidden_1_size, hidden_2_size)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(dropout_prob)
        self.fc3 = nn.Linear(hidden_2_size, num_labels)

    def forward(self, x):
        """Map a float feature batch to per-label logits."""
        hidden = self.relu1(self.fc1(x))
        hidden = self.dropout2(self.relu2(self.fc2(hidden)))
        return self.fc3(hidden)


In [None]:
# Baseline detector: build the network on `device`, then restore the weights stored in the checkpoint.
model_DNN = MalwareDetectionModel().to(device)
model_DNN.load_state_dict(torch.load('model_DNN_drebin_best.pth', map_location=device))

<All keys matched successfully>

In [None]:
def model_evaluation(model, test_loader, device=None):
    """Evaluate a two-class classifier on `test_loader` and print its metrics.

    Args:
        model: torch.nn.Module that maps a float32 feature batch to per-class logits.
        test_loader: iterable of (features, labels) batches with 0/1 labels.
        device: device used for the forward pass. When omitted, the device of the
            model's first parameter is used (CPU if the model has no parameters),
            so the function can also be called as `model_evaluation(model, loader)`.

    Prints accuracy, balanced accuracy, precision, recall, F1, the confusion
    counts, and the false negative / false positive rates in percent.
    Returns None.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()  # Set the model to evaluation mode
    batch_predictions = []
    batch_labels = []

    with torch.no_grad():
        for X_batch, labels_batch in test_loader:
            outputs = model(X_batch.to(torch.float32).to(device))
            # Keep everything 1-D so the metric functions and the masks below see flat vectors
            batch_predictions.append(outputs.argmax(dim=1).cpu())
            batch_labels.append(labels_batch.reshape(-1).to(torch.int64).cpu())

    # Convert predictions and true labels to numpy arrays
    predictions = torch.cat(batch_predictions).numpy()
    true_labels = torch.cat(batch_labels).numpy()

    # Calculate and print test accuracy
    accuracy = accuracy_score(true_labels, predictions)
    balanced_acc = balanced_accuracy_score(true_labels, predictions)
    print(f'Test Accuracy: {accuracy:.4f}')
    print(f'Test balanced Accuracy: {balanced_acc:.4f}')

    # Calculate and print precision, recall, and F1-score
    precision = precision_score(true_labels, predictions)
    recall = recall_score(true_labels, predictions)
    f1 = f1_score(true_labels, predictions)

    print(f'Precision: {precision:.4f}')
    print(f'Recall: {recall:.4f}')
    print(f'F1-score: {f1:.4f}')

    # Confusion-matrix counts, with label 1 as the positive class
    TP = ((predictions == 1) & (true_labels == 1)).sum()
    TN = ((predictions == 0) & (true_labels == 0)).sum()
    FP = ((predictions == 1) & (true_labels == 0)).sum()
    FN = ((predictions == 0) & (true_labels == 1)).sum()

    print(f'True Positives (TP): {TP}')
    print(f'True Negatives (TN): {TN}')
    print(f'False Positives (FP): {FP}')
    print(f'False Negatives (FN): {FN}')

    # Rates in percent; undefined (nan) when the corresponding class is absent
    n_positive = FN + TP
    n_negative = FP + TN
    FNR = (FN / n_positive) * 100 if n_positive else float('nan')
    FPR = (FP / n_negative) * 100 if n_negative else float('nan')

    print(f'False Negative Rate (FNR): {FNR:.4f}')
    print(f'False Positive Rate (FPR): {FPR:.4f}')

In [None]:
# Metrics of model_DNN on the unperturbed test set.
model_evaluation(model_DNN, test_loader, device)

Test Accuracy: 0.9912
Test balanced Accuracy: 0.9785
Precision: 0.9637
Recall: 0.9619
F1-score: 0.9628
True Positives (TP): 1087
True Negatives (TN): 8391
False Positives (FP): 41
False Negatives (FN): 43
False Negative Rate (FNR): 3.8053
False Positive Rate (FPR): 0.4862


In [None]:
def adjust_learning_rate(optimizer, epoch, lr_step=(25,35,45), lr_decay_ratio=0.2):
    """Apply the warm-up / step-decay schedule for `epoch` to the first parameter group.

    Schedule (epochs are 0-based):
      * epoch 0 divides the learning rate by 8;
      * epochs 1, 2 and 3 each double it, so the original rate is restored at epoch 3;
      * every epoch listed in `lr_step` additionally multiplies it by `lr_decay_ratio`.

    Only `optimizer.param_groups[0]` is modified. The same optimizer is returned.
    """
    group = optimizer.param_groups[0]
    warmup_doubling_epochs = (1, 2, 3)

    if epoch == 0:
        group['lr'] /= 8
    elif epoch in warmup_doubling_epochs:
        group['lr'] *= 2

    # Decay is checked independently of the warm-up branch above.
    if epoch in lr_step:
        group['lr'] *= lr_decay_ratio

    return optimizer


In [None]:
def adversarial_training(model, train_loader, val_loader, attack, adv_epochs=50, lr=0.001, weight_decay=0., device=device, verbose=True, **kwargs):
    """Adversarially train `model` and checkpoint the best epoch to 'best_AT_model.pth'.

    For every training batch, the rows labelled 1 (malware) are replaced by
    `attack(mal_x, mal_y, model, **kwargs)`; rows labelled 0 are used unchanged.
    The loss is class-weighted cross-entropy (weights 0.11 / 0.89) optimised with
    Adam, and `adjust_learning_rate` is applied once at the start of every epoch.

    After each epoch the model is scored on `val_loader` as the mean of
    (a) the batch-averaged accuracy on unperturbed data and
    (b) the batch-averaged fraction of attacked malware still predicted as label 1.
    Whenever that score ties or beats the best so far, the state dict is saved.

    Args:
        model: network to train (modified in place).
        train_loader, val_loader: iterables of (features, labels) batches with 0/1 labels.
        attack: callable invoked as `attack(x, y, model, **kwargs)` that returns the perturbed features.
        adv_epochs: number of epochs.
        lr, weight_decay: Adam hyper-parameters.
        device: device that batches and class weights are moved to.
        verbose: print per-epoch training and validation summaries.
        **kwargs: forwarded to `attack`.

    Returns:
        None.
    """
    # Assuming positive class (malware) is label 1
    class_weights = torch.tensor([0.11, 0.89]).to(device)  # higher weight for the positive class

    # Define Loss Function and Optimizer
    criterion = nn.CrossEntropyLoss(weight=class_weights)
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

    best_acc_val = 0.
    acc_val_adv_be = 0.
    best_epoch = 0

    for epoch in range(adv_epochs):
        epoch_losses = []
        epoch_accuracies = []
        optimizer = adjust_learning_rate(optimizer, epoch)

        for x_batch, y_batch in train_loader:
            x_batch, y_batch = x_batch.to(torch.float32).to(device), y_batch.to(device)

            # Separate malicious and benign samples. reshape(-1) keeps the mask 1-D even for a
            # single-sample batch, where squeeze() would collapse it to a scalar.
            flat_labels = y_batch.reshape(-1)
            mal_mask, ben_mask = flat_labels == 1, flat_labels == 0
            mal_x_batch, ben_x_batch = x_batch[mal_mask], x_batch[ben_mask]
            mal_y_batch, ben_y_batch = y_batch[mal_mask], y_batch[ben_mask]

            # Generate adversarial examples (the attack runs against the model in eval mode)
            model.eval()
            if mal_x_batch.shape[0] > 0:
                pertb_mal_x = attack(mal_x_batch, mal_y_batch, model, **kwargs)
            else:
                # Nothing to perturb in a malware-free batch
                pertb_mal_x = mal_x_batch
            x_batch = torch.cat([ben_x_batch, pertb_mal_x], dim=0)
            y_batch = torch.cat([ben_y_batch, mal_y_batch])
            model.train()

            # Forward pass and backward pass
            optimizer.zero_grad()
            outputs = model(x_batch)
            loss_train = criterion(outputs, y_batch.view(-1).long())
            loss_train.backward()
            optimizer.step()

            # Calculate metrics
            epoch_losses.append(loss_train.item())
            predicted = outputs.argmax(1).unsqueeze(1)
            acc_train = (predicted == y_batch).sum().item() / len(y_batch)
            epoch_accuracies.append(acc_train)

        # Calculate epoch level metrics
        mean_loss = np.mean(epoch_losses)
        mean_accuracy = np.mean(epoch_accuracies) * 100

        # Print epoch level information
        if verbose:
            print(f'Epoch: {epoch+1}/{adv_epochs} | Training loss (epoch level): {mean_loss:.4f} | Train accuracy: {mean_accuracy:.2f}%')

        # Evaluation on validation set
        model.eval()
        avg_acc_ad_val = []
        avg_acc_val = []
        for x_val, y_val in val_loader:
            x_val, y_val = x_val.to(torch.float32).to(device), y_val.to(device)
            with torch.no_grad():
                predicted = model(x_val).argmax(1).unsqueeze(1)
            acc_val = (predicted == y_val).sum().item() / len(y_val)
            avg_acc_val.append(acc_val)

            # Generate adversarial examples for the malware in this validation batch.
            # A batch without malware has nothing to attack and would divide by zero below.
            mal_mask = y_val.reshape(-1) == 1
            if not mal_mask.any():
                continue
            mal_x_batch, mal_y_batch = x_val[mal_mask], y_val[mal_mask]
            pertb_mal_x = attack(mal_x_batch, mal_y_batch, model, **kwargs)
            with torch.no_grad():
                y_pred = model(pertb_mal_x).argmax(1).unsqueeze(1)

            acc_ad_val = (y_pred == 1.).sum().item() / len(y_pred)
            avg_acc_ad_val.append(acc_ad_val)

        # Calculate validation accuracy
        assert len(avg_acc_ad_val) > 0, "validation set contains no malicious samples"
        acc_all = (np.mean(avg_acc_val) + np.mean(avg_acc_ad_val)) / 2.

        # Update best validation accuracy (ties favour the later epoch)
        if acc_all >= best_acc_val:
            best_acc_val = acc_all
            acc_val_adv_be = np.mean(avg_acc_ad_val)
            best_epoch = epoch + 1
            torch.save(model.state_dict(), 'best_AT_model.pth')

        # Print validation results
        if verbose:
            print(f"\tVal accuracy(without attack) {np.mean(avg_acc_val) * 100:.4}% and accuracy(with attack) {np.mean(avg_acc_ad_val) * 100:.4}% under attack and overall accuracy {acc_all * 100:.4}%.")
            print(f"\tModel select at epoch {best_epoch} with validation accuracy {best_acc_val * 100:.4}% and accuracy {acc_val_adv_be * 100:.4}% under attack.")


In [None]:
def adv_predict(test_loader, model, attack, device, **kwargs):
    """Print the model's accuracy on `test_loader` without and under `attack`.

    Two batch-averaged figures are reported:
      * accuracy on the unperturbed batches (all samples), and
      * the fraction of label-1 (malware) samples still predicted as 1 after
        being perturbed by `attack`.
    Batches that contain no malware contribute only to the first figure.

    Args:
        test_loader: iterable of (features, labels) batches with 0/1 labels.
        model: classifier returning per-class logits.
        attack: either the `mimicry` function, called as
            `mimicry(benign_x, mal_x, model, **kwargs)` with all label-0 test samples as
            `benign_x`, or any callable invoked as `attack(mal_x, mal_y, model, **kwargs)`.
        device: device the batches are moved to.
        **kwargs: forwarded to the attack.

    Returns:
        None.
    """
    use_mimicry = attack is mimicry

    ben_x = None
    if use_mimicry:
        # Pre-select benign samples: every label-0 row of the loader, kept in its stored dtype
        ben_x = torch.cat(
            [x_batch[y_batch.reshape(-1) == 0] for x_batch, y_batch in test_loader], dim=0
        ).to(device)

    model.eval()
    avg_acc_ad_test = []
    avg_acc_test = []
    with torch.no_grad():
        for x_test, y_test in test_loader:
            x_test, y_test = x_test.to(torch.float32).to(device), y_test.to(device)
            predicted = model(x_test).argmax(dim=1, keepdim=True)
            acc_test = (predicted == y_test).sum().item() / len(y_test)
            avg_acc_test.append(acc_test)

            # Generate adversarial examples for the malware in this batch.
            # reshape(-1) keeps the mask 1-D even for a single-sample batch.
            mal_mask = y_test.reshape(-1) == 1
            if not mal_mask.any():
                # Nothing to attack; dividing by len(y_pred) == 0 would raise ZeroDivisionError
                continue
            mal_x_batch, mal_y_batch = x_test[mal_mask], y_test[mal_mask]

            if use_mimicry:
                pertb_mal_x = mimicry(ben_x, mal_x_batch, model, **kwargs)
            else:
                # Re-enable autograd for the attack call only
                with torch.enable_grad():
                    pertb_mal_x = attack(mal_x_batch, mal_y_batch, model, **kwargs)

            y_pred = model(pertb_mal_x).argmax(dim=1, keepdim=True)

            acc_ad_test = (y_pred == 1.).sum().item() / len(y_pred)
            avg_acc_ad_test.append(acc_ad_test)

    # Calculate test accuracy (nan when the loader held no malware at all)
    clean_acc = np.mean(avg_acc_test)
    attacked_acc = np.mean(avg_acc_ad_test) if avg_acc_ad_test else float('nan')
    print(f"Adversarial accuracy (without attack): {clean_acc * 100:.4}% | Under attack: {attacked_acc * 100:.4}%.")


In [None]:
# AT-rFGSM: adversarial training based on the rFGSM attack (dfgsm_k, 50 steps, epsilon 0.02).
# adversarial_training writes its best checkpoint to 'best_AT_model.pth'.
model_AT = MalwareDetectionModel().to(device)

attack_param = {"k":50, "epsilon":0.02, 'random':True, "is_sample":False, 'is_report_loss_diff':False}
adversarial_training(model_AT, train_loader, val_loader, adv_epochs=50, attack=dfgsm_k, **attack_param)

Training loss (epoch level): 0.4688 | Train accuracy: 84.47%
	Val accuracy(without attack) 96.55% and accuracy(with attack) 71.52% under attack and overall accuracy 84.04%.
	Model select at epoch 1 with validation accuracy 84.04% and accuracy 71.52% under attack.
Training loss (epoch level): 0.1100 | Train accuracy: 96.80%
	Val accuracy(without attack) 98.09% and accuracy(with attack) 85.61% under attack and overall accuracy 91.85%.
	Model select at epoch 2 with validation accuracy 91.85% and accuracy 85.61% under attack.
Training loss (epoch level): 0.0643 | Train accuracy: 98.12%
	Val accuracy(without attack) 98.39% and accuracy(with attack) 89.02% under attack and overall accuracy 93.7%.
	Model select at epoch 3 with validation accuracy 93.7% and accuracy 89.02% under attack.
Training loss (epoch level): 0.0489 | Train accuracy: 98.43%
	Val accuracy(without attack) 98.78% and accuracy(with attack) 88.53% under attack and overall accuracy 93.66%.
	Model select at epoch 3 with validat

In [None]:
# Create a fresh model instance to receive the saved weights
model_AT_rFGSM = MalwareDetectionModel().to(device)

# Load model parameters
# NOTE(review): no visible cell writes or downloads 'best_model.pth' (adversarial_training saves
# 'best_AT_model.pth'), so this presumably relies on a file left over from an earlier session.
# Confirm the intended checkpoint before a Restart & Run All.
model_AT_rFGSM.load_state_dict(torch.load('best_model.pth', map_location=torch.device(device)))

<All keys matched successfully>

In [None]:
# Metrics of model_AT_rFGSM on the unperturbed test set.
model_evaluation(model_AT_rFGSM,test_loader,device)

Test Accuracy: 0.9886
Test balanced Accuracy: 0.9663
Precision: 0.9654
Recall: 0.9372
F1-score: 0.9511
True Positives (TP): 1059
True Negatives (TN): 8394
False Positives (FP): 38
False Negatives (FN): 71
False Negative Rate (FNR): 6.2832
False Positive Rate (FPR): 0.4507


In [None]:
# with weighted loss
# AT-rFGSM: adversarial training based on the rFGSM attack
# NOTE(review): the class weights are hard-coded inside adversarial_training, so this call is
# identical to the earlier model_AT run apart from the model instance. Confirm how the
# unweighted variant was produced.
model_AT2 = MalwareDetectionModel().to(device)

attack_param = {"k":50, "epsilon":0.02, 'random':True, "is_sample":False, 'is_report_loss_diff':False}
adversarial_training(model_AT2, train_loader, val_loader, adv_epochs=50, attack=dfgsm_k, **attack_param)

Epoch: 1/50 | Training loss (epoch level): 0.5498 | Train accuracy: 80.06%
	Val accuracy(without attack) 90.37% and accuracy(with attack) 93.56% under attack and overall accuracy 91.96%.
	Model select at epoch 1 with validation accuracy 91.96% and accuracy 93.56% under attack.
Epoch: 2/50 | Training loss (epoch level): 0.2260 | Train accuracy: 94.87%
	Val accuracy(without attack) 95.99% and accuracy(with attack) 93.56% under attack and overall accuracy 94.77%.
	Model select at epoch 2 with validation accuracy 94.77% and accuracy 93.56% under attack.
Epoch: 3/50 | Training loss (epoch level): 0.1326 | Train accuracy: 96.29%
	Val accuracy(without attack) 95.99% and accuracy(with attack) 95.85% under attack and overall accuracy 95.92%.
	Model select at epoch 3 with validation accuracy 95.92% and accuracy 95.85% under attack.
Epoch: 4/50 | Training loss (epoch level): 0.1006 | Train accuracy: 96.87%
	Val accuracy(without attack) 96.51% and accuracy(with attack) 96.57% under attack and over

In [None]:
# Create a fresh model instance to receive the saved weights
model_AT_rFGSM_weightedLoss = MalwareDetectionModel().to(device)

# Load model parameters from the checkpoint that adversarial_training writes ('best_AT_model.pth')
model_AT_rFGSM_weightedLoss.load_state_dict(torch.load('best_AT_model.pth', map_location=torch.device(device)))

<All keys matched successfully>

In [None]:
# Metrics of model_AT_rFGSM_weightedLoss on the unperturbed test set.
model_evaluation(model_AT_rFGSM_weightedLoss,test_loader,device)

Test Accuracy: 0.9861
Test balanced Accuracy: 0.9745
Precision: 0.9257
Recall: 0.9593
F1-score: 0.9422
True Positives (TP): 1084
True Negatives (TN): 8345
False Positives (FP): 87
False Negatives (FN): 46
False Negative Rate (FNR): 4.0708
False Positive Rate (FPR): 1.0318


In [None]:
# rFGSM (dfgsm_k, k=50, epsilon=0.02, random start): the three models under the same attack settings
attack_params = {"k":50, "epsilon":0.02, 'random':True, 'is_report_loss_diff':False, 'is_sample':False}
adv_predict(test_loader, model_DNN, dfgsm_k, device, **attack_params)
adv_predict(test_loader, model_AT_rFGSM, dfgsm_k, device, **attack_params)
adv_predict(test_loader, model_AT_rFGSM_weightedLoss, dfgsm_k, device, **attack_params)


Adversarial accuracy (without attack): 99.12% | Under attack: 0.0%.
Adversarial accuracy (without attack): 98.86% | Under attack: 93.51%.
Adversarial accuracy (without attack): 98.61% | Under attack: 94.8%.


In [None]:
# rFGSM (dfgsm_k, k=100, epsilon=0.02, random start): same comparison with twice as many steps
attack_params = {"k":100, "epsilon":0.02, 'random':True, 'is_report_loss_diff':False, 'is_sample':False}
adv_predict(test_loader, model_DNN, dfgsm_k, device, **attack_params)
adv_predict(test_loader, model_AT_rFGSM, dfgsm_k, device, **attack_params)
adv_predict(test_loader, model_AT_rFGSM_weightedLoss, dfgsm_k, device, **attack_params)

Adversarial accuracy (without attack): 99.12% | Under attack: 0.0%.
Adversarial accuracy (without attack): 98.86% | Under attack: 93.44%.
Adversarial accuracy (without attack): 98.61% | Under attack: 95.05%.


In [None]:
# Layer-by-layer summary of model_AT_rFGSM for a 10000-feature input.
# NOTE(review): the stored output below lists Dropout-3 and LogSoftmax-8 layers that the
# MalwareDetectionModel defined in this notebook does not contain. It looks stale, so re-run to refresh.
from torchsummary import summary

summary(model_AT_rFGSM, input_size=(10000,))


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                  [-1, 200]       2,000,200
              ReLU-2                  [-1, 200]               0
           Dropout-3                  [-1, 200]               0
            Linear-4                  [-1, 200]          40,200
              ReLU-5                  [-1, 200]               0
           Dropout-6                  [-1, 200]               0
            Linear-7                    [-1, 2]             402
        LogSoftmax-8                    [-1, 2]               0
Total params: 2,040,802
Trainable params: 2,040,802
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.04
Forward/backward pass size (MB): 0.01
Params size (MB): 7.79
Estimated Total Size (MB): 7.83
----------------------------------------------------------------


In [None]:
# Pass the device explicitly, matching the other model_evaluation calls in this notebook.
model_evaluation(model_AT_rFGSM, test_loader, device)

Test Accuracy: 0.9936
Precision: 0.9365
Recall: 0.9146
F1-score: 0.9254
True Positives (TP): 1017
True Negatives (TN): 24622
False Positives (FP): 69
False Negatives (FN): 95
False Negative Rate (FNR): 0.0854
False Positive Rate (FPR): 0.0028


In [None]:
# Pass the device explicitly, matching the other model_evaluation calls in this notebook.
model_evaluation(model_DNN, test_loader, device)

Test Accuracy: 0.9934
Precision: 0.9654
Recall: 0.8786
F1-score: 0.9200
True Positives (TP): 977
True Negatives (TN): 24656
False Positives (FP): 35
False Negatives (FN): 135
False Negative Rate (FNR): 0.1214
False Positive Rate (FPR): 0.0014


In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Evaluate the trained model on the test set
model_AT_rFGSM.eval()  # Set the model to evaluation mode
predictions = []
true_labels = []

with torch.no_grad():
    for X_batch, labels_batch in test_loader:
        # The loader yields int8 features on the CPU; the linear layers need float32
        # inputs on the model's device (feeding int8 raised the dtype RuntimeError).
        outputs = model_AT_rFGSM(X_batch.to(torch.float32).to(device))
        _, predicted = torch.topk(outputs, k=1)
        predictions.extend(predicted.tolist())
        true_labels.extend(labels_batch.tolist())

# Convert predictions and true labels to numpy arrays
predictions = np.array(predictions)
true_labels = np.array(true_labels)

# Calculate test accuracy
accuracy = accuracy_score(true_labels, predictions)
print(f'Test Accuracy: {accuracy:.4f}')

# Calculate precision, recall, and F1-score
precision = precision_score(true_labels, predictions)
recall = recall_score(true_labels, predictions)
f1 = f1_score(true_labels, predictions)

print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1-score: {f1:.4f}')

RuntimeError: mat1 and mat2 must have the same dtype, but got Char and Float

In [None]:
# PGD against model_DNN; only is_report_loss_diff is overridden, every other attack setting is pgd's default.
adv_predict(test_loader, model_DNN, attack=pgd, is_report_loss_diff=False, device=device)

Adversarial accuracy (without attack): 99.34% | Under attack: 0.0%.


In [None]:
# PGD against model_AT_rFGSM; only is_report_loss_diff is overridden, every other attack setting is pgd's default.
adv_predict(test_loader, model_AT_rFGSM, attack=pgd, is_report_loss_diff=False, device=device)

Adversarial accuracy (without attack): 99.36% | Under attack: 90.83%.


In [None]:
# Grosse (grosse_k, k=100): model_DNN vs. model_AT_rFGSM
attack_params = {"k":100, 'is_report_loss_diff':False}
adv_predict(test_loader, model_DNN, grosse_k, device, **attack_params)
adv_predict(test_loader, model_AT_rFGSM, grosse_k, device, **attack_params)

Adversarial accuracy (without attack): 99.34% | Under attack: 0.0%.
Adversarial accuracy (without attack): 99.36% | Under attack: 83.93%.


In [None]:
# BCA (bca_k, k=100): model_DNN vs. model_AT_rFGSM
attack_params = {"k":100, 'is_report_loss_diff':False, 'use_sample':False}
adv_predict(test_loader, model_DNN, bca_k, device, **attack_params)
adv_predict(test_loader, model_AT_rFGSM, bca_k, device, **attack_params)

Adversarial accuracy (without attack): 99.34% | Under attack: 0.0%.
Adversarial accuracy (without attack): 99.36% | Under attack: 83.93%.


In [None]:
# BGA (bga_k, k=100): model_DNN vs. model_AT_rFGSM
attack_params = {"k":100, 'is_report_loss_diff':False, 'use_sample':False}
adv_predict(test_loader, model_DNN, bga_k, device, **attack_params)
adv_predict(test_loader, model_AT_rFGSM, bga_k, device, **attack_params)

Adversarial accuracy (without attack): 99.34% | Under attack: 0.0%.
Adversarial accuracy (without attack): 99.36% | Under attack: 90.87%.


In [None]:
# rFGSM (dfgsm_k, k=100, epsilon=0.02, random start): model_DNN vs. model_AT_rFGSM
attack_params = {"k":100, "epsilon":0.02, 'random':True, 'is_report_loss_diff':False, 'is_sample':False}
adv_predict(test_loader, model_DNN, dfgsm_k, device, **attack_params)
adv_predict(test_loader, model_AT_rFGSM, dfgsm_k, device, **attack_params)

Adversarial accuracy (without attack): 99.34% | Under attack: 0.0%.
Adversarial accuracy (without attack): 99.36% | Under attack: 89.91%.


In [None]:
# PGD-l1 (pgd, norm='l1', k=500, step length 1.0, no random start): model_DNN vs. model_AT_rFGSM
attack_params = {"k":500, "step_length":1., 'norm':'l1', 'random':False, 'is_report_loss_diff':False, 'is_sample':False}
adv_predict(test_loader, model_DNN, pgd, device, **attack_params)
adv_predict(test_loader, model_AT_rFGSM, pgd, device, **attack_params)

Adversarial accuracy (without attack): 99.34% | Under attack: 0.0%.
Adversarial accuracy (without attack): 99.36% | Under attack: 82.9%.


In [None]:
# PGD-l2 (pgd, norm='l2', k=200, step length 0.05, no random start): model_DNN vs. model_AT_rFGSM
attack_params = {"k":200, "step_length":0.05, 'norm':'l2', 'random':False, 'is_report_loss_diff':False, 'is_sample':False}
adv_predict(test_loader, model_DNN, pgd, device, **attack_params)
adv_predict(test_loader, model_AT_rFGSM, pgd, device, **attack_params)

Adversarial accuracy (without attack): 99.34% | Under attack: 40.03%.
Adversarial accuracy (without attack): 99.36% | Under attack: 91.56%.


In [None]:
# PGD-linf (pgd, norm='linf', k=500, step length 0.002, no random start): model_DNN vs. model_AT_rFGSM
attack_params = {"k":500, "step_length":0.002, 'norm':'linf', 'random':False, 'is_report_loss_diff':False, 'is_sample':False}
adv_predict(test_loader, model_DNN, pgd, device, **attack_params)
adv_predict(test_loader, model_AT_rFGSM, pgd, device, **attack_params)

Adversarial accuracy (without attack): 99.34% | Under attack: 0.0%.
Adversarial accuracy (without attack): 99.36% | Under attack: 86.74%.


In [None]:
# Mimicry×1: mimicry attack with 1 trial; adv_predict supplies the test set's benign samples
attack_params = {"trials":1, 'is_report_loss_diff':False}
adv_predict(test_loader, model_DNN, mimicry, device, **attack_params)
adv_predict(test_loader, model_AT_rFGSM, mimicry, device, **attack_params)

Adversarial accuracy (without attack): 99.34% | Under attack: 84.44%.
Adversarial accuracy (without attack): 99.36% | Under attack: 92.63%.


In [None]:
# Mimicry×10: mimicry attack with 10 trials
attack_params = {"trials":10, 'is_report_loss_diff':False}
adv_predict(test_loader, model_DNN, mimicry, device, **attack_params)
adv_predict(test_loader, model_AT_rFGSM, mimicry, device, **attack_params)

Adversarial accuracy (without attack): 99.34% | Under attack: 4.929%.
Adversarial accuracy (without attack): 99.36% | Under attack: 83.69%.


In [None]:
# Mimicry×30: mimicry attack with 30 trials
attack_params = {"trials":30, 'is_report_loss_diff':False}
adv_predict(test_loader, model_DNN, mimicry, device, **attack_params)
adv_predict(test_loader, model_AT_rFGSM, mimicry, device, **attack_params)

Adversarial accuracy (without attack): 99.34% | Under attack: 4.929%.
Adversarial accuracy (without attack): 99.36% | Under attack: 83.69%.


In [None]:
# Mimicry×100: mimicry attack with 100 trials
attack_params = {"trials":100, 'is_report_loss_diff':False}
adv_predict(test_loader, model_DNN, mimicry, device, **attack_params)
adv_predict(test_loader, model_AT_rFGSM, mimicry, device, **attack_params)

Adversarial accuracy (without attack): 99.34% | Under attack: 4.929%.
Adversarial accuracy (without attack): 99.36% | Under attack: 83.59%.


In [None]:
# Mimicry×1000: mimicry attack with 1000 trials
attack_params = {"trials":1000, 'is_report_loss_diff':False}
adv_predict(test_loader, model_DNN, mimicry, device, **attack_params)
adv_predict(test_loader, model_AT_rFGSM, mimicry, device, **attack_params)

Adversarial accuracy (without attack): 99.34% | Under attack: 1.517%.
Adversarial accuracy (without attack): 99.36% | Under attack: 82.68%.


In [None]:
# Mimicry×5000: mimicry attack with 5000 trials
attack_params = {"trials":5000, 'is_report_loss_diff':False}
adv_predict(test_loader, model_DNN, mimicry, device, **attack_params)
adv_predict(test_loader, model_AT_rFGSM, mimicry, device, **attack_params)

Adversarial accuracy (without attack): 99.34% | Under attack: 0.0%.
Adversarial accuracy (without attack): 99.36% | Under attack: 74.55%.


In [None]:
# Step-wise max attack (StepwiseMax_onestep2, 500 steps, per-norm step lengths) against model_AT_rFGSM.
# The stored run below ends in a KeyboardInterrupt, so it has no final result.
attack_params = {'step_lengths':{"l1": 1.0, "l2": 0.05, "linf": 0.001}, "steps":500}
adv_predict(test_loader, model_AT_rFGSM, StepwiseMax_onestep2, device, **attack_params)

step-wise max: attack effectiveness 0.000%.
step-wise max: attack effectiveness 10.000%.


KeyboardInterrupt: 

In [None]:
# Define different attacks with their parameters
# Attack suite: (attack function, keyword arguments) pairs, evaluated in this order
attacks = [
    # Iterative attacks limited to k steps
    (dfgsm_k, dict(k=100, epsilon=0.02, is_report_loss_diff=False)),
    (bga_k, dict(k=100, is_report_loss_diff=False)),
    (bca_k, dict(k=100, is_report_loss_diff=False)),
    (grosse_k, dict(k=100, is_report_loss_diff=False)),

    # PGD under the three norms, followed by the step-wise max attack
    (pgd, dict(k=100, step_length=1., norm='l1', is_report_loss_diff=False)),
    (pgd, dict(k=200, step_length=0.05, norm='l2', is_report_loss_diff=False)),
    (pgd, dict(k=100, step_length=0.02, norm='linf', is_report_loss_diff=False)),
    (StepwiseMax_onestep2, dict(step_lengths={"l1": 1.0, "l2": 0.05, "linf": 0.0008}, steps=650)),

    # Add more attacks as needed
]

# Run every configured attack against model_AT_rFGSM
for attack_func, attack_params in attacks:
    print(f"Running attack: {attack_func.__name__} with parameters: {attack_params}")
    adv_predict(test_loader, model_AT_rFGSM, attack_func, device, **attack_params)
    print()  # blank line between attacks
    print()  # Print an empty line for separation


Running attack: dfgsm_k with parameters: {'k': 50, 'epsilon': 0.02, 'is_report_loss_diff': False}
Adversarial accuracy (without attack): 99.25% | Under attack: 83.43%.

Running attack: bga_k with parameters: {'k': 25, 'is_report_loss_diff': False}
Adversarial accuracy (without attack): 99.25% | Under attack: 91.57%.

Running attack: bca_k with parameters: {'k': 25, 'is_report_loss_diff': False}
Adversarial accuracy (without attack): 99.25% | Under attack: 87.93%.

Running attack: grosse_k with parameters: {'k': 25, 'is_report_loss_diff': False}
Adversarial accuracy (without attack): 99.25% | Under attack: 87.93%.

Running attack: pgd with parameters: {'k': 100, 'step_length': 1.0, 'norm': 'l1', 'is_report_loss_diff': False}
Adversarial accuracy (without attack): 99.25% | Under attack: 87.24%.

Running attack: pgd with parameters: {'k': 200, 'step_length': 0.05, 'norm': 'l2', 'is_report_loss_diff': False}
Adversarial accuracy (without attack): 99.25% | Under attack: 91.67%.

Running att



```
def mimic_attack_effectiveness_optimized(test_loader, model, seed, trials=1000, device="cuda:0"):
  """
  Measure how often the mimic attack succeeds against `model` and print the result.

  Every malicious sample (label 1) is combined with `trials` randomly chosen benign
  samples (label 0) from the same loader; each combination is the element-wise sum
  clamped to [0, 1]. A malicious sample counts as successfully attacked when
  `get_loss` flags at least one of its combinations.

  Args:
      test_loader: Iterable of (features, labels) batches.
      model: The PyTorch model to be attacked (put into eval mode here).
      seed: Random seed; it is incremented and re-applied for every batch that contains malware.
      trials: Number of benign samples drawn per batch (default: 1000), capped at the
          number of benign samples available.
      device: Device used for the computations (default: "cuda:0"; there is no automatic
          CPU fallback).

  Returns:
      The percentage of malicious samples that were successfully attacked, or 0 when the
      loader contains no malicious samples.
  """

  torch.manual_seed(seed)
  model.eval()

  # Initialize counters
  successful_attacks = 0
  total_malicious_samples = 0

  # Collect every benign sample once, up front, so the per-batch loop only has to index into it
  benign_samples = []
  for x_batch, y_batch in test_loader:
    benign_samples.append(x_batch[y_batch.squeeze() == 0])

  ben_x = torch.cat(benign_samples, dim=0).to(device)

  # The per-batch list is no longer needed once concatenated
  del benign_samples

  # Cannot draw more distinct benign samples than exist
  trials = min(trials, len(ben_x))


  for x_batch, y_batch in test_loader:
    # NOTE(review): unlike adv_predict, the batch is not cast to float32 here. Confirm that
    # get_loss / the model accept the loader's dtype.
    x_batch, y_batch = x_batch.to(device), y_batch.to(device)
    malicious_samples = x_batch[y_batch.squeeze() == 1]

    if len(malicious_samples) > 0:
      # Add a trials axis so each malicious sample is paired with every drawn benign sample
      malicious_samples_expanded = malicious_samples.unsqueeze(1).expand(-1, trials, -1)

      # Draw this batch's benign samples with a fresh, deterministic seed
      seed += 1
      torch.manual_seed(seed)
      indices = torch.randperm(len(ben_x), device=device)[:trials]
      trial_vectors_expanded = ben_x[indices].unsqueeze(0)

      # Build the candidates, score them, and update the counters.
      # Presumably `done` marks candidates that evade the model; verify against get_loss.
      modified_x = torch.clamp(malicious_samples_expanded + trial_vectors_expanded, min=0., max=1.)
      _, done = get_loss(modified_x.view(-1, modified_x.shape[-1]), torch.ones(trials * malicious_samples.shape[0], 1, device=device), model)
      successful_attacks += (done.view(malicious_samples.shape[0], trials).sum(dim=1) > 0).sum().item()
      total_malicious_samples += malicious_samples.shape[0]

  # Calculate and print attack effectiveness
  attack_effectiveness = (successful_attacks / total_malicious_samples) * 100 if total_malicious_samples > 0 else 0
  print(f"Mimic attack effectiveness: {attack_effectiveness:.3f}%.")

  return attack_effectiveness  # percentage, also printed above

```



In [None]:
def mimicry(ben_x, malicious_samples, model_DNN, trials=30, seed=230, is_report_loss_diff=False, device=None):
    """
    Perform a mimicry attack.

    Every malicious sample is added element-wise to each of `trials` randomly
    selected benign samples and the sum is clamped to [0, 1]. For each malicious
    sample the candidate with the highest loss is returned; candidates that
    `get_loss` flags as done get the maximum loss added so they are preferred.

    Args:
    - ben_x (torch.Tensor): Benign samples tensor (one sample per row).
    - malicious_samples (torch.Tensor): Malicious samples tensor (one sample per row).
    - model_DNN (torch.nn.Module): PyTorch model used for the attack (handed to `get_loss`).
    - trials (int): Number of benign samples tried per malicious sample; capped at len(ben_x).
    - seed (int): Random seed for reproducibility. Note that this reseeds torch's global RNG.
    - is_report_loss_diff (bool): Flag to indicate whether to report attack effectiveness.
    - device (torch.device | str | None): Device on which the attack is computed.
      Defaults to the device of `ben_x`. The model is not moved by this function,
      so it must already live on that device.

    Returns:
    - adv_x (torch.Tensor): Adversarial examples tensor with one row per malicious
      sample, or None when `malicious_samples` is empty.
    """

    # Run everything on a single device so inputs coming from different places can be mixed.
    if device is None:
        device = ben_x.device
    ben_x = ben_x.to(device)
    malicious_samples = malicious_samples.to(device)

    # Ensure trials do not exceed the length of ben_x
    trials = min(trials, len(ben_x))

    # Get the number of malicious samples
    n_samples = len(malicious_samples)

    if n_samples == 0:
        print("No malicious samples found.")
        return None

    # Generate random indices for sampling from ben_x
    torch.manual_seed(seed)
    indices = torch.randperm(len(ben_x), device=device)[:trials]
    trial_vectors = ben_x[indices].unsqueeze(0)  # (1, trials, n_features)

    # The attack only needs forward passes, so skip building an autograd graph
    # for the n_samples * trials candidates.
    with torch.no_grad():
        # Broadcasting pairs every malicious sample with every trial vector:
        # (n_samples, 1, n_features) + (1, trials, n_features)
        pertbx = torch.clamp(malicious_samples.unsqueeze(1) + trial_vectors, min=0., max=1.)

        # Compute the loss and check if adversarial examples are successful
        labels = torch.ones(n_samples * trials, 1, device=device)
        loss, done = get_loss(pertbx.view(-1, pertbx.shape[-1]), labels, model_DNN)

        # Add maximum loss to successful attacks to differentiate
        max_v = loss.max()
        loss[done] += max_v

        # Reshape the loss and done tensors to (n_samples, trials)
        loss = loss.view(n_samples, trials)
        done = done.view(n_samples, trials)

        # Report attack effectiveness if required
        if is_report_loss_diff:
            n_done = torch.any(done, dim=-1).sum().item()
            print(f"Mimicry*{trials}: Attack effectiveness {n_done / n_samples * 100:.3f}%.")

        # Keep, for each malicious sample, the candidate with the maximum loss
        _, best = loss.max(dim=-1)
        adv_x = pertbx[torch.arange(n_samples, device=device), best]

    return adv_x




In [None]:
# Mimicry×10: evaluate the mimicry attack with trials=10 and per-call effectiveness
# reporting enabled (the former "×30" label did not match trials=10).
# `adv_predict` is defined elsewhere; presumably it applies `mimicry` batch by batch — TODO confirm.
attack_params = {"trials":10, 'is_report_loss_diff':True}
# Alternative target kept for reference: the same evaluation against model_DNN (disabled).
#adv_predict(test_loader, model_DNN, mimicry, device, **attack_params)
adv_predict(test_loader, model_AT_rFGSM, mimicry, device, **attack_params)

Mimicry*10: Attack effectiveness 0.000%.
Mimicry*10: Attack effectiveness 0.000%.
Mimicry*10: Attack effectiveness 33.333%.
Mimicry*10: Attack effectiveness 0.000%.
Mimicry*10: Attack effectiveness 10.526%.
Mimicry*10: Attack effectiveness 40.000%.
Mimicry*10: Attack effectiveness 37.500%.
Mimicry*10: Attack effectiveness 18.750%.
Mimicry*10: Attack effectiveness 12.500%.
Mimicry*10: Attack effectiveness 7.692%.
Mimicry*10: Attack effectiveness 23.529%.
Mimicry*10: Attack effectiveness 30.769%.
Mimicry*10: Attack effectiveness 21.429%.
Mimicry*10: Attack effectiveness 20.000%.
Mimicry*10: Attack effectiveness 0.000%.
Mimicry*10: Attack effectiveness 17.647%.
Mimicry*10: Attack effectiveness 36.842%.
Mimicry*10: Attack effectiveness 8.333%.
Mimicry*10: Attack effectiveness 12.500%.
Mimicry*10: Attack effectiveness 20.000%.
Mimicry*10: Attack effectiveness 22.222%.
Mimicry*10: Attack effectiveness 23.077%.
Mimicry*10: Attack effectiveness 11.111%.
Mimicry*10: Attack effectiveness 33.333%

In [None]:
for x_test, y_test in test_loader:

        # Keep only the rows whose label equals 1 (the malicious class) and their labels.
        # NOTE(review): mal_y_batch is not used anywhere else in this cell.
        mal_x_batch, mal_y_batch = x_test[y_test.squeeze() == 1], y_test[y_test.squeeze() == 1]

        # Run the mimicry attack on this batch. pertb_mal_x is overwritten on every
        # iteration, so only the last batch's result is left after the loop.
        # NOTE(review): relies on a global `ben_x` created outside this cell, and x_test is
        # not moved to a device here — confirm ben_x, x_test and the model share one device.
        pertb_mal_x = mimicry(ben_x, mal_x_batch, model_AT_rFGSM, trials=30, seed=230, is_report_loss_diff=True)

Mimicry*30: Attack effectiveness 0.000%.
Mimicry*30: Attack effectiveness 0.000%.
Mimicry*30: Attack effectiveness 33.333%.
Mimicry*30: Attack effectiveness 0.000%.
Mimicry*30: Attack effectiveness 10.526%.
Mimicry*30: Attack effectiveness 40.000%.
Mimicry*30: Attack effectiveness 37.500%.
Mimicry*30: Attack effectiveness 18.750%.
Mimicry*30: Attack effectiveness 12.500%.
Mimicry*30: Attack effectiveness 7.692%.
Mimicry*30: Attack effectiveness 23.529%.
Mimicry*30: Attack effectiveness 30.769%.
Mimicry*30: Attack effectiveness 21.429%.
Mimicry*30: Attack effectiveness 20.000%.
Mimicry*30: Attack effectiveness 0.000%.
Mimicry*30: Attack effectiveness 17.647%.
Mimicry*30: Attack effectiveness 36.842%.
Mimicry*30: Attack effectiveness 8.333%.
Mimicry*30: Attack effectiveness 12.500%.
Mimicry*30: Attack effectiveness 20.000%.
Mimicry*30: Attack effectiveness 22.222%.
Mimicry*30: Attack effectiveness 23.077%.
Mimicry*30: Attack effectiveness 11.111%.
Mimicry*30: Attack effectiveness 33.333%

In [None]:
# Step-by-step (inlined) version of the mimicry attack with extra debug prints.
# NOTE(review): depends on `ben_x`, `malicious_samples`, `device`, `model_DNN` and
# `get_loss` already existing in the kernel; none of them is created in this cell.
trials = 30
seed = 230
is_report_loss_diff = True

model_DNN.eval()

# Ensure trials do not exceed the length of ben_x
trials = min(trials, len(ben_x))

# Get the number of malicious samples
n_samples = len(malicious_samples)

if n_samples > 0:
    # Expand dimensions for efficient broadcasting: one copy of each malicious
    # sample per trial (expand creates a view, no data is copied).
    malicious_samples_expanded = malicious_samples.unsqueeze(1).expand(-1, trials, -1)

    # Generate random indices for sampling from ben_x.
    # The seed is incremented first, so the effective seed used here is 231.
    seed += 1
    torch.manual_seed(seed)
    indices = torch.randperm(len(ben_x), device=device)[:trials]
    trial_vectors_expanded = ben_x[indices].unsqueeze(0)

    # Perform the mimic attack: add every trial vector to every malicious sample
    # and clamp the result to [0, 1].
    pertbx = torch.clamp(malicious_samples_expanded + trial_vectors_expanded, min=0., max=1.)

    # Compute the loss and check if adversarial examples are successful
    # (candidates are flattened to one row each; every label is 1).
    loss, done = get_loss(pertbx.view(-1, pertbx.shape[-1]), torch.ones(n_samples * trials, 1, device=device), model_DNN)
    # Debug output: prints the full per-candidate `done` flags.
    print(done)
    # Add maximum loss to successful attacks to differentiate
    max_v = loss.max()
    loss[done] += max_v

    # Reshape the loss and done tensors to (n_samples, trials)
    loss = loss.view(n_samples, trials)
    done = done.view(n_samples, trials)

    # Report attack effectiveness if required: a sample counts as evaded when
    # at least one of its trials is flagged as done.
    if is_report_loss_diff:
        n_done = torch.any(done, dim=-1).sum()
        # Debug output: raw count of evaded samples.
        print(n_done)
        print(f"Mimicry*{trials}: Attack effectiveness {n_done / n_samples * 100:.3f}%.")

    # Get the index of the maximum loss for each sample and keep that candidate.
    # Note: `indices` is reused here and no longer holds the benign sample indices.
    _, indices = loss.max(dim=-1)
    adv_x = pertbx[torch.arange(n_samples), indices]


tensor([False, False, False, False, False, False, False, False, False, False,
         True,  True, False, False, False,  True, False, False, False, False,
         True, False, False, False, False, False, False,  True, False, False,
         True, False, False,  True,  True, False,  True, False, False, False,
         True,  True, False,  True, False,  True, False, False,  True, False,
         True, False,  True,  True, False, False,  True,  True, False,  True,
        False, False, False, False,  True, False, False, False, False, False,
         True,  True, False, False, False,  True, False, False, False, False,
         True, False, False, False, False, False, False,  True, False, False,
        False, False, False,  True,  True, False,  True, False, False,  True,
         True,  True, False,  True, False,  True, False, False,  True, False,
         True, False,  True,  True, False, False, False,  True, False, False,
        False, False, False, False, False, False, False, False, 

In [None]:
# Inspect the adversarial batch: one row per malicious sample, one column per feature.
adv_x.shape

torch.Size([12, 10000])

In [None]:
# Example usage: run the mimicry attack on the `ben_x` / `malicious_samples` tensors
# already present in the kernel and print the attack effectiveness.
# NOTE(review): this call needs a `mimicry` definition that takes `trials`, `seed` and
# `is_report_loss_diff` as its 4th-6th positional arguments and accepts a `device`
# keyword — confirm the definition live in the kernel matches before running this cell.
trials = 30
seed = 230
is_report_loss_diff = True

adv_x = mimicry(ben_x, malicious_samples, model_DNN, trials, seed, is_report_loss_diff, device="cpu")

Mimicry*30: Attack effectiveness 91.667%.


In [None]:
# Score the adversarial examples with model_AT_rFGSM.
outputs = model_AT_rFGSM(adv_x)
# topk(k=1) returns (values, indices); the index of the largest output is used as the predicted class.
_, y_pred = torch.topk(outputs, k=1)

# Fraction of adversarial examples still predicted as class 1
# (presumably the malicious class, matching the `== 1` filters used elsewhere — TODO confirm).
acc_ad_test = (y_pred == 1.).sum().item() / len(y_pred)
acc_ad_test

1.0

In [None]:
def mimicry(ben_x, malicious_samples, model, seed, trials=1000, device="cpu"):
  """
  Measure how often a mimic attack succeeds on one set of malicious samples.

  Every malicious sample is added element-wise to each of `trials` randomly
  selected benign samples (sum clamped to [0, 1]). A malicious sample counts as
  successfully attacked when `get_loss` flags at least one of its candidates as done.

  NOTE: this notebook contains an earlier function that is also called `mimicry`
  but has a different signature and returns adversarial examples; whichever
  definition was executed last is the one in effect.

  Args:
      ben_x: Tensor of benign samples, one per row.
      malicious_samples: Tensor of malicious samples, one per row.
      model: The PyTorch model to be attacked (switched to eval mode here). It is
          not moved; it must already live on `device`.
      seed: Base random seed; the benign samples are drawn with `seed + 1`.
      trials: Number of benign samples tried per malicious sample (default: 1000),
          capped at len(ben_x).
      device: The device to use for computations (default: "cpu").

  Returns:
      The effectiveness of the mimic attack as a percentage (0 when there are no
      malicious samples).
  """

  model.eval()

  ben_x = ben_x.to(device)
  malicious_samples = malicious_samples.to(device)

  # Initialize counters
  successful_attacks = 0
  total_malicious_samples = malicious_samples.shape[0]

  trials = min(trials, len(ben_x))

  if total_malicious_samples > 0:
    # Draw the benign trial vectors (reseeds torch's global RNG with seed + 1)
    torch.manual_seed(seed + 1)
    indices = torch.randperm(len(ben_x), device=device)[:trials]
    trial_vectors = ben_x[indices]

    # Forward passes only: no autograd graph is needed for the evaluation.
    with torch.no_grad():
      # Pair every malicious sample with every trial vector via broadcasting:
      # (n, 1, n_features) + (1, trials, n_features) -> (n, trials, n_features).
      # Adding the two 2-D tensors directly only works when n == trials or n == 1.
      modified_x = torch.clamp(malicious_samples.unsqueeze(1) + trial_vectors.unsqueeze(0), min=0., max=1.)
      labels = torch.ones(trials * total_malicious_samples, 1, device=device)
      _, done = get_loss(modified_x.view(-1, modified_x.shape[-1]), labels, model)

    # A sample is evaded when any of its trials succeeded
    successful_attacks = (done.view(total_malicious_samples, trials).sum(dim=1) > 0).sum().item()

  # Calculate and print attack effectiveness
  attack_effectiveness = (successful_attacks / total_malicious_samples) * 100 if total_malicious_samples > 0 else 0
  print(f"Mimic attack effectiveness: {attack_effectiveness:.3f}%.")

  return attack_effectiveness


In [None]:
def mimic_attack_effectiveness_optimized(test_loader, model, seed, trials=1000, device="cuda:0"):
  """
  Calculates the effectiveness of the mimic attack on the given model.

  All benign samples (label 0) of the loader are pooled first. Then, for every
  batch, each malicious sample (label 1) is added element-wise to `trials`
  randomly selected benign samples (sum clamped to [0, 1]); the sample counts as
  successfully attacked when `get_loss` flags at least one candidate as done.
  Every batch of the loader is evaluated.

  Args:
      test_loader: A PyTorch dataloader (or any re-iterable of (x, y) batches)
          containing the test data. It is iterated twice.
      model: The PyTorch model to be attacked (switched to eval mode here). It is
          not moved; it must already live on `device`.
      seed: The random seed for reproducibility. The global torch RNG is seeded
          with it, and reseeded with an incremented value for every batch that
          contains malicious samples.
      trials: The number of random samples to use from the benign class
          (default: 1000), capped at the number of benign samples.
      device: The device to use for computations (default: "cuda:0"; pass "cpu"
          explicitly on machines without CUDA).

  Returns:
      The effectiveness of the mimic attack as a percentage (float), or 0 when
      the loader holds no malicious samples.
  """

  torch.manual_seed(seed)
  model.eval()

  # Initialize counters
  successful_attacks = 0
  total_malicious_samples = 0

  # Pre-select benign samples for efficiency.
  # reshape(-1) (rather than squeeze()) keeps the mask 1-D even for a batch of size 1.
  ben_x = torch.cat(
      [x_batch[y_batch.reshape(-1) == 0] for x_batch, y_batch in test_loader],
      dim=0,
  ).to(device)

  trials = min(trials, len(ben_x))

  # Forward passes only: no autograd graph is needed for the evaluation.
  with torch.no_grad():
    for x_batch, y_batch in test_loader:
      x_batch, y_batch = x_batch.to(device), y_batch.to(device)
      malicious_samples = x_batch[y_batch.reshape(-1) == 1]
      n_malicious = malicious_samples.shape[0]

      if n_malicious == 0:
        continue

      # Draw a fresh set of benign trial vectors for this batch
      seed += 1
      torch.manual_seed(seed)
      indices = torch.randperm(len(ben_x), device=device)[:trials]

      # Pair every malicious sample with every trial vector via broadcasting:
      # (n, 1, n_features) + (1, trials, n_features) -> (n, trials, n_features)
      modified_x = torch.clamp(malicious_samples.unsqueeze(1) + ben_x[indices].unsqueeze(0), min=0., max=1.)
      labels = torch.ones(trials * n_malicious, 1, device=device)
      _, done = get_loss(modified_x.view(-1, modified_x.shape[-1]), labels, model)

      # A sample is evaded when any of its trials succeeded
      successful_attacks += (done.view(n_malicious, trials).sum(dim=1) > 0).sum().item()
      total_malicious_samples += n_malicious
      # No early exit here: stopping after the first batch would report the
      # effectiveness of a single batch instead of the whole test set.

  # Calculate and print attack effectiveness
  attack_effectiveness = (successful_attacks / total_malicious_samples) * 100 if total_malicious_samples > 0 else 0
  print(f"Mimic attack effectiveness: {attack_effectiveness:.3f}%.")

  return attack_effectiveness


In [None]:
# Evaluate the mimic attack against model_DNN, using at most 100 benign trial vectors per malicious sample.
mimic_attack_effectiveness_optimized(test_loader, model_DNN , seed=230, trials=100, device=device)

Mimic attack effectiveness: 91.667%.


91.66666666666666