<a href="https://colab.research.google.com/github/mostafa-ja/mal_adv3/blob/main/4_adverserial_attacks_ipynb_(2).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Every artifact lives in the same GitHub folder; build the URLs from one base.
_REPO_DATA_URL = 'https://github.com/mostafa-ja/mal_adv3/raw/main/data/'
_ARTIFACT_NAMES = (
    'X_redefined_sparse_matrix.npz',      # sparse feature matrix
    'DNN_params%20.pth',                  # weights of the baseline DNN
    'labels.pt',                          # label tensor
    'vocab.pkl',                          # feature-name -> value dictionary
    'adverserial_attacks_functions.py',   # attack implementations imported below
    'best_model%20_RFGSM.pth',            # weights of the adversarially trained model
)
download_links = [_REPO_DATA_URL + artifact for artifact in _ARTIFACT_NAMES]

In [4]:
import gdown

# Fetch every artifact into the Colab working directory.
# The trailing slash makes gdown treat the target as a directory and keep the
# remote file name (including the literal "%20" some of the names contain).
output_filepath = '/content/'
for link in download_links:
  gdown.download(url=link, output=output_filepath)


Downloading...
From: https://github.com/mostafa-ja/mal_adv3/raw/main/data/X_redefined_sparse_matrix.npz
To: /content/X_redefined_sparse_matrix.npz
100%|██████████| 2.31M/2.31M [00:00<00:00, 10.6MB/s]
Downloading...
From: https://github.com/mostafa-ja/mal_adv3/raw/main/data/DNN_params%20.pth
To: /content/DNN_params%20.pth
100%|██████████| 8.17M/8.17M [00:00<00:00, 26.7MB/s]
Downloading...
From: https://github.com/mostafa-ja/mal_adv3/raw/main/data/labels.pt
To: /content/labels.pt
100%|██████████| 517k/517k [00:00<00:00, 3.80MB/s]
Downloading...
From: https://github.com/mostafa-ja/mal_adv3/raw/main/data/vocab.pkl
To: /content/vocab.pkl
100%|██████████| 9.18M/9.18M [00:00<00:00, 33.1MB/s]
Downloading...
From: https://github.com/mostafa-ja/mal_adv3/raw/main/data/adverserial_attacks_functions.py
To: /content/adverserial_attacks_functions.py
45.5kB [00:00, 53.7MB/s]                   
Downloading...
From: https://github.com/mostafa-ja/mal_adv3/raw/main/data/best_model%20_RFGSM.pth
To: /conten

In [10]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split
from scipy import sparse
import numpy as np
import matplotlib.pyplot as plt
import pickle
import torch.nn.functional as F
import random
import time

from adverserial_attacks_functions import *

# Fix the PyTorch RNG so weight init / shuffling are repeatable
torch.manual_seed(0)

# Device configuration: use the GPU when one is visible, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

cpu


In [5]:
# Load the vocabulary dictionary from disk.
# NOTE(review): pickle.load executes arbitrary code from the file; only use it
# on artifacts you trust.
with open('vocab.pkl', 'rb') as f:
    vocab = pickle.load(f)

# Show the first six (key, value) pairs as a sanity check
for i, entry in enumerate(vocab.items()):
    if i > 5:
        break
    print(entry)

('android/media/mediaplayer->start', 141045)
('android/app/activity->setcontentview', 140900)
('android/os/vibrator->cancel', 141093)
('android.permission.vibrate', 140720)
('android.hardware.touchscreen', 137091)
('android.intent.action.main', 138335)


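Memory-efficient alternative (kept for reference, not executed): store each split as a sparse COO tensor.
The drawback is that converting batches back to dense tensors with `.to_dense().to(torch.float32)` is slow.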

```
from scipy import sparse
from sklearn.model_selection import train_test_split

# Load dataset
X_redefined = sparse.load_npz("X_redefined_sparse_matrix.npz")
labels_tensor = torch.load('labels.pt')

# Split data into train, validation, and test sets with stratified sampling
X_train_val, X_test, labels_train_val, labels_test = train_test_split(X_redefined, labels_tensor, test_size=0.2, stratify=labels_tensor, random_state=42)
X_train, X_val, labels_train, labels_val = train_test_split(X_train_val, labels_train_val, test_size=0.2, stratify=labels_train_val, random_state=42)

# Create PyTorch sparse tensors directly from the sparse matrices
train_dataset = TensorDataset(torch.sparse_coo_tensor(torch.tensor(X_train.nonzero()), torch.tensor(X_train.data), X_train.shape), labels_train)
val_dataset = TensorDataset(torch.sparse_coo_tensor(torch.tensor(X_val.nonzero()), torch.tensor(X_val.data), X_val.shape), labels_val)
test_dataset = TensorDataset(torch.sparse_coo_tensor(torch.tensor(X_test.nonzero()), torch.tensor(X_test.data), X_test.shape), labels_test)

# Clear unnecessary variables
del X_redefined, labels_tensor, X_train_val, X_test, labels_train_val, labels_test, X_train, X_val, labels_train, labels_val
```



In [6]:
# Read the sparse feature matrix and the label tensor from disk
X_redefined = sparse.load_npz("X_redefined_sparse_matrix.npz")
labels_tensor = torch.load('labels.pt')

# Stratified 80/20 split into (train + validation) / test ...
X_train_val, X_test, labels_train_val, labels_test = train_test_split(
    X_redefined,
    labels_tensor,
    test_size=0.2,
    stratify=labels_tensor,
    random_state=42,
)
# ... then a second stratified 80/20 split into train / validation
X_train, X_val, labels_train, labels_val = train_test_split(
    X_train_val,
    labels_train_val,
    test_size=0.2,
    stratify=labels_train_val,
    random_state=42,
)

# Densify each split and pair it with its labels.
# Features are stored as int8 to save memory; batches are cast to float32
# right before they are fed to the model.
train_dataset = TensorDataset(
    torch.tensor(X_train.toarray(), dtype=torch.int8),
    labels_train,
)
val_dataset = TensorDataset(
    torch.tensor(X_val.toarray(), dtype=torch.int8),
    labels_val,
)
test_dataset = TensorDataset(
    torch.tensor(X_test.toarray(), dtype=torch.int8),
    labels_test,
)

# Drop the intermediates so only the three datasets stay alive
del X_redefined, labels_tensor
del X_train_val, X_test, labels_train_val, labels_test
del X_train, X_val, labels_train, labels_val

In [7]:
# Batch the three splits; only the training split is reshuffled every epoch
batch_size = 256
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [8]:
class MalwareDetectionModel(nn.Module):
    """Feed-forward classifier: two ReLU + dropout hidden layers and a log-softmax head.

    Args:
        input_size: Number of input features.
        hidden_1_size: Width of the first hidden layer.
        hidden_2_size: Width of the second hidden layer.
        num_labels: Number of output classes.
        dropout_prob: Dropout probability applied after each hidden layer.
    """

    def __init__(self, input_size=10000, hidden_1_size=200, hidden_2_size=200, num_labels=2, dropout_prob=0.6):
        super().__init__()

        # Keep the hyper-parameters on the instance for later inspection
        self.input_size, self.hidden_1_size, self.hidden_2_size = input_size, hidden_1_size, hidden_2_size
        self.num_labels, self.dropout_prob = num_labels, dropout_prob

        # First hidden block
        self.fc1 = nn.Linear(input_size, hidden_1_size)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(dropout_prob)

        # Second hidden block
        self.fc2 = nn.Linear(hidden_1_size, hidden_2_size)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(dropout_prob)

        # Output head: per-class log-probabilities
        self.fc3 = nn.Linear(hidden_2_size, num_labels)
        self.log_softmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        """Return log-probabilities of shape (batch, num_labels) for a (batch, input_size) input."""
        hidden_1 = self.dropout1(self.relu1(self.fc1(x)))
        hidden_2 = self.dropout2(self.relu2(self.fc2(hidden_1)))
        return self.log_softmax(self.fc3(hidden_2))


In [15]:
# Instantiate the baseline DNN on the selected device and restore its weights.
# The file name contains a literal "%20" because that is how it was downloaded.
model_DNN = MalwareDetectionModel().to(device)
dnn_state_dict = torch.load('DNN_params%20.pth', map_location=device)
model_DNN.load_state_dict(dnn_state_dict)

<All keys matched successfully>

In [12]:
def adversarial_training(model, train_loader, val_loader, attack, adv_epochs=10, lr=0.001, weight_decay=0., device=device, verbose=True, **kwargs):
    """Adversarially train ``model``: malware samples are replaced by attacked versions.

    For every training batch the malicious samples (label 1) are perturbed with
    ``attack`` while benign samples (label 0) are kept unchanged; the model is
    then optimised on the union. After each epoch the model is scored on the
    validation set both on clean data and on attacked malware, and the weights
    with the best average of the two accuracies are saved to ``best_model.pth``.

    Args:
        model: Network to train; called as ``model(x)`` and expected to return
            per-class scores of shape (batch, num_classes).
        train_loader: Iterable of ``(features, labels)`` training batches.
        val_loader: Iterable of ``(features, labels)`` validation batches.
        attack: Callable invoked as ``attack(mal_x, mal_y, model, **kwargs)``
            that returns the perturbed malware features.
        adv_epochs: Number of training epochs.
        lr: Adam learning rate.
        weight_decay: Adam weight decay.
        device: Device the batches are moved to.
        verbose: Print per-batch and per-epoch progress when True.
        **kwargs: Forwarded to ``attack`` for both training and validation.

    Raises:
        AssertionError: If the validation set contains no malware sample.
    """
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    # NOTE(review): CrossEntropyLoss applies log-softmax itself. For a model that
    # already returns log-probabilities this is a no-op (log-softmax is idempotent).
    criterion = nn.CrossEntropyLoss()
    nbatches = len(train_loader)
    best_acc_val = 0.
    acc_val_adv_be = 0.
    best_epoch = 0

    for epoch in range(adv_epochs):
        epoch_losses = []
        epoch_accuracies = []

        for idx_batch, (x_batch, y_batch) in enumerate(train_loader):
            x_batch, y_batch = x_batch.to(torch.float32).to(device), y_batch.to(device)

            # Separate malicious and benign samples. Flattening (instead of
            # squeeze) keeps the masks 1-D even for a batch of size one.
            flat_labels = y_batch.reshape(-1)
            mal_mask, ben_mask = flat_labels == 1, flat_labels == 0
            mal_x_batch, ben_x_batch = x_batch[mal_mask], x_batch[ben_mask]
            mal_y_batch, ben_y_batch = y_batch[mal_mask], y_batch[ben_mask]

            # Generate adversarial examples (in eval mode so dropout is off).
            # A batch without malware has nothing to attack.
            if mal_x_batch.shape[0] > 0:
                model.eval()
                pertb_mal_x = attack(mal_x_batch, mal_y_batch, model, **kwargs)
            else:
                pertb_mal_x = mal_x_batch
            x_batch = torch.cat([ben_x_batch, pertb_mal_x], dim=0)
            y_batch = torch.cat([ben_y_batch, mal_y_batch])
            model.train()

            # Forward pass and backward pass
            optimizer.zero_grad()
            outputs = model(x_batch)
            loss_train = criterion(outputs, y_batch.view(-1).long())
            loss_train.backward()
            optimizer.step()

            # Calculate metrics; compare flattened tensors so a 1-D label
            # tensor cannot broadcast against the (batch, 1) top-k indices.
            epoch_losses.append(loss_train.item())
            _, predicted = torch.topk(outputs, k=1)
            acc_train = (predicted.reshape(-1) == y_batch.reshape(-1)).sum().item() / len(y_batch)
            epoch_accuracies.append(acc_train)

            # Print batch level information
            if verbose:
                print(f'Mini batch: {idx_batch + 1}/{nbatches} | Epoch: {epoch + 1}/{adv_epochs} | Batch Loss: {loss_train.item():.4f} | Batch Accuracy: {acc_train * 100:.2f}%')

        # Calculate epoch level metrics
        mean_loss = np.mean(epoch_losses)
        mean_accuracy = np.mean(epoch_accuracies) * 100

        # Print epoch level information
        if verbose:
            print(f'Training loss (epoch level): {mean_loss:.4f} | Train accuracy: {mean_accuracy:.2f}%')

        # Evaluation on validation set
        model.eval()
        avg_acc_ad_val = []
        avg_acc_val = []
        for x_val, y_val in val_loader:
            # Cast to float32 exactly like the training batches: the datasets
            # may hold integer features, which the linear layers reject.
            x_val, y_val = x_val.to(torch.float32).to(device), y_val.to(device)
            flat_labels = y_val.reshape(-1)
            with torch.no_grad():
                outputs = model(x_val)
            _, predicted = torch.topk(outputs, k=1)
            acc_val = (predicted.reshape(-1) == flat_labels).sum().item() / len(y_val)
            avg_acc_val.append(acc_val)

            # Generate adversarial examples for validation set, using the same
            # attack parameters as during training.
            mal_mask = flat_labels == 1
            if not mal_mask.any():
                continue  # no malware in this batch: robust accuracy is undefined
            mal_x_batch, mal_y_batch = x_val[mal_mask], y_val[mal_mask]
            pertb_mal_x = attack(mal_x_batch, mal_y_batch, model, **kwargs)
            with torch.no_grad():
                outputs = model(pertb_mal_x)
            _, y_pred = torch.topk(outputs, k=1)

            acc_ad_val = (y_pred == 1.).sum().item() / len(y_pred)
            avg_acc_ad_val.append(acc_ad_val)

        # Calculate validation accuracy
        assert len(avg_acc_ad_val) > 0, "validation set contains no malware samples"
        acc_all = (np.mean(avg_acc_val) + np.mean(avg_acc_ad_val)) / 2.

        # Update best validation accuracy
        if acc_all >= best_acc_val:
            best_acc_val = acc_all
            acc_val_adv_be = np.mean(avg_acc_ad_val)
            best_epoch = epoch + 1
            torch.save(model.state_dict(), 'best_model.pth')

        # Print validation results
        if verbose:
            print(f"\tVal accuracy(without attack) {np.mean(avg_acc_val) * 100:.4}% and accuracy(with attack) {np.mean(avg_acc_ad_val) * 100:.4}% under attack and overall accuracy {acc_all * 100:.4}%.")
            print(f"\tModel select at epoch {best_epoch} with validation accuracy {best_acc_val * 100:.4}% and accuracy {acc_val_adv_be * 100:.4}% under attack.")


In [None]:
# AT-rFGSM: adversarial training based on the rFGSM attack
model_AT = MalwareDetectionModel().to(device)

attack_param = {"k":50, "epsilon":0.02, 'random':True, "is_sample":False, 'is_report_loss_diff':True}
# Train the freshly created model_AT (the name model_AD is never defined in this notebook)
adversarial_training(model_AT, train_loader, val_loader, adv_epochs=50, attack=dfgsm_k, **attack_param)

In [13]:
def adv_predict(test_loader, model, attack, device, **kwargs):
    """Report clean accuracy and accuracy on attacked malware for ``model``.

    Args:
        test_loader: Iterable of ``(features, labels)`` batches. Labels may be
            shaped (batch,) or (batch, 1); label 1 marks malware.
        model: Network called as ``model(x)``, returning per-class scores of
            shape (batch, num_classes).
        attack: Callable invoked as ``attack(mal_x, mal_y, model, **kwargs)``
            that returns the perturbed malware features.
        device: Device the batches are moved to.
        **kwargs: Forwarded unchanged to ``attack``.

    Returns:
        Tuple ``(clean_accuracy, attacked_accuracy)`` as fractions in [0, 1],
        each the mean of the per-batch accuracies. A value is ``nan`` when no
        batch contributed to it (e.g. the data holds no malware at all).
    """
    model.eval()
    avg_acc_ad_test = []
    avg_acc_test = []
    for x_test, y_test in test_loader:
        x_test, y_test = x_test.to(torch.float32).to(device), y_test.to(device)
        # Flatten once: keeps the comparison element-wise for 1-D labels and
        # keeps the malware mask 1-D even for a batch of size one.
        flat_labels = y_test.reshape(-1)

        with torch.no_grad():
            outputs = model(x_test)
        _, predicted = torch.topk(outputs, k=1)
        acc_test = (predicted.reshape(-1) == flat_labels).sum().item() / len(y_test)
        avg_acc_test.append(acc_test)

        # Generate adversarial examples for test set
        mal_mask = flat_labels == 1
        if not mal_mask.any():
            continue  # nothing to attack; avoids dividing by zero below
        mal_x_batch, mal_y_batch = x_test[mal_mask], y_test[mal_mask]
        pertb_mal_x = attack(mal_x_batch, mal_y_batch, model, **kwargs)
        with torch.no_grad():
            outputs = model(pertb_mal_x)
        _, y_pred = torch.topk(outputs, k=1)

        # Fraction of attacked malware that is still classified as malware
        acc_ad_test = (y_pred == 1.).sum().item() / len(y_pred)
        avg_acc_ad_test.append(acc_ad_test)

    # Calculate test accuracy
    clean_accuracy = float(np.mean(avg_acc_test)) if avg_acc_test else float('nan')
    attacked_accuracy = float(np.mean(avg_acc_ad_test)) if avg_acc_ad_test else float('nan')
    print(f"Adversarial accuracy (without attack): {clean_accuracy * 100:.4}% | Under attack: {attacked_accuracy * 100:.4}%.")
    return clean_accuracy, attacked_accuracy


In [14]:
# Build a fresh network and restore the adversarially trained (rFGSM) weights.
# The file name contains a literal "%20" because that is how it was downloaded.
model_AT_rFGSM = MalwareDetectionModel().to(device)
at_rfgsm_state_dict = torch.load('best_model%20_RFGSM.pth', map_location=device)
model_AT_rFGSM.load_state_dict(at_rfgsm_state_dict)

<All keys matched successfully>

In [16]:
# PGD with the attack's default settings against the baseline DNN
adv_predict(test_loader, model_DNN, pgd, device, is_report_loss_diff=False)

Adversarial accuracy (without attack): 99.34% | Under attack: 0.0%.


In [17]:
# PGD with the attack's default settings against the adversarially trained model
adv_predict(test_loader, model_AT_rFGSM, pgd, device, is_report_loss_diff=False)

Adversarial accuracy (without attack): 99.36% | Under attack: 90.83%.


In [20]:
# Grosse attack: baseline DNN first, then the adversarially trained model
attack_params = {"k":100, 'is_report_loss_diff':False}
for target_model in (model_DNN, model_AT_rFGSM):
    adv_predict(test_loader, target_model, grosse_k, device, **attack_params)

Adversarial accuracy (without attack): 99.34% | Under attack: 0.0%.
Adversarial accuracy (without attack): 99.36% | Under attack: 83.93%.


In [21]:
# BCA attack: baseline DNN first, then the adversarially trained model
attack_params = {"k":100, 'is_report_loss_diff':False, 'use_sample':False}
for target_model in (model_DNN, model_AT_rFGSM):
    adv_predict(test_loader, target_model, bca_k, device, **attack_params)

Adversarial accuracy (without attack): 99.34% | Under attack: 0.0%.
Adversarial accuracy (without attack): 99.36% | Under attack: 83.93%.


In [22]:
# BGA attack: baseline DNN first, then the adversarially trained model
attack_params = {"k":100, 'is_report_loss_diff':False, 'use_sample':False}
for target_model in (model_DNN, model_AT_rFGSM):
    adv_predict(test_loader, target_model, bga_k, device, **attack_params)

Adversarial accuracy (without attack): 99.34% | Under attack: 0.0%.
Adversarial accuracy (without attack): 99.36% | Under attack: 90.87%.


In [24]:
# rFGSM attack: baseline DNN first, then the adversarially trained model
attack_params = {"k":100, "epsilon":0.02, 'random':True, 'is_report_loss_diff':False, 'is_sample':False}
for target_model in (model_DNN, model_AT_rFGSM):
    adv_predict(test_loader, target_model, dfgsm_k, device, **attack_params)

Adversarial accuracy (without attack): 99.34% | Under attack: 0.0%.
Adversarial accuracy (without attack): 99.36% | Under attack: 89.91%.


In [None]:
# PGD (l1 norm): baseline DNN first, then the adversarially trained model
attack_params = {"k":500, "step_length":1., 'norm':'l1', 'random':False, 'is_report_loss_diff':False, 'is_sample':False}
for target_model in (model_DNN, model_AT_rFGSM):
    adv_predict(test_loader, target_model, pgd, device, **attack_params)

Adversarial accuracy (without attack): 99.34% | Under attack: 0.0%.


In [None]:
# PGD (l2 norm): baseline DNN first, then the adversarially trained model
attack_params = {"k":200, "step_length":0.05, 'norm':'l2', 'random':False, 'is_report_loss_diff':False, 'is_sample':False}
for target_model in (model_DNN, model_AT_rFGSM):
    adv_predict(test_loader, target_model, pgd, device, **attack_params)

In [None]:
# PGD (linf norm): baseline DNN first, then the adversarially trained model
attack_params = {"k":500, "step_length":0.002, 'norm':'linf', 'random':False, 'is_report_loss_diff':False, 'is_sample':False}
for target_model in (model_DNN, model_AT_rFGSM):
    adv_predict(test_loader, target_model, pgd, device, **attack_params)

In [19]:
# Step-wise max attack against the adversarially trained model:
# one step length per norm, 500 steps in total
attack_params = {
    'step_lengths': {"l1": 1.0, "l2": 0.05, "linf": 0.001},
    "steps": 500,
}
adv_predict(test_loader, model_AT_rFGSM, attack=StepwiseMax_onestep2, device=device, **attack_params)

step-wise max: attack effectiveness 0.000%.
step-wise max: attack effectiveness 10.000%.


KeyboardInterrupt: 

In [None]:
# Define different attacks with their parameters
attacks = [
    (dfgsm_k, {"k":100, "epsilon":0.02, 'is_report_loss_diff' : False}),
    (bga_k, {"k":100, 'is_report_loss_diff' : False}),
    (bca_k, {"k":100, 'is_report_loss_diff' : False}),
    (grosse_k, {"k":100, 'is_report_loss_diff' : False}),

    (pgd, {'k': 100, 'step_length': 1., 'norm': 'l1', 'is_report_loss_diff' : False}),
    (pgd, {'k': 200, 'step_length': 0.05, 'norm': 'l2', 'is_report_loss_diff' : False}),
    (pgd, {'k': 100, 'step_length': 0.02, 'norm': 'linf', 'is_report_loss_diff' : False}),
    (StepwiseMax_onestep2, {'step_lengths':{"l1": 1.0, "l2": 0.05, "linf": 0.0008}, "steps":650}),

    # Add more attacks as needed
]

# Iterate over each attack and its parameters
for attack_func, attack_params in attacks:
    print(f"Running attack: {attack_func.__name__} with parameters: {attack_params}")
    adv_predict(test_loader, model_AT_rFGSM, attack_func, device, **attack_params)
    print()  # Print an empty line for separation


Running attack: dfgsm_k with parameters: {'k': 50, 'epsilon': 0.02, 'is_report_loss_diff': False}
Adversarial accuracy (without attack): 99.25% | Under attack: 83.43%.

Running attack: bga_k with parameters: {'k': 25, 'is_report_loss_diff': False}
Adversarial accuracy (without attack): 99.25% | Under attack: 91.57%.

Running attack: bca_k with parameters: {'k': 25, 'is_report_loss_diff': False}
Adversarial accuracy (without attack): 99.25% | Under attack: 87.93%.

Running attack: grosse_k with parameters: {'k': 25, 'is_report_loss_diff': False}
Adversarial accuracy (without attack): 99.25% | Under attack: 87.93%.

Running attack: pgd with parameters: {'k': 100, 'step_length': 1.0, 'norm': 'l1', 'is_report_loss_diff': False}
Adversarial accuracy (without attack): 99.25% | Under attack: 87.24%.

Running attack: pgd with parameters: {'k': 200, 'step_length': 0.05, 'norm': 'l2', 'is_report_loss_diff': False}
Adversarial accuracy (without attack): 99.25% | Under attack: 91.67%.

Running att

In [31]:
def mimic_attack_effectiveness_optimized(test_loader, model, seed, trials=1000, device="cuda:0"):
  """
  Calculates the effectiveness of the mimic attack on the given model.

  Every malicious sample is combined (element-wise sum clamped to [0, 1]) with
  `trials` randomly chosen benign samples. The attack on a malicious sample
  counts as successful when `get_loss` flags at least one of its combinations
  as done.

  Args:
      test_loader: A PyTorch dataloader containing the test data. Labels may be
          shaped (batch,) or (batch, 1); 0 marks benign, 1 marks malicious.
      model: The PyTorch model to be attacked.
      seed: The random seed for reproducibility. It is incremented before each
          batch that contains malware so every batch draws different donors.
      trials: The number of random samples to use from the benign class
          (default: 1000); capped at the number of benign samples available.
      device: The device to use for computations (default: "cuda:0"). There is
          no automatic CPU fallback; pass a CPU device explicitly when no GPU
          is present.

  Returns:
      The effectiveness of the mimic attack as a percentage (float); 0 when
      there are no malicious samples or no benign samples to mimic.
  """

  torch.manual_seed(seed)
  model.eval()

  # Initialize counters
  successful_attacks = 0
  total_malicious_samples = 0

  # Pre-select benign samples for efficiency. reshape(-1) (instead of squeeze)
  # keeps the mask 1-D even when a batch holds a single sample.
  benign_samples = [x_batch[y_batch.reshape(-1) == 0] for x_batch, y_batch in test_loader]
  ben_x = torch.cat(benign_samples, dim=0).to(device)

  # Clear unnecessary variables
  del benign_samples

  trials = min(trials, len(ben_x))

  for x_batch, y_batch in test_loader:
    x_batch, y_batch = x_batch.to(device), y_batch.to(device)
    # Cast explicitly to float32: the loader may yield integer features and
    # the model should not depend on clamp's implicit type promotion.
    malicious_samples = x_batch[y_batch.reshape(-1) == 1].to(torch.float32)
    num_malicious = malicious_samples.shape[0]
    if num_malicious == 0:
      continue

    total_malicious_samples += num_malicious
    if trials <= 0:
      continue  # no benign donors available: the attack cannot succeed

    # Expand dimensions for efficient broadcasting
    malicious_samples_expanded = malicious_samples.unsqueeze(1).expand(-1, trials, -1)

    # Draw a fresh, reproducible set of benign donors for this batch
    seed += 1
    torch.manual_seed(seed)
    indices = torch.randperm(len(ben_x), device=device)[:trials]
    trial_vectors_expanded = ben_x[indices].to(torch.float32).unsqueeze(0)

    # Perform the mimic attack and update counters.
    # NOTE: the clamp materializes num_malicious * trials feature rows at once,
    # so memory grows with batch size times `trials`.
    modified_x = torch.clamp(malicious_samples_expanded + trial_vectors_expanded, min=0., max=1.)
    _, done = get_loss(modified_x.reshape(-1, modified_x.shape[-1]), torch.ones(trials * num_malicious, 1, device=device), model)
    successful_attacks += (done.view(num_malicious, trials).sum(dim=1) > 0).sum().item()

  # Calculate and print attack effectiveness
  attack_effectiveness = (successful_attacks / total_malicious_samples) * 100 if total_malicious_samples > 0 else 0
  print(f"Mimic attack effectiveness: {attack_effectiveness:.3f}%.")

  return attack_effectiveness


In [34]:
# Mimic attack against the adversarially trained model, 30 benign donors per malware sample
mimic_attack_effectiveness_optimized(
    test_loader,
    model_AT_rFGSM,
    seed=230,
    trials=30,
    device=device,
)

Mimic attack effectiveness: 11.871%.


11.870503597122301