<a href="https://colab.research.google.com/github/mostafa-ja/mal_adv3/blob/main/4_adverserial_attacks_ipynb.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split
from scipy import sparse
import gdown
import numpy as np
import matplotlib.pyplot as plt
import pickle
import torch.nn.functional as F
import random
import time

In [6]:
# Fetch the sparse feature matrix from Google Drive.
# The output path ends with '/', so gdown keeps the remote file name
# (the recorded output shows it saved as X_redefined_sparse_matrix.npz).
download_link = 'https://drive.google.com/uc?id=12iud4h19CZst4exbr3U2A9iDxBYvZ5U_'
output_filepath = '/content/'
gdown.download(url=download_link, output=output_filepath)

Downloading...
From: https://drive.google.com/uc?id=12iud4h19CZst4exbr3U2A9iDxBYvZ5U_
To: /content/X_redefined_sparse_matrix.npz
100%|██████████| 2.31M/2.31M [00:00<00:00, 165MB/s]


'/content/X_redefined_sparse_matrix.npz'

In [7]:
# Fetch the label tensor from Google Drive
# (the recorded output shows it saved as /content/labels.pt).
download_link = 'https://drive.google.com/uc?id=1IhrcT3jHqlPrw2KvQ5vJkBgozxcJ1cJm'
output_filepath = '/content/'
gdown.download(url=download_link, output=output_filepath)

Downloading...
From: https://drive.google.com/uc?id=1IhrcT3jHqlPrw2KvQ5vJkBgozxcJ1cJm
To: /content/labels.pt
100%|██████████| 517k/517k [00:00<00:00, 59.9MB/s]


'/content/labels.pt'

In [8]:
# Fetch the pre-trained DNN parameters from Google Drive
# (the recorded output shows them saved as /content/DNN_params.pth).
download_link = 'https://drive.google.com/uc?id=13o5n06UpMDOhtk4u7B_RBSWa3kiiGXFs'
output_filepath = '/content/'
gdown.download(url=download_link, output=output_filepath)

Downloading...
From: https://drive.google.com/uc?id=13o5n06UpMDOhtk4u7B_RBSWa3kiiGXFs
To: /content/DNN_params.pth
100%|██████████| 8.17M/8.17M [00:00<00:00, 59.7MB/s]


'/content/DNN_params.pth'

In [9]:
# Fetch the pickled vocabulary from Google Drive
# (the recorded output shows it saved as /content/vocab.pkl).
download_link = 'https://drive.google.com/uc?id=1PxFOLBnQAlX-EOsqkhGCSd1T3ykAD0-4'
output_filepath = '/content/'
gdown.download(url=download_link, output=output_filepath)

Downloading...
From: https://drive.google.com/uc?id=1PxFOLBnQAlX-EOsqkhGCSd1T3ykAD0-4
To: /content/vocab.pkl
100%|██████████| 9.18M/9.18M [00:00<00:00, 65.0MB/s]


'/content/vocab.pkl'

In [10]:
# Read the pickled vocabulary from the working directory.
# NOTE: pickle.load can execute arbitrary code embedded in the file, so only
# load pickles that come from a trusted source.
with open('vocab.pkl', 'rb') as vocab_file:
    vocab = pickle.load(vocab_file)

# Preview the first six (key, value) entries of the vocabulary.
for position, entry in enumerate(vocab.items()):
    print(entry)
    if position >= 5:
        break

('android/media/mediaplayer->start', 141045)
('android/app/activity->setcontentview', 140900)
('android/os/vibrator->cancel', 141093)
('android.permission.vibrate', 140720)
('android.hardware.touchscreen', 137091)
('android.intent.action.main', 138335)


In [4]:
# Load dataset: the label tensor and the SciPy sparse feature matrix, both
# read from the current working directory.
labels_tensor = torch.load('labels.pt')
X_redefined = sparse.load_npz("X_redefined_sparse_matrix.npz")

In [5]:
# Stratified split: first hold out 20% as the test set, then hold out 20% of
# the remainder as the validation set. Both splits use random_state=42.
X_train_val, X_test, labels_train_val, labels_test = train_test_split(
    X_redefined,
    labels_tensor,
    test_size=0.2,
    stratify=labels_tensor,
    random_state=42,
)
X_train, X_val, labels_train, labels_val = train_test_split(
    X_train_val,
    labels_train_val,
    test_size=0.2,
    stratify=labels_train_val,
    random_state=42,
)


def _as_dataset(features_sparse, labels):
    """Densify a sparse feature matrix to float32 and pair it with its labels."""
    dense_features = torch.tensor(features_sparse.toarray(), dtype=torch.float32)
    return TensorDataset(dense_features, labels)


# Combine features and labels into datasets
train_dataset = _as_dataset(X_train, labels_train)
val_dataset = _as_dataset(X_val, labels_val)
test_dataset = _as_dataset(X_test, labels_test)



In [59]:
# Define the DataLoader for training, validation, and test sets
batch_size = 256
# Only the training loader is shuffled. The validation and test loaders keep a
# fixed order so that metrics computed per batch are reproducible from one
# evaluation pass to the next.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [1]:
import torch
import torch.nn as nn

class MalwareDetectionModel(nn.Module):
    """Fully connected classifier with two hidden layers.

    Layout: Linear -> ReLU -> Dropout -> Linear -> ReLU -> Dropout -> Linear
    -> LogSoftmax over dim 1, so ``forward`` returns log-probabilities.
    """

    def __init__(self, input_size=10000, hidden_1_size=200, hidden_2_size=200, num_labels=2, dropout_prob=0.6):
        """
        :param input_size: Number of input features
        :param hidden_1_size: Width of the first hidden layer
        :param hidden_2_size: Width of the second hidden layer
        :param num_labels: Number of output classes
        :param dropout_prob: Dropout probability used after each hidden layer
        """
        super().__init__()

        # Keep the hyper-parameters on the instance for later inspection.
        self.input_size, self.hidden_1_size, self.hidden_2_size = input_size, hidden_1_size, hidden_2_size
        self.num_labels, self.dropout_prob = num_labels, dropout_prob

        # First hidden block
        self.fc1 = nn.Linear(input_size, hidden_1_size)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(dropout_prob)

        # Second hidden block
        self.fc2 = nn.Linear(hidden_1_size, hidden_2_size)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(dropout_prob)

        # Output head
        self.fc3 = nn.Linear(hidden_2_size, num_labels)
        self.log_softmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        """Return the log-softmax scores of ``x`` with shape (batch, num_labels)."""
        hidden = self.dropout1(self.relu1(self.fc1(x)))
        hidden = self.dropout2(self.relu2(self.fc2(hidden)))
        return self.log_softmax(self.fc3(hidden))



In [60]:
# Create an instance of your model
model = MalwareDetectionModel()

# Load model parameters.
# map_location='cpu' remaps the stored tensors to the CPU while loading, so the
# checkpoint can be read on a CPU-only host even if it was saved from a GPU;
# load_state_dict then copies the values into the (CPU) model parameters.
model.load_state_dict(torch.load('DNN_params.pth', map_location='cpu'))

<All keys matched successfully>

In [7]:
# Pull one batch from the test loader and show the feature and label shapes,
# one per line.
X, y = next(iter(test_loader))
print(X.shape, y.shape, sep="\n")

torch.Size([256, 10000])
torch.Size([256, 1])


In [17]:
# Number of batches the test DataLoader yields per pass.
len(test_loader)

101

In [8]:
# Fix the PyTorch RNG seed.
torch.manual_seed(0)

# Device configuration: use CUDA when it is available, otherwise the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print(device)

cpu


In [76]:
# NOTE(review): this re-instantiation replaces the model whose parameters were
# loaded from 'DNN_params.pth', so on a top-to-bottom run the cells below start
# from freshly initialised weights. Confirm that this is intended.
# The model is moved to `device` because the training loop sends every batch
# there with `.to(device)`; leaving the model on the CPU would fail on CUDA.
model = MalwareDetectionModel().to(device)

In [20]:
from adverserial_attacks_functions import *


In [53]:
def pgd(x, y, model, k=25, step_length=0.02, norm='linf', initial_rounding_threshold=0.5, round_threshold=0.5, random=False, is_report_loss_diff=True, is_sample=False):
    """
    Projected Gradient Descent (PGD) adversarial attack.

    Starting from ``get_x0(x, ...)``, takes up to ``k`` ascent steps on the
    per-sample cross-entropy loss. Gradients are masked with ``x < 0.5`` so only
    features that are off in the original sample are moved, and the iterate is
    clamped to ``[0, 1]`` after every step. The result is rounded with
    ``round_x`` and merged with ``x`` through ``or_float_tensors``. Samples whose
    adversarial loss ends up lower than their natural loss are reset to ``x``.

    Side effect: ``model`` is switched to eval mode and left in that mode.

    :param x: Feature vectors, one row per sample
    :param y: Ground truth labels (flattened internally with ``view(-1)``)
    :param model: Neural network model returning one score per class
    :param k: Number of steps
    :param step_length: Step size for each iteration
    :param norm: Norm used for perturbation ('linf', 'l2', 'l2_2' or 'l1')
    :param initial_rounding_threshold: Threshold parameter forwarded to ``get_x0``
    :param round_threshold: Threshold parameter forwarded to ``round_x``
    :param random: If true, ``round_threshold`` is replaced by per-element
        uniform random thresholds (note: the name shadows the ``random`` module
        inside this function)
    :param is_report_loss_diff: If true, print the share of samples whose
        prediction differs from ``y`` after the attack
    :param is_sample: Flag forwarded to ``get_x0``
    :return: The adversarial version of x (tensor); a copy of ``x`` for an empty batch
    :raises ValueError: If ``norm`` is not one of the supported values
    """
    model.eval()

    # An empty batch (e.g. a mini-batch without malicious samples) has nothing
    # to attack. Returning early avoids a NaN mean loss and the division by
    # zero in the effectiveness report below.
    if x.size(0) == 0:
        return x.clone()

    criterion = nn.CrossEntropyLoss(reduction='none')
    targets = y.view(-1).long()

    # Compute natural loss (value only, no autograd graph required)
    with torch.no_grad():
        loss_natural = criterion(model(x), targets)

    # Initialize starting point
    x_next = x.clone()
    x_next = get_x0(x_next, initial_rounding_threshold, is_sample)

    # Multi-step PGD
    for _ in range(k):
        # Forward pass
        x_var = x_next.clone().detach().requires_grad_(True)
        y_model = model(x_var)
        loss = criterion(y_model, targets)

        # Compute gradient, keeping only features that are off in the original x
        grad_data = torch.autograd.grad(loss.mean(), x_var)[0].detach()
        gradients = grad_data * (x < 0.5)

        # Norm
        if norm == 'linf':
            perturbation = torch.sign(gradients)
        elif norm == 'l2_2':
            # Scale by the largest gradient entry of each row, capped at 1
            max_grad, _ = gradients.max(dim=1, keepdim=True)
            perturbation = torch.minimum(torch.tensor(1., dtype=x.dtype, device=x.device), gradients / max_grad)

        elif norm == 'l2':
            l2norm = torch.linalg.norm(gradients, dim=-1, keepdim=True)
            perturbation = torch.minimum(torch.tensor(1., dtype=x.dtype, device=x.device), gradients / l2norm)
            perturbation[torch.isnan(perturbation)] = 0.
            perturbation[torch.isinf(perturbation)] = 1.
        elif norm == 'l1':
            # ignore the gradient of indices which are already updated
            gradients = gradients * (x_next < 0.5)
            val, _ = torch.topk(gradients, 1)
            perturbation = (gradients >= val.expand_as(gradients)).float()
            # stop perturbing the examples that are successful to evade the victim
            with torch.no_grad():
                outputs = model(x_next)
            _, predicted = torch.topk(outputs, k=1)
            # Flatten both sides so the comparison stays per-sample for any
            # label shape and for a batch of size one.
            done = predicted.view(-1) != targets

            if torch.all(done):
                break
            perturbation[done] = 0.

        else:
            raise ValueError("Expect 'l1' or 'l2' or 'linf' norm.")

        # Update x_next
        x_next = torch.clamp(x_next + perturbation * step_length, min=0., max=1.)

    # Rounding step
    if random:
        # Create the thresholds on the same device as the tensor they are compared with
        round_threshold = torch.rand(x_next.size(), device=x_next.device)
    x_next = round_x(x_next, round_threshold=round_threshold)

    # Feasible projection
    x_next = or_float_tensors(x_next, x)

    # Compute adversarial loss; the model is in eval mode, so one forward pass
    # serves both the loss and the effectiveness report.
    with torch.no_grad():
        outputs_adv = model(x_next)
        loss_adv = criterion(outputs_adv, targets)

    if is_report_loss_diff:
        _, predicted = torch.topk(outputs_adv, k=1)
        done = predicted.view(-1) != targets
        print(f"PGD {norm}: Attack effectiveness {(done.sum().item() / x.size()[0]) * 100:.3f}%.")

    # Fall back to the natural sample where the attack lowered the loss
    replace_flag = loss_adv < loss_natural
    x_next[replace_flag] = x[replace_flag]

    return x_next



In [84]:
# Adversarial training: every mini-batch keeps its benign samples unchanged and
# replaces its malicious samples with PGD-perturbed versions before the update.
verbose = True
adv_epochs = 10
lr = 0.001
weight_decay = 0.
# Keep the model on the same device as the batches moved with `.to(device)` below.
model.to(device)
optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
criterion = nn.CrossEntropyLoss()
total_time = 0.
nbatches = len(train_loader)
best_acc_val = 0.
acc_val_adv_be = 0.
best_epoch = 0
# Copy of the parameters from the best epoch so far; restore it with
# model.load_state_dict(best_model_state) to evaluate the selected model.
best_model_state = None

# One PGD configuration shared by training and validation.
attack_kwargs = dict(k=25, step_length=1., norm='l1', initial_rounding_threshold=0.5, round_threshold=0.5, random=False, is_report_loss_diff=True, is_sample=False)

for epoch in range(adv_epochs):
    epoch_losses = []
    epoch_accuracies = []
    for idx_batch, (x_batch, y_batch) in enumerate(train_loader):
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)

        # Separate malicious and benign samples. view(-1) keeps the masks
        # one-dimensional even for a batch that holds a single sample.
        is_mal = y_batch.view(-1) == 1
        is_ben = y_batch.view(-1) == 0
        mal_x_batch, ben_x_batch = x_batch[is_mal], x_batch[is_ben]
        mal_y_batch, ben_y_batch = y_batch[is_mal], y_batch[is_ben]

        # Generate adversarial examples (skipped when the batch has no malware)
        if mal_x_batch.size(0) > 0:
            model.eval()
            pertb_mal_x = pgd(mal_x_batch, mal_y_batch, model, **attack_kwargs)
        else:
            pertb_mal_x = mal_x_batch
        x_batch = torch.cat([ben_x_batch, pertb_mal_x], dim=0)
        y_batch = torch.cat([ben_y_batch, mal_y_batch])
        model.train()

        # Forward pass and backward pass
        optimizer.zero_grad()
        outputs = model(x_batch)
        loss_train = criterion(outputs, y_batch.view(-1).long())
        loss_train.backward()
        optimizer.step()

        # Calculate metrics
        epoch_losses.append(loss_train.item())
        _, predicted = torch.topk(outputs, k=1)
        acc_train = (predicted.view(-1) == y_batch.view(-1)).sum().item() / len(y_batch)
        epoch_accuracies.append(acc_train)

        # Print batch level information
        if verbose:
            print(f'Mini batch: {idx_batch + 1}/{nbatches} | Epoch: {epoch + 1}/{adv_epochs} | Batch Loss: {loss_train.item():.4f} | Batch Accuracy: {acc_train * 100:.2f}%')

    # Calculate epoch level metrics
    mean_loss = np.mean(epoch_losses)
    mean_accuracy = np.mean(epoch_accuracies) * 100

    # Print epoch level information
    if verbose:
        print(f'Training loss (epoch level): {mean_loss:.4f} | Train accuracy: {mean_accuracy:.2f}%')

    # Evaluation on validation set. Correct predictions are counted per sample,
    # so batches of different sizes are weighted properly.
    model.eval()
    n_val, n_val_correct = 0, 0
    n_adv, n_adv_correct = 0, 0
    for x_val, y_val in val_loader:
        x_val, y_val = x_val.to(device), y_val.to(device)
        with torch.no_grad():
            outputs = model(x_val)
        _, predicted = torch.topk(outputs, k=1)
        n_val_correct += (predicted.view(-1) == y_val.view(-1)).sum().item()
        n_val += len(y_val)

        # Generate adversarial examples for the malicious validation samples
        is_mal = y_val.view(-1) == 1
        mal_x_batch, mal_y_batch = x_val[is_mal], y_val[is_mal]
        if mal_x_batch.size(0) == 0:
            continue
        pertb_mal_x = pgd(mal_x_batch, mal_y_batch, model, **attack_kwargs)
        with torch.no_grad():
            outputs = model(pertb_mal_x)
        _, y_pred = torch.topk(outputs, k=1)

        # A perturbed malicious sample counts as correct when still predicted as class 1
        n_adv_correct += (y_pred == 1.).sum().item()
        n_adv += len(y_pred)

    # Calculate validation accuracy
    if n_val == 0 or n_adv == 0:
        raise RuntimeError("Validation set yielded no samples (or no malicious samples) to evaluate.")
    acc_val = n_val_correct / n_val
    acc_val_adv = n_adv_correct / n_adv
    acc_all = (acc_val + acc_val_adv) / 2.

    # Update best validation accuracy and remember the matching parameters
    if acc_all >= best_acc_val:
        best_acc_val = acc_all
        acc_val_adv_be = acc_val_adv
        best_epoch = epoch + 1
        best_model_state = {name: tensor.detach().clone() for name, tensor in model.state_dict().items()}

    # Print validation results
    if verbose:
        print(f"\tVal accuracy(without attack) {acc_val * 100:.4}% and accuracy(with attack) {acc_val_adv * 100:.4}% under attack and overall accuracy {acc_all * 100:.4}%.")
        print(f"\tModel select at epoch {best_epoch} with validation accuracy {best_acc_val * 100:.4}% and accuracy {acc_val_adv_be * 100:.4}% under attack.")


PGD l1: Attack effectiveness 25.000%.
Mini batch: 1/323 | Epoch: 1/10 | Batch Loss: 0.0178 | Batch Accuracy: 99.22%
PGD l1: Attack effectiveness 9.091%.
Mini batch: 2/323 | Epoch: 1/10 | Batch Loss: 0.0171 | Batch Accuracy: 99.61%
PGD l1: Attack effectiveness 33.333%.
Mini batch: 3/323 | Epoch: 1/10 | Batch Loss: 0.0300 | Batch Accuracy: 98.44%
PGD l1: Attack effectiveness 25.000%.
Mini batch: 4/323 | Epoch: 1/10 | Batch Loss: 0.0587 | Batch Accuracy: 98.44%
PGD l1: Attack effectiveness 17.647%.
Mini batch: 5/323 | Epoch: 1/10 | Batch Loss: 0.1192 | Batch Accuracy: 98.44%
PGD l1: Attack effectiveness 14.286%.
Mini batch: 6/323 | Epoch: 1/10 | Batch Loss: 0.0219 | Batch Accuracy: 99.61%
PGD l1: Attack effectiveness 38.462%.
Mini batch: 7/323 | Epoch: 1/10 | Batch Loss: 0.0420 | Batch Accuracy: 98.05%
PGD l1: Attack effectiveness 15.385%.
Mini batch: 8/323 | Epoch: 1/10 | Batch Loss: 0.0439 | Batch Accuracy: 98.83%
PGD l1: Attack effectiveness 30.000%.
Mini batch: 9/323 | Epoch: 1/10 | B

KeyboardInterrupt: 