In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

# Neural Network for malware detection

A simple feedforward neural network for binary classification (malware detection): two fully connected hidden layers with ReLU activations, followed by a fully connected output layer with a sigmoid.

In [3]:
class MalwareDetector(nn.Module):
    """Fully connected network for binary malware classification.

    Two hidden ``Linear`` layers, each followed by ReLU, feed a final
    ``Linear`` layer whose result is passed through a sigmoid.

    Args:
        input_size: Number of features per input sample.
        hidden_size: Width of both hidden layers.
        output_size: Number of output units.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()

        # Linear layers, listed from input to output.
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, output_size)

        # Non-linearities: ReLU after each hidden layer, sigmoid on the output.
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the sigmoid-activated outputs for the batch ``x``."""
        hidden = x
        for layer in (self.fc1, self.fc2):
            hidden = self.relu(layer(hidden))
        # Sigmoid squashes the last layer's output into (0, 1).
        return self.sigmoid(self.fc3(hidden))

#### neural network parameters

In [None]:

# Number of input features; to be changed to match the number of features in the dataset.
input_size = 100
# Hidden layer width (e.g. 32, 64, 128, 256). A larger value increases the
# model's capacity to capture complex patterns but may lead to overfitting,
# especially if the dataset is small.
hidden_size = 64
# Single output unit: binary classification (malware or not).
output_size = 1

model = MalwareDetector(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
)

### Train Hyperparameters

In [6]:
# Training configuration
BATCH_SIZE = 32
NUM_EPOCHS = 50
learning_rate = 0.001

# Binary cross-entropy loss, optimised with Adam over all model parameters.
criterion = nn.BCELoss()
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

## Train model

In [7]:
# Example training loop
def train_model(model, criterion, optimizer, train_loader, num_epochs=20):
    """Train ``model`` on ``train_loader`` and print the mean loss per epoch.

    Args:
        model: ``nn.Module`` to optimise; it is switched to training mode.
        criterion: Loss callable invoked as ``criterion(outputs, targets)``.
        optimizer: Optimizer that steps the parameters of ``model``.
        train_loader: Iterable of ``(inputs, labels)`` batches. Labels may be
            shaped ``(batch,)`` or already match the model output shape.
        num_epochs: Number of passes over ``train_loader``.

    Returns:
        None. Progress is reported with ``print``.
    """
    model.train()  # Set the model to training mode

    # Send batches to wherever the model lives instead of relying on a
    # notebook-level ``device`` variable having been defined beforehand.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device('cpu')

    for epoch in range(num_epochs):
        running_loss = 0.0
        num_batches = 0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(target_device), labels.to(target_device)

            # Gradients accumulate by default, so clear them for each batch.
            optimizer.zero_grad()

            # Forward pass
            outputs = model(inputs)
            # Give the labels the same shape as the outputs so that both
            # (batch,) and (batch, 1) label tensors are accepted.
            targets = labels.float().reshape(outputs.shape)
            loss = criterion(outputs, targets)

            # Backward pass and parameter update
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            num_batches += 1

        # Count batches ourselves: works for iterables without ``len`` and
        # avoids dividing by zero when the loader is empty.
        mean_loss = running_loss / num_batches if num_batches else float('nan')
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {mean_loss:.4f}")

# Use the GPU when CUDA is available, otherwise the CPU, and move the model there.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
model = model.to(device)

In [None]:
# Batches of BATCH_SIZE samples drawn from train_dataset in shuffled order.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
)

In [None]:
# Run the training loop for NUM_EPOCHS epochs.
train_model(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    train_loader=train_loader,
    num_epochs=NUM_EPOCHS,
)

## Evaluate model

In [None]:
def evaluate_model(model, test_loader):
    """Print the classification accuracy of ``model`` over ``test_loader``.

    Outputs above 0.5 are counted as class 1, everything else as class 0.

    Args:
        model: ``nn.Module`` expected to produce one output per label; it is
            switched to evaluation mode and left in that mode.
        test_loader: Iterable of ``(inputs, labels)`` batches. Labels may be
            shaped ``(batch,)`` or already match the model output shape.

    Returns:
        None. The accuracy is reported with ``print``.
    """
    model.eval()  # Set the model to evaluation mode

    # Send batches to wherever the model lives instead of relying on a
    # notebook-level ``device`` variable having been defined beforehand.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device('cpu')

    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(target_device), labels.to(target_device)
            outputs = model(inputs)
            predicted = (outputs > 0.5).float()  # Convert probabilities to binary labels
            # Align the labels with the prediction shape. Comparing tensors of
            # different rank would broadcast and miscount the matches.
            targets = labels.float().reshape(predicted.shape)
            total += labels.size(0)
            correct += (predicted == targets).sum().item()

    if total == 0:
        # Nothing was evaluated; report that instead of dividing by zero.
        print('Accuracy: n/a (test_loader yielded no samples)')
        return

    print(f'Accuracy: {100 * correct / total:.2f}%')

# test_loader is assumed to be the DataLoader for the test data.
evaluate_model(model=model, test_loader=test_loader)


# NOISE
`noise_factor` controls how much noise is added: each parameter receives standard-normal noise multiplied by this factor. A small value like 0.01 adds subtle noise, while a larger value like 0.1 adds more significant perturbations.

In [10]:
# Scale applied to the random noise that is added to the model parameters.
NOISE_FACTOR = 0.01

def add_noise_to_weights(model, noise_factor):
    """Adds random Gaussian noise to the model's parameters, in place.

    Every parameter tensor returned by ``model.parameters()`` receives
    independent standard-normal noise multiplied by ``noise_factor``.

    Args:
        model: PyTorch neural network model; its parameters are modified.
        noise_factor: The magnitude of the noise to be added to the weights.
    """
    with torch.no_grad():  # No need to track gradients
        for param in model.parameters():
            # randn_like allocates the noise on the parameter's own device and
            # with its dtype, so this also works once the model is on the GPU.
            noise = torch.randn_like(param) * noise_factor
            param.add_(noise)  # Add noise to the current parameters

    print(f"Added noise with factor {noise_factor} to model weights.")


## Evaluate model with noise

In [12]:
def test_with_noise(model, test_loader, noise_factor):
    """Test the model after adding noise to the weights, then restore them.

    Args:
        model: PyTorch model to perturb temporarily.
        test_loader: Iterable of ``(inputs, labels)`` batches passed on to
            ``evaluate_model``.
        noise_factor: Magnitude of the noise passed to
            ``add_noise_to_weights``.

    Returns:
        None. Results are reported with ``print``.
    """
    # Save the original weights. state_dict() returns references to the live
    # tensors, so clone them; otherwise the in-place noise would also change
    # the "saved" values and the restore below would do nothing.
    original_state_dict = {
        name: tensor.detach().clone()
        for name, tensor in model.state_dict().items()
    }

    try:
        # Add noise to the model
        add_noise_to_weights(model, noise_factor=noise_factor)

        # Evaluate the model with noisy weights
        print("Testing model with noisy weights...")
        evaluate_model(model, test_loader)
    finally:
        # Restore the original weights even if the evaluation raised.
        model.load_state_dict(original_state_dict)
        print("Restored original model weights.")


In [None]:
# Evaluate once with NOISE_FACTOR-scaled noise on the weights.
test_with_noise(
    model=model,
    test_loader=test_loader,
    noise_factor=NOISE_FACTOR,
)

### Another way to evaluate: accuracy on poisoned vs. clean samples

In [13]:
def evaluate_with_poisoned_tracking(model, test_loader, poisoned_indices, noise_factor=0.01):
    """Evaluate the model after adding noise and track the effect on poisoned vs. clean samples.

    The weights are perturbed with ``add_noise_to_weights``, accuracy is
    measured separately for the two groups, and the original weights are
    restored before the results are printed.

    Args:
        model: PyTorch model expected to produce one output per sample; it is
            switched to evaluation mode and left in that mode.
        test_loader: Iterable of ``(inputs, labels)`` batches.
            NOTE(review): samples are numbered in iteration order, so the
            loader presumably must not shuffle for ``poisoned_indices`` to
            line up — confirm with the caller.
        poisoned_indices: Iterable of 0-based sample positions (in iteration
            order over ``test_loader``) that are considered poisoned.
        noise_factor: Magnitude of the noise passed to
            ``add_noise_to_weights``.

    Returns:
        None. Both accuracies are reported with ``print``.
    """
    model.eval()

    # Send batches to wherever the model lives instead of relying on a
    # notebook-level ``device`` variable having been defined beforehand.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device('cpu')

    # Membership is checked once per sample, so use a set of plain ints.
    poisoned_lookup = {int(index) for index in poisoned_indices}

    correct_poisoned = 0
    correct_clean = 0
    poisoned_samples = 0
    clean_samples = 0
    sample_index = 0  # Running position across batches; stays right for a short last batch

    # Clone the weights before perturbing them: state_dict() returns
    # references to the live tensors, not copies.
    original_state_dict = {
        name: tensor.detach().clone()
        for name, tensor in model.state_dict().items()
    }

    try:
        # Add noise to the model
        add_noise_to_weights(model, noise_factor=noise_factor)

        with torch.no_grad():
            for inputs, labels in test_loader:
                inputs, labels = inputs.to(target_device), labels.to(target_device)
                outputs = model(inputs)
                predicted = (outputs > 0.5).float()

                # One correctness flag per sample, in batch order.
                hits = (predicted.reshape(-1) == labels.float().reshape(-1)).tolist()

                # Track accuracy for poisoned vs. clean samples
                for hit in hits:
                    if sample_index in poisoned_lookup:
                        poisoned_samples += 1
                        correct_poisoned += hit
                    else:
                        clean_samples += 1
                        correct_clean += hit
                    sample_index += 1
    finally:
        # Restore the original weights even if the evaluation raised.
        model.load_state_dict(original_state_dict)

    # Calculate accuracy for poisoned and clean samples
    accuracy_poisoned = 100 * correct_poisoned / poisoned_samples if poisoned_samples > 0 else 0
    accuracy_clean = 100 * correct_clean / clean_samples if clean_samples > 0 else 0

    print(f"Accuracy on poisoned samples: {accuracy_poisoned:.2f}%")
    print(f"Accuracy on clean samples: {accuracy_clean:.2f}%")
