In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np
import pandas as pd
from ucimlrepo import fetch_ucirepo
import matplotlib.pyplot as plt
import seaborn as sns
import copy


## Load and preprocess dataset

In [2]:
# Fetch dataset
phishing_websites = fetch_ucirepo(id=327)
X = phishing_websites.data.features
y = phishing_websites.data.targets

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert to tensors
X_train_tensor = torch.tensor(X_train.values.astype(float), dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values.astype(float), dtype=torch.float32)
X_test_tensor = torch.tensor(X_test.values.astype(float), dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values.astype(float), dtype=torch.float32)

# Create datasets and dataloaders
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

## Define the neural network model

In [3]:
class MalwareDetector(nn.Module):
    def __init__(self):
        super(MalwareDetector, self).__init__()
        self.fc1 = nn.Linear(30, 64)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(64, 32)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(32, 16)
        self.relu3 = nn.ReLU()
        self.fc4 = nn.Linear(16, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.relu1(self.fc1(x))
        x = self.relu2(self.fc2(x))
        x = self.relu3(self.fc3(x))
        x = self.fc4(x)
        x = self.sigmoid(x)
        return x

model = MalwareDetector()

## Define training and evaluation functions

In [4]:
def train_model(model, criterion, optimizer, train_loader, num_epochs=20):
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            labels = (labels == 1).float()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}")

def evaluate_model(model, test_loader):
    model.eval()
    y_true = []
    y_pred = []
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            predicted = (outputs > 0.5).float()
            y_true.extend(labels.cpu().numpy())
            y_pred.extend(predicted.cpu().numpy())
    accuracy = accuracy_score(y_true, y_pred) * 100
    print(f'Accuracy: {accuracy:.2f}%')

## Add noise to model weights

In [5]:
def add_noise_to_weights(model, noise_factor):
    with torch.no_grad():
        for param in model.parameters():
            noise = torch.randn(param.size()) * noise_factor
            param.add_(noise)
    print(f"Added noise with factor {noise_factor} to model weights.")

## Tune noise hyperparameters

In [6]:
def tune_noise(model, test_loader, noise_factors):
    best_noise_factor = None
    best_accuracy = 0
    for noise_factor in noise_factors:
        original_state_dict = model.state_dict()
        add_noise_to_weights(model, noise_factor)
        y_true = []
        y_pred = []
        with torch.no_grad():
            for inputs, labels in test_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                predicted = (outputs > 0.5).float()
                y_true.extend(labels.cpu().numpy())
                y_pred.extend(predicted.cpu().numpy())
        accuracy = accuracy_score(y_true, y_pred) * 100
        print(f"Noise Factor: {noise_factor}, Accuracy: {accuracy:.2f}%")
        if accuracy > best_accuracy:
            best_noise_factor = noise_factor
            best_accuracy = accuracy
        model.load_state_dict(original_state_dict)
    print(f"Best Noise Factor: {best_noise_factor}")
    print(f"Best Accuracy: {best_accuracy:.2f}%")
    return best_noise_factor

## Train and evaluate the model

In [7]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
NUM_EPOCHS = 50

# Train the model
train_model(model, criterion, optimizer, train_loader, num_epochs=NUM_EPOCHS)

# Evaluate the model
evaluate_model(model, test_loader)

Epoch [1/50], Loss: 0.3709
Epoch [2/50], Loss: 0.1798
Epoch [3/50], Loss: 0.1661
Epoch [4/50], Loss: 0.1535
Epoch [5/50], Loss: 0.1419
Epoch [6/50], Loss: 0.1314
Epoch [7/50], Loss: 0.1210
Epoch [8/50], Loss: 0.1173
Epoch [9/50], Loss: 0.1112
Epoch [10/50], Loss: 0.1046
Epoch [11/50], Loss: 0.0962
Epoch [12/50], Loss: 0.0947
Epoch [13/50], Loss: 0.0902
Epoch [14/50], Loss: 0.0864
Epoch [15/50], Loss: 0.0818
Epoch [16/50], Loss: 0.0811
Epoch [17/50], Loss: 0.0796
Epoch [18/50], Loss: 0.0723
Epoch [19/50], Loss: 0.0705
Epoch [20/50], Loss: 0.0744
Epoch [21/50], Loss: 0.0681
Epoch [22/50], Loss: 0.0655
Epoch [23/50], Loss: 0.0650
Epoch [24/50], Loss: 0.0656
Epoch [25/50], Loss: 0.0622
Epoch [26/50], Loss: 0.0612
Epoch [27/50], Loss: 0.0619
Epoch [28/50], Loss: 0.0546
Epoch [29/50], Loss: 0.0575
Epoch [30/50], Loss: 0.0589
Epoch [31/50], Loss: 0.0552
Epoch [32/50], Loss: 0.0555
Epoch [33/50], Loss: 0.0559
Epoch [34/50], Loss: 0.0512
Epoch [35/50], Loss: 0.0539
Epoch [36/50], Loss: 0.0533
E

## Find the best noise factor

In [8]:
noise_factors = [0.001, 0.01, 0.05, 0.1, 0.5, 1.0]
best_noise_factor = tune_noise(model, test_loader, noise_factors)
print('Best Noise Factor: ', best_noise_factor)

Added noise with factor 0.001 to model weights.
Noise Factor: 0.001, Accuracy: 55.36%
Added noise with factor 0.01 to model weights.
Noise Factor: 0.01, Accuracy: 55.68%
Added noise with factor 0.05 to model weights.
Noise Factor: 0.05, Accuracy: 54.55%
Added noise with factor 0.1 to model weights.
Noise Factor: 0.1, Accuracy: 55.36%
Added noise with factor 0.5 to model weights.
Noise Factor: 0.5, Accuracy: 10.81%
Added noise with factor 1.0 to model weights.
Noise Factor: 1.0, Accuracy: 4.16%
Best Noise Factor: 0.01
Best Accuracy: 55.68%
Best Noise Factor:  0.01


## Evaluate model with the best noise factor

In [9]:
def test_with_best_noise(model, test_loader, best_noise_factor):
    original_state_dict = copy.deepcopy(model.state_dict())
    add_noise_to_weights(model, best_noise_factor)
    evaluate_model(model, test_loader)
    model.load_state_dict(original_state_dict)
    print("Restored original model weights.")

test_with_best_noise(model, test_loader, best_noise_factor)

Added noise with factor 0.01 to model weights.
Accuracy: 4.16%
Restored original model weights.


## Add different types of noise to model weights

In [10]:
def add_salt_and_pepper_noise(model, noise_factor):
    print("Before S&P Noise:", list(model.parameters())[0][:5])
    with torch.no_grad():
        for param in model.parameters():
            mask = torch.rand(param.size()) < noise_factor
            param[mask] = torch.rand(mask.sum().item())
    print(f"Added salt and pepper noise with factor {noise_factor} to model weights.")
    print("After S&P Noise:", list(model.parameters())[0][:5])

def add_gaussian_noise(model, noise_factor):
    print("Before Gaussian Noise:", list(model.parameters())[0][:5])
    with torch.no_grad():
        for param in model.parameters():
            noise = torch.randn(param.size()) * noise_factor
            param.add_(noise)
    print(f"Added Gaussian noise with factor {noise_factor} to model weights.")
    print("After Gaussian Noise:", list(model.parameters())[0][:5])

## Tune different noise hyperparameters

In [11]:
def tune_different_noises(model, test_loader, noise_factors, noise_type):
    best_noise_factor = None
    best_accuracy = 0
    for noise_factor in noise_factors:
        original_state_dict = copy.deepcopy(model.state_dict())
        if noise_type == 'salt_and_pepper':
            add_salt_and_pepper_noise(model, noise_factor)
        elif noise_type == 'gaussian':
            add_gaussian_noise(model, noise_factor)
        else:
            add_noise_to_weights(model, noise_factor)
        y_true = []
        y_pred = []
        with torch.no_grad():
            for inputs, labels in test_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                predicted = (outputs > 0.5).float()
                y_true.extend(labels.cpu().numpy())
                y_pred.extend(predicted.cpu().numpy())
        accuracy = accuracy_score(y_true, y_pred) * 100
        print(f"Noise Type: {noise_type}, Noise Factor: {noise_factor}, Accuracy: {accuracy:.2f}%")
        if accuracy > best_accuracy:
            best_noise_factor = noise_factor
            best_accuracy = accuracy
        model.load_state_dict(original_state_dict)
    print(f"Best Noise Type: {noise_type}, Best Noise Factor: {best_noise_factor}")
    print(f"Best Accuracy: {best_accuracy:.2f}%")
    return best_noise_factor

## Find the best noise factor for different noise types

In [12]:
noise_factors = [0.001, 0.01, 0.05, 0.1, 0.5, 1.0]
noise_types = ['salt_and_pepper', 'gaussian', 'default']
best_noise_factors = {}
for noise_type in noise_types:
    best_noise_factor = tune_different_noises(model, test_loader, noise_factors, noise_type)
    best_noise_factors[noise_type] = best_noise_factor
print('Best Noise Factors: ', best_noise_factors)

Before S&P Noise: tensor([[-1.0920,  0.5944,  0.4484, -1.2092, -1.0161,  0.0379,  0.6048,  1.5295,
          0.1699,  0.3859, -0.4931, -0.9564,  1.6435,  0.8561,  0.6581,  1.9462,
          0.4872, -0.8097, -0.4127,  1.7223,  0.8261,  1.0137,  0.7044, -2.1587,
         -0.0457, -1.1857,  1.5303, -1.5391, -0.2808, -0.3174],
        [-0.4033,  0.3173, -0.6864,  0.7085,  1.0564, -1.5933,  0.2482, -1.1017,
          1.5212,  0.9349, -0.3061,  1.1741, -0.1276,  0.7144,  0.4988,  0.6298,
         -1.2029,  0.9228,  0.3143,  0.3532,  0.4004, -0.4070,  2.2823, -0.6676,
         -0.9349, -0.7756, -2.7974, -0.9460,  1.2290,  1.7901],
        [ 0.8081,  0.0447, -0.1313, -2.9061, -0.7007, -0.4197, -0.0781, -0.6689,
          0.8316,  0.2584, -1.2998,  1.8446, -1.4541,  0.7352,  1.9713, -1.3105,
         -1.2867,  1.3621,  0.8685,  0.0919, -0.3067,  1.1294,  0.1020, -1.7716,
         -0.4262,  1.1042, -0.4991,  0.1054,  1.9641,  0.0800],
        [ 0.9943, -1.3431, -0.4651, -1.0733, -0.8592, -0.6320

## Evaluate model with the best noise factors

In [13]:
def test_with_best_noises(model, test_loader, best_noise_factors):
    for noise_type, noise_factor in best_noise_factors.items():
        print(f"Testing with best noise type: {noise_type}, factor: {noise_factor}")
        original_state_dict = copy.deepcopy(model.state_dict())
        if noise_type == 'salt_and_pepper':
            add_salt_and_pepper_noise(model, noise_factor)
        elif noise_type == 'gaussian':
            add_gaussian_noise(model, noise_factor)
        else:
            add_noise_to_weights(model, noise_factor)
        evaluate_model(model, test_loader)
        model.load_state_dict(original_state_dict)
        print("Restored original model weights.")

test_with_best_noises(model, test_loader, best_noise_factors)

Testing with best noise type: salt_and_pepper, factor: 1.0
Before S&P Noise: tensor([[-1.0920,  0.5944,  0.4484, -1.2092, -1.0161,  0.0379,  0.6048,  1.5295,
          0.1699,  0.3859, -0.4931, -0.9564,  1.6435,  0.8561,  0.6581,  1.9462,
          0.4872, -0.8097, -0.4127,  1.7223,  0.8261,  1.0137,  0.7044, -2.1587,
         -0.0457, -1.1857,  1.5303, -1.5391, -0.2808, -0.3174],
        [-0.4033,  0.3173, -0.6864,  0.7085,  1.0564, -1.5933,  0.2482, -1.1017,
          1.5212,  0.9349, -0.3061,  1.1741, -0.1276,  0.7144,  0.4988,  0.6298,
         -1.2029,  0.9228,  0.3143,  0.3532,  0.4004, -0.4070,  2.2823, -0.6676,
         -0.9349, -0.7756, -2.7974, -0.9460,  1.2290,  1.7901],
        [ 0.8081,  0.0447, -0.1313, -2.9061, -0.7007, -0.4197, -0.0781, -0.6689,
          0.8316,  0.2584, -1.2998,  1.8446, -1.4541,  0.7352,  1.9713, -1.3105,
         -1.2867,  1.3621,  0.8685,  0.0919, -0.3067,  1.1294,  0.1020, -1.7716,
         -0.4262,  1.1042, -0.4991,  0.1054,  1.9641,  0.0800],
  

## Evaluate model with both types of noise

In [14]:
def test_with_both_noises(model, test_loader, best_noise_factors):
    original_state_dict = copy.deepcopy(model.state_dict())
    print("Testing with both noise types: salt_and_pepper and gaussian")
    add_salt_and_pepper_noise(model, best_noise_factors['salt_and_pepper'])
    add_gaussian_noise(model, best_noise_factors['gaussian'])
    evaluate_model(model, test_loader)
    model.load_state_dict(original_state_dict)
    print("Restored original model weights.")

test_with_both_noises(model, test_loader, best_noise_factors)

Testing with both noise types: salt_and_pepper and gaussian
Before S&P Noise: tensor([[-1.0920,  0.5944,  0.4484, -1.2092, -1.0161,  0.0379,  0.6048,  1.5295,
          0.1699,  0.3859, -0.4931, -0.9564,  1.6435,  0.8561,  0.6581,  1.9462,
          0.4872, -0.8097, -0.4127,  1.7223,  0.8261,  1.0137,  0.7044, -2.1587,
         -0.0457, -1.1857,  1.5303, -1.5391, -0.2808, -0.3174],
        [-0.4033,  0.3173, -0.6864,  0.7085,  1.0564, -1.5933,  0.2482, -1.1017,
          1.5212,  0.9349, -0.3061,  1.1741, -0.1276,  0.7144,  0.4988,  0.6298,
         -1.2029,  0.9228,  0.3143,  0.3532,  0.4004, -0.4070,  2.2823, -0.6676,
         -0.9349, -0.7756, -2.7974, -0.9460,  1.2290,  1.7901],
        [ 0.8081,  0.0447, -0.1313, -2.9061, -0.7007, -0.4197, -0.0781, -0.6689,
          0.8316,  0.2584, -1.2998,  1.8446, -1.4541,  0.7352,  1.9713, -1.3105,
         -1.2867,  1.3621,  0.8685,  0.0919, -0.3067,  1.1294,  0.1020, -1.7716,
         -0.4262,  1.1042, -0.4991,  0.1054,  1.9641,  0.0800],
 

## Final test: Compare model performance with and without noise

In [15]:
def final_test_comparison(model, test_loader, best_noise_factors):
    # Test without noise
    print("Testing model without noise")
    evaluate_model(model, test_loader)

    # Test with both noise types
    original_state_dict = model.state_dict()
    print("Testing model with both noise types: salt_and_pepper and gaussian")
    add_salt_and_pepper_noise(model, best_noise_factors['salt_and_pepper'])
    add_gaussian_noise(model, best_noise_factors['gaussian'])
    evaluate_model(model, test_loader)
    model.load_state_dict(original_state_dict)
    print("Restored original model weights.")

final_test_comparison(model, test_loader, best_noise_factors)

Testing model without noise
Accuracy: 4.16%
Testing model with both noise types: salt_and_pepper and gaussian
Before S&P Noise: tensor([[-1.0920,  0.5944,  0.4484, -1.2092, -1.0161,  0.0379,  0.6048,  1.5295,
          0.1699,  0.3859, -0.4931, -0.9564,  1.6435,  0.8561,  0.6581,  1.9462,
          0.4872, -0.8097, -0.4127,  1.7223,  0.8261,  1.0137,  0.7044, -2.1587,
         -0.0457, -1.1857,  1.5303, -1.5391, -0.2808, -0.3174],
        [-0.4033,  0.3173, -0.6864,  0.7085,  1.0564, -1.5933,  0.2482, -1.1017,
          1.5212,  0.9349, -0.3061,  1.1741, -0.1276,  0.7144,  0.4988,  0.6298,
         -1.2029,  0.9228,  0.3143,  0.3532,  0.4004, -0.4070,  2.2823, -0.6676,
         -0.9349, -0.7756, -2.7974, -0.9460,  1.2290,  1.7901],
        [ 0.8081,  0.0447, -0.1313, -2.9061, -0.7007, -0.4197, -0.0781, -0.6689,
          0.8316,  0.2584, -1.2998,  1.8446, -1.4541,  0.7352,  1.9713, -1.3105,
         -1.2867,  1.3621,  0.8685,  0.0919, -0.3067,  1.1294,  0.1020, -1.7716,
         -0.426