In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from numpy.typing import NDArray



Preprocessing

In [16]:
# --- Load, truncate and one-hot encode the raw dataset ---
data = pd.read_csv('data/UNSW_NB15_training-set.csv')
data = data.iloc[:100000]  # keep only the first 100k rows for faster processing
data.to_csv('s.csv')  # write the truncated frame (index included) to disk

# String-valued columns that get expanded into one indicator column per category.
categorical_columns = ['proto', 'service', 'state']
data = pd.get_dummies(data, columns=categorical_columns)

# Target vector: the 'label' column as a NumPy array.
labels = data['label'].values
# Feature matrix: everything except the two target-related columns.
# NOTE(review): every other column is kept as a feature; if the CSV carries a
# row-identifier column (e.g. 'id') it is fed to the model too - confirm intended.
features = data.drop(columns=['label', 'attack_cat'])

# --- Training and Testing Data ---
# 80/20 split, stratified on the labels so both parts keep the same class ratio.
X_train, X_test, Y_train, Y_test = train_test_split(
    features,
    labels,
    test_size=0.2,
    random_state=42,
    stratify=labels,
)

# Standardise features; statistics are learned on the training part only
# and then reused unchanged for the test part.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)



In [4]:
from torch.utils.data import DataLoader, TensorDataset
import torch

In [5]:
import torch
from torch.utils.data import TensorDataset, DataLoader

# 6. Turn the four NumPy splits into float32 tensors in one pass
#    (labels are float as well, matching the float logits they are compared with).
X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor = (
    torch.tensor(array, dtype=torch.float32)
    for array in (X_train, Y_train, X_test, Y_test)
)

# 7. Pair each feature row with its label
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# 8. Mini-batch iterators: training batches are reshuffled every epoch,
#    test batches keep their original order.
batch_size = 64  # industry-standard batch sizes are powers of 2 (32, 64, 128, etc.)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


In [6]:
import torch.nn as nn
import torch.nn.functional as F



In [7]:

# 9. Define the neural network architecture
class Net(nn.Module):
    """Feed-forward binary classifier with a single 64-unit hidden layer.

    The network emits one raw logit per sample (no sigmoid is applied here).

    Args:
        input_size: Number of input features per sample.
    """

    def __init__(self, input_size):
        super().__init__()
        hidden_units = 64
        # input -> hidden
        self.fc1 = nn.Linear(in_features=input_size, out_features=hidden_units)
        # hidden -> single output logit
        self.fc2 = nn.Linear(in_features=hidden_units, out_features=1)

    def forward(self, x):
        """Return the logits for a batch ``x`` whose last dimension is ``input_size``."""
        hidden = F.relu(self.fc1(x))  # ReLU non-linearity after the first layer
        return self.fc2(hidden)

# 10. Build the network, sized to the width of the encoded feature matrix
input_dim = X_train_tensor.size(1)  # columns of the training tensor = feature count
model = Net(input_size=input_dim)
print(model)


Net(
  (fc1): Linear(in_features=195, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=1, bias=True)
)


In [8]:
import torch.optim as optim

In [9]:
# Query whether this PyTorch build can use a CUDA device; the resulting
# boolean is displayed as the cell's output.
torch.cuda.is_available()

False

In [10]:
# Pick the compute device: use CUDA when PyTorch reports it as available and
# fall back to the CPU otherwise (identical to the previous hard-coded 'cpu'
# on machines without a GPU). The training and evaluation loops already move
# every batch to `device`, so nothing else has to change.
# Note: a checkpoint saved from a GPU run needs `map_location` when it is
# loaded on a CPU-only machine.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Move the model parameters to that device; `.to` returns the module, so its
# repr is shown as the cell output.
model.to(device)


Net(
  (fc1): Linear(in_features=195, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=1, bias=True)
)

In [11]:
# Binary cross-entropy computed directly on raw logits (the sigmoid is part of
# the loss), matching a model whose forward pass returns un-activated outputs.
criterion = nn.BCEWithLogitsLoss()
# Adam over all model parameters with a learning rate of 1e-3.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)


In [12]:
# Train for a fixed number of epochs, reporting the mean per-sample loss
# of each epoch.
num_epochs = 100
for epoch in range(num_epochs):
    model.train()  # make sure the model is in training mode for this epoch
    running_loss = 0.0  # sum of per-sample losses seen so far in this epoch

    for batch_X, batch_Y in train_loader:
        batch_X = batch_X.to(device)
        batch_Y = batch_Y.to(device)

        # Forward pass; flatten (batch, 1) logits to (batch,) to match the labels.
        outputs = model(batch_X).view(-1)
        loss = criterion(outputs, batch_Y)

        # Backward pass and parameter update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # `loss` is the batch mean, so weight it by the batch size to keep the
        # epoch average correct when the last batch is smaller.
        running_loss += loss.item() * batch_X.size(0)

    # Computed once per epoch (previously recomputed on every batch, and
    # undefined at the print below if the loader yielded no batches).
    epoch_loss = running_loss / len(train_dataset)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}")
        

Epoch [1/100], Loss: 0.1743
Epoch [2/100], Loss: 0.1275
Epoch [3/100], Loss: 0.1047
Epoch [4/100], Loss: 0.0915
Epoch [5/100], Loss: 0.0833
Epoch [6/100], Loss: 0.0769
Epoch [7/100], Loss: 0.0722
Epoch [8/100], Loss: 0.0678
Epoch [9/100], Loss: 0.0647
Epoch [10/100], Loss: 0.0623
Epoch [11/100], Loss: 0.0603
Epoch [12/100], Loss: 0.0589
Epoch [13/100], Loss: 0.0576
Epoch [14/100], Loss: 0.0566
Epoch [15/100], Loss: 0.0556
Epoch [16/100], Loss: 0.0550
Epoch [17/100], Loss: 0.0540
Epoch [18/100], Loss: 0.0535
Epoch [19/100], Loss: 0.0527
Epoch [20/100], Loss: 0.0520
Epoch [21/100], Loss: 0.0518
Epoch [22/100], Loss: 0.0511
Epoch [23/100], Loss: 0.0503
Epoch [24/100], Loss: 0.0502
Epoch [25/100], Loss: 0.0495
Epoch [26/100], Loss: 0.0499
Epoch [27/100], Loss: 0.0487
Epoch [28/100], Loss: 0.0484
Epoch [29/100], Loss: 0.0479
Epoch [30/100], Loss: 0.0480
Epoch [31/100], Loss: 0.0479
Epoch [32/100], Loss: 0.0472
Epoch [33/100], Loss: 0.0469
Epoch [34/100], Loss: 0.0473
Epoch [35/100], Loss: 0

In [13]:
# Switch the model to evaluation mode before testing. `eval()` returns the
# module itself, so its repr is displayed as the cell output.
model.eval()



Net(
  (fc1): Linear(in_features=195, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=1, bias=True)
)

In [14]:
# Evaluate the trained model on the held-out test set and report accuracy,
# precision, recall and F1 for the positive class (label == 1).
correct = 0
total = 0
TP = FP = FN = 0

# Set evaluation mode here as well so this cell gives the same result even if
# it is run right after the training cell (which leaves the model in train mode).
model.eval()
with torch.no_grad():  # no gradients are needed for inference
    for batch_X, batch_Y in test_loader:
        batch_X = batch_X.to(device)
        batch_Y = batch_Y.to(device)

        outputs = model(batch_X)

        # Logits -> probabilities -> hard 0/1 predictions at a 0.5 threshold.
        probs = torch.sigmoid(outputs)
        preds = (probs >= 0.5).float().view(-1)
        # Use a dedicated name so the notebook-level `labels` array created
        # during preprocessing is not overwritten by a batch tensor.
        targets = batch_Y.view(-1)

        # Count correct predictions
        correct += (preds == targets).sum().item()
        total += targets.size(0)

        # Confusion-matrix cells needed for precision / recall.
        TP += ((preds == 1) & (targets == 1)).sum().item()
        FP += ((preds == 1) & (targets == 0)).sum().item()
        FN += ((preds == 0) & (targets == 1)).sum().item()

accuracy = correct / total
# Each ratio falls back to 0.0 when its denominator would be zero.
precision = TP / (TP + FP) if (TP + FP) > 0 else 0.0
recall = TP / (TP + FN) if (TP + FN) > 0 else 0.0
f1_score = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0.0

print(f"Test Accuracy: {accuracy*100:.2f}%")
print(f"Precision: {precision*100:.2f}%")
print(f"Recall: {recall*100:.2f}%")
print(f"F1 Score: {f1_score*100:.2f}%")


Test Accuracy: 98.00%
Precision: 98.14%
Recall: 97.68%
F1 Score: 97.91%


In [17]:
# Persist only the learned parameters (the state dict), not the whole module
# object; loading requires re-creating a Net with the same input size first.
torch.save(obj=model.state_dict(), f="model.pth")