In [1]:
from utils import load_and_prepare_nb15, structured_prune_hidden, benchmark_cpu
from neural_network import *
import time
from sklearn.metrics import classification_report



# Columns handed to load_and_prepare_nb15 as `numerical_cols`; the list length
# is also what the model receives as `num_numerical_features`.
# Order is kept exactly as originally written.
numerical_cols = [
    "NUM_PKTS_128_TO_256_BYTES",
    "RETRANSMITTED_OUT_PKTS",
    "SRC_TO_DST_IAT_STDDEV",
    "SRC_TO_DST_SECOND_BYTES",
    "IN_PKTS",
    "LONGEST_FLOW_PKT",
    "NUM_PKTS_256_TO_512_BYTES",
    "DST_TO_SRC_IAT_AVG",
    "OUT_BYTES",
    "NUM_PKTS_UP_TO_128_BYTES",
    "DURATION_OUT",
    "NUM_PKTS_512_TO_1024_BYTES",
    "SRC_TO_DST_IAT_AVG",
    "DURATION_IN",
    "SHORTEST_FLOW_PKT",
    "RETRANSMITTED_IN_PKTS",
    "FLOW_DURATION_MILLISECONDS",
    "IN_BYTES",
    "MIN_IP_PKT_LEN",
    "TCP_WIN_MAX_OUT",
    "SRC_TO_DST_IAT_MIN",
    "RETRANSMITTED_OUT_BYTES",
    "DST_TO_SRC_IAT_MAX",
    "DST_TO_SRC_SECOND_BYTES",
    "DNS_TTL_ANSWER",
    "NUM_PKTS_1024_TO_1514_BYTES",
    "SRC_TO_DST_AVG_THROUGHPUT",
    "DST_TO_SRC_IAT_STDDEV",
    "OUT_PKTS",
    "SRC_TO_DST_IAT_MAX",
    "TCP_WIN_MAX_IN",
    "MAX_IP_PKT_LEN",
    "DST_TO_SRC_AVG_THROUGHPUT",
    "DST_TO_SRC_IAT_MIN",
    "RETRANSMITTED_IN_BYTES",
]

# Columns handed to load_and_prepare_nb15 as `categorical_cols`.
# Order is kept exactly as originally written.
categorical_cols = [
    "PROTOCOL",
    "L7_PROTO",
    "TCP_FLAGS",
    "CLIENT_TCP_FLAGS",
    "SERVER_TCP_FLAGS",
    "ICMP_TYPE",
    "ICMP_IPV4_TYPE",
    "DNS_QUERY_TYPE",
    "FTP_COMMAND_RET_CODE",
]

# Run-level settings read by the cells below.
dataset_path = "datasets/NF-UNSW-NB15-v3.csv"  # CSV given to load_and_prepare_nb15
target_col = "Attack"                          # label column name
num_target_classes = 10                        # forwarded to NeuralNetwork
batch_size, epochs = 2048, 20                  # loader batch size / epochs for model.fit


In [2]:
# Load the dataset and unpack the six values the helper returns: three
# dataloaders, the per-column categorical cardinalities, `cw` (later passed to
# fit() as `weights` -- presumably class weights, confirm in utils), and the
# class names used for the classification reports.
(
    train_dataloader,
    valid_dataloader,
    test_dataloader,
    cat_cardinalities,
    cw,
    target_names,
) = load_and_prepare_nb15(
    file_path=dataset_path,
    target_col=target_col,
    numerical_cols=numerical_cols,
    categorical_cols=categorical_cols,
    batch_size=batch_size,
)

# num_target_classes is hard-coded in the config cell; fail fast here if it
# disagrees with the label set actually loaded, instead of discovering the
# mismatch later during training or evaluation.
assert len(target_names) == num_target_classes, (
    f"num_target_classes={num_target_classes} but the dataset exposes "
    f"{len(target_names)} classes"
)

# Embedding width per categorical column: half the cardinality (rounded up),
# capped at 50.
embedding_dims = [min(50, (card + 1) // 2) for card in cat_cardinalities]

In [3]:
# Build the classifier from the dataset-derived sizes: categorical
# cardinalities with their embedding widths, the number of numerical columns,
# and three hidden layers of 256 units each.
model = NeuralNetwork(
    num_numerical_features=len(numerical_cols),
    cat_cardinalities=cat_cardinalities,
    embedding_dims=embedding_dims,
    hidden_layers_sizes=[256] * 3,
    num_target_classes=num_target_classes,
)

In [4]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [5]:
# AdamW over every model parameter.
optimizer = torch.optim.AdamW(
    params=model.parameters(),
    lr=1e-3,
    weight_decay=1e-4,
)

# Halve the learning rate once the monitored metric has stopped increasing for
# more than 5 epochs. mode="max" means "higher is better"; the metric itself is
# whatever model.fit passes to scheduler.step() -- presumably the validation
# F1 score, confirm in neural_network.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode="max",
    factor=0.5,
    patience=5,
)

In [6]:
# Train the full-size network; all settings are collected first so the call
# itself stays on one line.
fit_settings = dict(
    train_dataloader=train_dataloader,
    valid_dataloader=valid_dataloader,
    weights=cw,
    epochs=epochs,
    optimizer=optimizer,
    lr_scheduler=scheduler,
    device=device,
)
model.fit(**fit_settings)

--- Epoch: 0  |  Loss: 0.0827  |  F1 Score: 0.4449  |  Accuracy: 0.9789 ---
--- Epoch: 1  |  Loss: 0.0698  |  F1 Score: 0.4685  |  Accuracy: 0.9804 ---
--- Epoch: 2  |  Loss: 0.0651  |  F1 Score: 0.4928  |  Accuracy: 0.9826 ---
--- Epoch: 3  |  Loss: 0.0733  |  F1 Score: 0.4788  |  Accuracy: 0.9791 ---
--- Epoch: 4  |  Loss: 0.0594  |  F1 Score: 0.4759  |  Accuracy: 0.9799 ---
--- Epoch: 5  |  Loss: 0.0548  |  F1 Score: 0.5145  |  Accuracy: 0.9821 ---
--- Epoch: 6  |  Loss: 0.0567  |  F1 Score: 0.5019  |  Accuracy: 0.9813 ---
--- Epoch: 7  |  Loss: 0.0516  |  F1 Score: 0.5368  |  Accuracy: 0.9825 ---
--- Epoch: 8  |  Loss: 0.0513  |  F1 Score: 0.5381  |  Accuracy: 0.9830 ---
--- Epoch: 9  |  Loss: 0.0513  |  F1 Score: 0.5266  |  Accuracy: 0.9825 ---
--- Epoch: 10  |  Loss: 0.0516  |  F1 Score: 0.5318  |  Accuracy: 0.9819 ---
--- Epoch: 11  |  Loss: 0.0498  |  F1 Score: 0.5371  |  Accuracy: 0.9831 ---
--- Epoch: 12  |  Loss: 0.0516  |  F1 Score: 0.5347  |  Accuracy: 0.9821 ---
--- Epoch

In [8]:
# Derive a smaller network from the trained model (keep_ratio=0.7 is presumably
# the fraction of hidden units retained -- confirm in utils), then fine-tune it
# on the CPU for 10 epochs. No optimizer is passed here; fit() receives only a
# learning rate.
pruned = structured_prune_hidden(
    model,
    keep_ratio=0.7,
    device="cpu",
)
pruned.fit(
    train_dataloader,
    valid_dataloader,
    epochs=10,
    lr=5e-4,
    device="cpu",
    weights=cw,
)

--- Epoch: 0  |  Loss: 0.0510  |  F1 Score: 0.5446  |  Accuracy: 0.9827 ---
--- Epoch: 1  |  Loss: 0.0505  |  F1 Score: 0.5480  |  Accuracy: 0.9825 ---
--- Epoch: 2  |  Loss: 0.0519  |  F1 Score: 0.5391  |  Accuracy: 0.9827 ---
--- Epoch: 3  |  Loss: 0.0512  |  F1 Score: 0.5393  |  Accuracy: 0.9826 ---
--- Epoch: 4  |  Loss: 0.0517  |  F1 Score: 0.5465  |  Accuracy: 0.9824 ---
--- Epoch: 5  |  Loss: 0.0514  |  F1 Score: 0.5439  |  Accuracy: 0.9818 ---
--- Epoch: 6  |  Loss: 0.0512  |  F1 Score: 0.5391  |  Accuracy: 0.9823 ---
--- Epoch: 7  |  Loss: 0.0494  |  F1 Score: 0.5535  |  Accuracy: 0.9828 ---
--- Epoch: 8  |  Loss: 0.0502  |  F1 Score: 0.5349  |  Accuracy: 0.9825 ---
--- Epoch: 9  |  Loss: 0.0494  |  F1 Score: 0.5562  |  Accuracy: 0.9833 ---


In [9]:
# Benchmark both networks on the test loader with the same single-thread
# setting, printing each result as soon as it is available.
single_thread = {"num_threads": 1}

std_times = benchmark_cpu(model, test_dataloader, **single_thread)
print(f"Standard Model:\n {std_times}", "--------------------", sep="\n")

pruned_times = benchmark_cpu(pruned, test_dataloader, **single_thread)
print(f"Pruned model:\n {pruned_times}")

Standard Model:
 {'batch_size': 2048, 'num_threads': 1, 'median_ms': 9.778524500688945, 'p95_ms': 10.84729954945942, 'throughput_sps': 209075.66221108846}
--------------------
Pruned model:
 {'batch_size': 2048, 'num_threads': 1, 'median_ms': 6.451109000408906, 'p95_ms': 7.454160450197376, 'throughput_sps': 317922.1889819181}


In [10]:
# Throughput values from the two benchmark runs ('throughput_sps' looks like
# samples per second, judging by the benchmark output).
std = std_times['throughput_sps']
pru = pruned_times['throughput_sps']

# Per-sample latency is the reciprocal of throughput, so the relative latency
# reduction is 1 - latency_pruned / latency_std = 1 - std / pru.
latency_reduction_pct = (1 - std / pru) * 100

# Let the sign come from the value instead of a hard-coded "-": a faster pruned
# model still prints e.g. "-34.2%", while a slower one prints "+5.0%" rather
# than the garbled "--5.0%".
print(f"Riduzione tempo di inferenza: {-latency_reduction_pct:+.1f}%")

Riduzione tempo di inferenza: -34.2%


In [11]:
# Collect the third element of every test batch (the labels) and join them
# into one NumPy array of ground-truth classes.
# NOTE(review): this only lines up with the predictions if the test loader
# yields batches in the same order on every pass (no shuffling) -- confirm.
test_label_batches = []
for _, _, batch_labels in test_dataloader:
    test_label_batches.append(batch_labels)
y_true = torch.cat(test_label_batches).numpy()

In [12]:
# Run both networks over the test set with identical arguments.
# NOTE(review): `pruned` was built and fine-tuned with device="cpu" while
# `device` may be CUDA; this relies on predict() handling placement -- confirm.
eval_args = (test_dataloader, device)
model_preds = model.predict(*eval_args)
pruned_preds = pruned.predict(*eval_args)

In [13]:
# Print a per-class report for each network against the same ground truth.
reports = (
    ("\n=== Classification Report DNN===", model_preds),
    ("\n=== Classification Report Pruned===", pruned_preds),
)
for banner, preds in reports:
    print(banner)
    print(classification_report(y_true, preds.numpy(), target_names=target_names, digits=4))



=== Classification Report DNN===
                precision    recall  f1-score   support

      Analysis     0.2664    0.9511    0.4162       184
      Backdoor     0.1306    0.3772    0.1940       517
        Benign     1.0000    0.9996    0.9998    322654
           DoS     0.2248    0.4116    0.2908       758
      Exploits     0.9112    0.5059    0.6506      5823
       Fuzzers     0.7181    0.7381    0.7280      3838
       Generic     0.5546    0.6611    0.6032       714
Reconnaissance     0.6806    0.5962    0.6356      1694
     Shellcode     0.2252    0.7647    0.3480       238
         Worms     0.4444    1.0000    0.6154        20

      accuracy                         0.9829    336440
     macro avg     0.5156    0.7006    0.5482    336440
  weighted avg     0.9886    0.9829    0.9844    336440


=== Classification Report Pruned===
                precision    recall  f1-score   support

      Analysis     0.2612    0.9511    0.4098       184
      Backdoor     0.1431    