In [None]:
from utils import load_and_prepare_data
from neural_network import *
from branchynetMPL import *
# Explicit imports stay below the wildcard imports so that a name exported by
# those modules can never shadow them.
import time
import torch
from sklearn.metrics import classification_report

# Numeric input columns; len(numerical_cols) is what sizes the numeric input
# of both networks further down.
numerical_cols = [
    "duration",
    "dst_bytes",
    "missed_bytes",
    "src_bytes",
    "src_ip_bytes",
    "src_pkts",
    "dst_pkts",
    "dst_ip_bytes",
    "http_request_body_len",
    "http_response_body_len",
]

# Categorical input columns handed to the loader alongside the numeric ones.
categorical_cols = [
    "proto",
    "conn_state",
    "http_status_code",
    "http_method",
    "http_orig_mime_types",
    "http_resp_mime_types",
]

# --- Dataset and label settings ---
dataset_path = 'datasets/http_ton.csv'
target_col = 'type'
num_target_classes = 8  # output size passed to both networks

# --- Training settings ---
batch_size = 1024
epochs = 50

In [2]:
# NOTE(review): the output saved under this cell is a KeyError naming columns
# such as 'IN_BYTES' and 'L4_SRC_PORT', none of which appear in the column
# lists configured above — it looks stale; re-run on a fresh kernel to confirm.
(
    train_dataloader,
    valid_dataloader,
    test_dataloader,
    cat_cardinalities,
    cw,  # not used by any visible cell — presumably class weights; TODO confirm
    target_names,
) = load_and_prepare_data(
    batch_size=batch_size,
    categorical_cols=categorical_cols,
    numerical_cols=numerical_cols,
    target_col=target_col,
    file_path=dataset_path,
)

# One embedding width per categorical cardinality: half of it, rounded up,
# and never more than 50.
embedding_dims = [min(50, (n_levels + 1) // 2) for n_levels in cat_cardinalities]

KeyError: "None of [Index(['IN_BYTES', 'IN_PKTS', 'OUT_BYTES', 'OUT_PKTS',\n       'FLOW_DURATION_MILLISECONDS', 'DURATION_IN', 'DURATION_OUT', 'MIN_TTL',\n       'MAX_TTL', 'LONGEST_FLOW_PKT', 'SHORTEST_FLOW_PKT', 'MIN_IP_PKT_LEN',\n       'MAX_IP_PKT_LEN', 'SRC_TO_DST_SECOND_BYTES', 'DST_TO_SRC_SECOND_BYTES',\n       'RETRANSMITTED_IN_BYTES', 'RETRANSMITTED_IN_PKTS',\n       'RETRANSMITTED_OUT_BYTES', 'RETRANSMITTED_OUT_PKTS',\n       'SRC_TO_DST_AVG_THROUGHPUT', 'DST_TO_SRC_AVG_THROUGHPUT',\n       'NUM_PKTS_UP_TO_128_BYTES', 'NUM_PKTS_128_TO_256_BYTES',\n       'NUM_PKTS_256_TO_512_BYTES', 'NUM_PKTS_512_TO_1024_BYTES',\n       'NUM_PKTS_1024_TO_1514_BYTES', 'TCP_WIN_MAX_IN', 'TCP_WIN_MAX_OUT',\n       'DNS_TTL_ANSWER', 'L4_SRC_PORT', 'L4_DST_PORT', 'PROTOCOL', 'L7_PROTO',\n       'TCP_FLAGS', 'CLIENT_TCP_FLAGS', 'SERVER_TCP_FLAGS', 'ICMP_TYPE',\n       'ICMP_IPV4_TYPE', 'DNS_QUERY_ID', 'DNS_QUERY_TYPE', 'Attack'],\n      dtype='object')] are in the [columns]"

In [None]:
# Baseline network: three hidden-layer sizes of 256, with the embedding widths
# derived from the categorical cardinalities in the loading cell.
model = NeuralNetwork(
    num_numerical_features=len(numerical_cols),
    cat_cardinalities=cat_cardinalities,
    embedding_dims=embedding_dims,
    hidden_layers_sizes=[256] * 3,
    num_target_classes=num_target_classes,
)

In [None]:
# Use the GPU when PyTorch reports one as available; otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# AdamW bound to the baseline model's parameters only.
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=0.001,
    weight_decay=1e-4,
)

# With mode='max' the learning rate is multiplied by 0.85 once the monitored
# value has stopped increasing for longer than `patience` epochs.
# NOTE(review): which metric model.fit passes to the scheduler is not visible
# here — confirm it is one that should be maximised.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='max',
    factor=0.85,
    patience=5,
)

In [None]:
# Train the baseline network with the AdamW optimizer and plateau scheduler
# created in the previous cell.
model.fit(
    epochs=epochs,
    device=device,
    train_dataloader=train_dataloader,
    valid_dataloader=valid_dataloader,
    optimizer=optimizer,
    lr_scheduler=scheduler,
)

In [None]:
# BranchyMLP configured with the same feature sizes, embedding widths and
# hidden widths as the baseline network, so the two can be compared.
branchynet = BranchyMLP(
    hidden_dims=(256,) * 3,
    num_num_features=len(numerical_cols),
    cat_cardinalities=cat_cardinalities,
    embedding_dims=embedding_dims,
    num_classes=num_target_classes,
    threshold=0.8,  # presumably the early-exit confidence cut-off — TODO confirm
)

In [None]:
# The shared `optimizer` was built from model.parameters(), so handing it to
# this fit call would step the baseline model's (already trained) parameters
# and never touch BranchyMLP's weights. Build a dedicated optimizer over
# branchynet's own parameters, with the same hyper-parameters as the baseline.
branchy_optimizer = torch.optim.AdamW(
    branchynet.parameters(),
    lr=0.001,
    weight_decay=1e-4,
)

branchynet.fit(
    train_loader=train_dataloader,
    valid_loader=valid_dataloader,
    optimizer=branchy_optimizer,
    device=device,
    epochs=epochs,
)

In [None]:
# Ground-truth labels: the third element of every test batch, concatenated in
# iteration order and converted to a NumPy array.
# NOTE(review): this only lines up with the predictions computed below if
# test_dataloader yields batches in the same order on every pass (no
# shuffling) — confirm in load_and_prepare_data.
y_true = torch.cat(tuple(labels for _, _, labels in test_dataloader), dim=0).numpy()

In [None]:
# Time baseline inference over the whole test set.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() can jump if the system clock is adjusted.
# CUDA work is queued asynchronously, so drain the queue on both sides of the
# measured call; otherwise the clock may be read before the GPU has finished.
if device.type == "cuda":
    torch.cuda.synchronize()
start_model = time.perf_counter()
model_preds = model.predict(test_dataloader, device)
if device.type == "cuda":
    torch.cuda.synchronize()
end_model = time.perf_counter()

In [None]:
# Time BranchyMLP inference over the whole test set.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() can jump if the system clock is adjusted.
# CUDA work is queued asynchronously, so drain the queue on both sides of the
# measured call; otherwise the clock may be read before the GPU has finished.
if device.type == "cuda":
    torch.cuda.synchronize()
start_branchynet = time.perf_counter()
branchy_preds = branchynet.predict(test_dataloader, device)
if device.type == "cuda":
    torch.cuda.synchronize()
end_branchynet = time.perf_counter()

In [None]:
# Elapsed inference time, in seconds, for each network.
dur_model = end_model - start_model
dur_branchy = end_branchynet - start_branchynet

# Print the two timings one per line, to six decimal places.
print(
    f"Classic DNN: {dur_model:.6f} s",
    f"BranchyMLP:  {dur_branchy:.6f} s",
    sep="\n",
)

In [None]:
# Per-class precision / recall / F1 for the baseline network, scored against
# the labels gathered from the test loader.
print("\n=== Classification Report DNN===")
dnn_report = classification_report(
    y_true,
    model_preds.numpy(),
    target_names=target_names,
    digits=4,
)
print(dnn_report)

# The same report for BranchyMLP, scored against the same labels.
print("\n=== Classification Report BRANCHYNET===")
branchy_report = classification_report(
    y_true,
    branchy_preds.numpy(),
    target_names=target_names,
    digits=4,
)
print(branchy_report)
