In [1]:

import os
import json
import time
from pathlib import Path
import numpy as np
import pandas as pd
import psutil
import joblib

import tensorflow as tf
from tensorflow.keras import layers, models, losses, optimizers
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# -------------------------
# User-configurable params
# -------------------------
INPUT_DIR = "./input"
MODEL_DIR = "./model"
RESULTS_DIR = "./results"
LOG_DIR = "./logs"

# Control how many CSVs to load for quick testing; set to None to load all                 =3
MAX_FILES_TO_LOAD = None

# Task mode: "auto" (detect), "binary", or "multiclass"
TASK_MODE = "multiclass"

# Training hyperparams
BATCH_SIZE = 256
EPOCHS = 30
ATTENTION_D = 128
EARLY_EXIT_THRESHOLDS = (0.95, 0.98)  # (exit1_thresh, exit2_thresh)
RANDOM_SEED = 42

# Make dirs
Path(MODEL_DIR).mkdir(parents=True, exist_ok=True)
Path(RESULTS_DIR).mkdir(parents=True, exist_ok=True)
Path(LOG_DIR).mkdir(parents=True, exist_ok=True)

np.random.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)


In [2]:
# -------------------------
# Utility helpers
# -------------------------
def to_py(x):
    """Convert numpy scalars to Python native (so json can serialize)."""
    if isinstance(x, (np.generic, )):
        return x.item()
    try:
        return float(x) if isinstance(x, float) or isinstance(x, np.floating) else int(x)
    except Exception:
        return x

# -------------------------
# Load CSVs
# -------------------------
csv_files = sorted(Path(INPUT_DIR).glob("**/*.csv"))
if len(csv_files) == 0:
    raise FileNotFoundError(f"No CSV files found under {INPUT_DIR}. Please place CICIDS2023 CSVs there.")

if MAX_FILES_TO_LOAD is not None:
    csv_files = csv_files[:MAX_FILES_TO_LOAD]

print(f"Loading {len(csv_files)} CSV files...")
dfs = []
for f in csv_files:
    print(" -", f)
    dfs.append(pd.read_csv(f))
data = pd.concat(dfs, ignore_index=True)
print("Combined shape:", data.shape)

# -------------------------
# Detect label column
# -------------------------
label_col = None
for c in data.columns:
    if c.lower().find("label") >= 0:
        label_col = c
        break
if label_col is None:
    label_col = data.columns[-1]
print("Detected label column:", label_col)

# Inspect unique labels
unique_labels = data[label_col].unique()
print("Unique labels count:", len(unique_labels))
print("Sample unique labels (up to 20):", unique_labels[:20])

# -------------------------
# Prepare labels according to TASK_MODE
# -------------------------
if TASK_MODE == "auto":
    if len(unique_labels) == 2:
        TASK_MODE_USED = "binary"
    else:
        TASK_MODE_USED = "multiclass"
else:
    TASK_MODE_USED = TASK_MODE

print("TASK_MODE_USED =", TASK_MODE_USED)

if TASK_MODE_USED == "binary":
    # Convert to binary: BenignTraffic -> 0, others -> 1
    # Try common benign names; otherwise treat exact 'Benign' or 'BenignTraffic'
    def map_to_binary(lbl):
        if isinstance(lbl, str):
            low = lbl.strip().lower()
            if "benign" in low or "normal" in low:
                return 0
            else:
                return 1
        else:
            # numeric label? treat 0 as benign, others as attack
            return 0 if lbl == 0 else 1
    data['label_enc'] = data[label_col].apply(map_to_binary).astype(np.int32)
    class_mapping = {"Benign/Normal":0, "Attack":1}
else:
    le = LabelEncoder()
    data['label_enc'] = le.fit_transform(data[label_col].astype(str)).astype(np.int32)
    class_mapping = dict(zip(le.classes_, le.transform(le.classes_)))
    print("Class mapping (label -> int):", class_mapping)

# -------------------------
# Feature selection: numeric columns
# -------------------------
numeric_cols = data.select_dtypes(include=[np.number]).columns.tolist()
# Remove label_enc if present in numeric_cols
if 'label_enc' in numeric_cols:
    numeric_cols.remove('label_enc')

print("Numeric feature count:", len(numeric_cols))

# Drop columns with >50% NaN
nan_frac = data[numeric_cols].isna().mean()
drop_cols = nan_frac[nan_frac > 0.5].index.tolist()
if drop_cols:
    print("Dropping columns with >50% NaN:", drop_cols)
    numeric_cols = [c for c in numeric_cols if c not in drop_cols]

# Fill remaining NaNs with median
data[numeric_cols] = data[numeric_cols].fillna(data[numeric_cols].median())

X = data[numeric_cols].astype(np.float32).values
y = data['label_enc'].values.astype(np.int32)

print("Class distribution:", dict(pd.Series(y).value_counts()))

# Optional subsample for demo if huge (you can remove this in full run)
MAX_SAMPLES = None  # set to int like 60000 for demo
if MAX_SAMPLES is not None and X.shape[0] > MAX_SAMPLES:
    _, X, _, y = train_test_split(X, y, train_size=MAX_SAMPLES, stratify=y, random_state=RANDOM_SEED)
    print("Subsampled to:", X.shape)

# Standardize features
scaler = StandardScaler()
X = scaler.fit_transform(X)
joblib.dump(scaler, Path(MODEL_DIR) / "scaler.joblib")
with open(Path(MODEL_DIR) / "feature_cols.json", "w") as f:
    json.dump(numeric_cols, f)

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=RANDOM_SEED)
print("Train/Test shapes:", X_train.shape, X_test.shape)



Loading 95 CSV files...
 - input\ciciot2023_5percent_sample_part_1.csv
 - input\ciciot2023_5percent_sample_part_10.csv
 - input\ciciot2023_5percent_sample_part_11.csv
 - input\ciciot2023_5percent_sample_part_12.csv
 - input\ciciot2023_5percent_sample_part_13.csv
 - input\ciciot2023_5percent_sample_part_14.csv
 - input\ciciot2023_5percent_sample_part_15.csv
 - input\ciciot2023_5percent_sample_part_16.csv
 - input\ciciot2023_5percent_sample_part_17.csv
 - input\ciciot2023_5percent_sample_part_18.csv
 - input\ciciot2023_5percent_sample_part_19.csv
 - input\ciciot2023_5percent_sample_part_2.csv
 - input\ciciot2023_5percent_sample_part_20.csv
 - input\ciciot2023_5percent_sample_part_21.csv
 - input\ciciot2023_5percent_sample_part_22.csv
 - input\ciciot2023_5percent_sample_part_23.csv
 - input\ciciot2023_5percent_sample_part_24.csv
 - input\ciciot2023_5percent_sample_part_25.csv
 - input\ciciot2023_5percent_sample_part_26.csv
 - input\ciciot2023_5percent_sample_part_27.csv
 - input\ciciot202

In [3]:
# -------------------------
# Build ADFNet (attention + early exits)
# -------------------------
from tensorflow.keras import backend as K

def build_adfnet(input_dim, num_classes, task_mode):
    inputs = layers.Input(shape=(input_dim,), name="input_features")
    # Attention MLP
    a = layers.Dense(ATTENTION_D, activation="relu", name="att_dense1")(inputs)
    a = layers.Dense(input_dim, activation=None, name="att_dense2")(a)
    att_weights = layers.Activation("softmax", name="att_softmax")(a)  # shape (batch, input_dim)
    x_weighted = layers.Multiply(name="x_weighted")([inputs, att_weights])
    # Blocks with early exits
    b1 = layers.Dense(64, activation="relu", name="dense_1")(x_weighted)
    exit1_logits = layers.Dense(num_classes if task_mode=="multiclass" else 1, name="exit1_logits")(b1)
    exit1_prob = layers.Activation("softmax", name="exit1_prob")(
        exit1_logits) if task_mode == "multiclass" else layers.Activation("sigmoid", name="exit1_prob")(exit1_logits)
    b2 = layers.Dense(32, activation="relu", name="dense_2")(b1)
    exit2_logits = layers.Dense(num_classes if task_mode=="multiclass" else 1, name="exit2_logits")(b2)
    exit2_prob = layers.Activation("softmax", name="exit2_prob")(
        exit2_logits) if task_mode == "multiclass" else layers.Activation("sigmoid", name="exit2_prob")(exit2_logits)
    b3 = layers.Dense(16, activation="relu", name="dense_3")(b2)
    final_logits = layers.Dense(num_classes if task_mode=="multiclass" else 1, name="final_logits")(b3)
    final_prob = layers.Activation("softmax", name="final_prob")(
        final_logits) if task_mode == "multiclass" else layers.Activation("sigmoid", name="final_prob")(final_logits)

    model = models.Model(inputs=inputs, outputs=[exit1_prob, exit2_prob, final_prob, att_weights], name="ADFNet")
    return model

task_mode = TASK_MODE_USED  # "binary" or "multiclass"
num_classes = len(np.unique(y)) if task_mode == "multiclass" else 1
adf_model = build_adfnet(X_train.shape[1], num_classes, task_mode)
adf_model.summary()



Model: "ADFNet"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_features (InputLayer)    [(None, 46)]         0           []                               
                                                                                                  
 att_dense1 (Dense)             (None, 128)          6016        ['input_features[0][0]']         
                                                                                                  
 att_dense2 (Dense)             (None, 46)           5934        ['att_dense1[0][0]']             
                                                                                                  
 att_softmax (Activation)       (None, 46)           0           ['att_dense2[0][0]']             
                                                                                             

In [4]:
# -------------------------
# Training loop (end-to-end)
# -------------------------
optimizer = optimizers.Adam(learning_rate=1e-3)
if task_mode == "multiclass":
    loss_fn = losses.SparseCategoricalCrossentropy()
else:
    loss_fn = losses.BinaryCrossentropy()

alpha1, alpha2, alpha3 = 0.3, 0.3, 0.4
lambda_att = 1e-3

train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(10000).batch(BATCH_SIZE)
val_ds = tf.data.Dataset.from_tensor_slices((X_test, y_test)).batch(BATCH_SIZE)

train_loss_hist = []
for epoch in range(EPOCHS):
    t0 = time.time()
    epoch_loss = 0.0
    total = 0
    for step, (bx, by) in enumerate(train_ds):
        with tf.GradientTape() as tape:
            e1, e2, f, att = adf_model(bx, training=True)
            # compute losses (handle shapes)
            if task_mode == "multiclass":
                l1 = loss_fn(by, e1)
                l2 = loss_fn(by, e2)
                l3 = loss_fn(by, f)
            else:
                # e1,e2,f shapes are (batch,1)
                l1 = loss_fn(tf.cast(tf.reshape(by, (-1,1)), tf.float32), e1)
                l2 = loss_fn(tf.cast(tf.reshape(by, (-1,1)), tf.float32), e2)
                l3 = loss_fn(tf.cast(tf.reshape(by, (-1,1)), tf.float32), f)
            att_mean = tf.reduce_mean(att)
            loss = alpha1*l1 + alpha2*l2 + alpha3*l3 + lambda_att * att_mean
        grads = tape.gradient(loss, adf_model.trainable_variables)
        optimizer.apply_gradients(zip(grads, adf_model.trainable_variables))
        batch_n = bx.shape[0]
        epoch_loss += float(loss.numpy()) * batch_n
        total += batch_n
    epoch_loss /= total
    train_loss_hist.append(epoch_loss)
    print(f"Epoch {epoch+1}/{EPOCHS} - loss: {epoch_loss:.6f} - time: {time.time()-t0:.1f}s")

# Save model (SavedModel folder)
adf_model.save(Path(MODEL_DIR) / 'adfnet_saved', include_optimizer=False)


Epoch 1/30 - loss: 0.317761 - time: 398.1s
Epoch 2/30 - loss: 0.102919 - time: 389.9s
Epoch 3/30 - loss: 0.087276 - time: 392.3s
Epoch 4/30 - loss: 0.082001 - time: 390.9s
Epoch 5/30 - loss: 0.080510 - time: 392.9s
Epoch 6/30 - loss: 0.076183 - time: 391.6s
Epoch 7/30 - loss: 0.072901 - time: 394.7s
Epoch 8/30 - loss: 0.071325 - time: 395.4s
Epoch 9/30 - loss: 0.067938 - time: 393.8s
Epoch 10/30 - loss: 0.069532 - time: 389.4s
Epoch 11/30 - loss: 0.059831 - time: 390.2s
Epoch 12/30 - loss: 0.056030 - time: 393.9s
Epoch 13/30 - loss: 0.055295 - time: 394.7s
Epoch 14/30 - loss: 0.049370 - time: 397.8s
Epoch 15/30 - loss: 0.050933 - time: 396.4s
Epoch 16/30 - loss: 0.046765 - time: 393.4s
Epoch 17/30 - loss: 0.047430 - time: 395.2s
Epoch 18/30 - loss: 0.046174 - time: 393.5s
Epoch 19/30 - loss: 0.045623 - time: 395.5s
Epoch 20/30 - loss: 0.044985 - time: 393.8s
Epoch 21/30 - loss: 0.045813 - time: 394.7s
Epoch 22/30 - loss: 0.044294 - time: 394.6s
Epoch 23/30 - loss: 0.043274 - time: 394.

In [5]:

# -------------------------
# Inference with early exit
# -------------------------
def inference_with_early_exit(model, X_input, exit_thresholds=EARLY_EXIT_THRESHOLDS, batch_size=1024):
    n = X_input.shape[0]
    y_preds = np.zeros(n, dtype=int)
    paths = np.zeros(n, dtype=int)
    latencies = []
    att_sparsity = []
    for i in range(0, n, batch_size):
        bx = X_input[i:i+batch_size]
        t0 = time.perf_counter()
        e1, e2, f, att = model.predict(bx, verbose=0)
        t1 = time.perf_counter()
        latencies.append((t1-t0)/bx.shape[0])
        # e1,e2,f shapes:
        # - multiclass: (batch, num_classes)
        # - binary: (batch, 1)
        for j in range(bx.shape[0]):
            if task_mode == "multiclass":
                if np.max(e1[j]) > exit_thresholds[0]:
                    y_preds[i+j] = int(np.argmax(e1[j]))
                    paths[i+j] = 1
                elif np.max(e2[j]) > exit_thresholds[1]:
                    y_preds[i+j] = int(np.argmax(e2[j]))
                    paths[i+j] = 2
                else:
                    y_preds[i+j] = int(np.argmax(f[j]))
                    paths[i+j] = 3
            else:
                # binary: convert prob -> class using 0.5
                if e1[j].shape[0] == 1:
                    conf1 = float(e1[j][0])
                    conf2 = float(e2[j][0])
                    conff = float(f[j][0])
                else:
                    conf1 = float(np.max(e1[j]))
                    conf2 = float(np.max(e2[j]))
                    conff = float(np.max(f[j]))
                if conf1 > exit_thresholds[0]:
                    y_preds[i+j] = 1 if conf1 >= 0.5 else 0
                    paths[i+j] = 1
                elif conf2 > exit_thresholds[1]:
                    y_preds[i+j] = 1 if conf2 >= 0.5 else 0
                    paths[i+j] = 2
                else:
                    y_preds[i+j] = 1 if conff >= 0.5 else 0
                    paths[i+j] = 3
        att_sparsity.append(np.mean(np.count_nonzero(att > (1.0/att.shape[1]), axis=1)/att.shape[1]))
    return y_preds, paths, np.mean(latencies), np.mean(att_sparsity)

# Run inference
y_pred, paths_taken, avg_lat_s, att_s = inference_with_early_exit(adf_model, X_test, exit_thresholds=EARLY_EXIT_THRESHOLDS)
acc = accuracy_score(y_test, y_pred)
prec = precision_score(y_test, y_pred, zero_division=0, average='weighted')
rec = recall_score(y_test, y_pred, zero_division=0, average='weighted')
f1 = f1_score(y_test, y_pred, zero_division=0, average='weighted')
cm = confusion_matrix(y_test, y_pred)

print("Accuracy:", acc)
print("Precision:", prec, "Recall:", rec, "F1:", f1)
print("Confusion matrix:\n", cm)
print("Path distribution:", pd.Series(paths_taken).value_counts().to_dict())
print("Avg latency per sample (s):", avg_lat_s)
print("Avg attention sparsity (fraction):", att_s)


Accuracy: 0.9870382864792503
Precision: 0.9854498145331525 Recall: 0.9870382864792503 F1: 0.9856689466354372
Confusion matrix:
 [[    0    17     0 ...     0     0     0]
 [    0 21412     0 ...     0     0     0]
 [    0    56     0 ...     0     0     0]
 ...
 [    0     0     0 ...   732     0     0]
 [    0    13     0 ...     0     0     0]
 [    0     0     0 ...     0     0    34]]
Path distribution: {1: 898583, 3: 34945, 2: 222}
Avg latency per sample (s): 0.0001266661028793276
Avg attention sparsity (fraction): 0.0868494410463377


In [7]:

# -------------------------
# Save metrics (safe types)
# -------------------------
metrics = {
    'accuracy': to_py(acc),
    'precision': to_py(prec),
    'recall': to_py(rec),
    'f1': to_py(f1),
    'path_dist': {str(int(k)): int(v) for k,v in pd.Series(paths_taken).value_counts().to_dict().items()},
    'avg_latency_s': to_py(avg_lat_s),
    'attention_sparsity': to_py(att_s)
}
with open(Path(RESULTS_DIR) / 'metrics.json', 'w') as f:
    json.dump(metrics, f, indent=2)

# Save predictions
pd.DataFrame({'y_true': y_test.tolist(), 'y_pred': y_pred.tolist(), 'path': paths_taken.tolist()}).to_csv(Path(RESULTS_DIR) / 'predictions.csv', index=False)

# Resource metrics
total_params = int(np.sum([np.prod(v.shape) for v in adf_model.trainable_variables]))
def folder_size(path):
    total = 0
    for p in Path(path).rglob('*'):
        if p.is_file():
            total += p.stat().st_size
    return total
model_size_bytes = folder_size(Path(MODEL_DIR) / 'adfnet_saved') if (Path(MODEL_DIR) / 'adfnet_saved').exists() else 0
proc = psutil.Process(os.getpid())
mem_mb = proc.memory_info().rss / (1024*1024)

class_mapping_serializable = {str(k): int(v) for k, v in class_mapping.items()}

with open(Path(RESULTS_DIR) / 'resource_report.json', 'w') as f:
    json.dump({
        'total_params': int(total_params),
        'model_folder_bytes': int(model_size_bytes),
        'process_rss_mb': float(mem_mb),
        'task_mode': task_mode,
        'class_mapping': class_mapping_serializable
    }, f, indent=2)

# Save train loss curve if available
try:
    import matplotlib.pyplot as plt
    plt.figure(figsize=(6,4))
    plt.plot(range(1, len(train_loss_hist)+1), train_loss_hist, marker='o')
    plt.xlabel('Epoch')
    plt.ylabel('Train Loss')
    plt.title('Training Loss')
    plt.grid(True)
    plt.savefig(Path(RESULTS_DIR) / 'train_loss.png', bbox_inches='tight')
    plt.close()
except Exception:
    pass

# Save classification report
try:
    from sklearn.metrics import classification_report
    report = classification_report(y_test, y_pred, digits=4)
    with open(Path(RESULTS_DIR) / 'class_report.txt', 'w') as f:
        f.write(report)
except Exception:
    pass

print("Saved results to:", RESULTS_DIR)


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Saved results to: ./results


  _warn_prf(average, modifier, msg_start, len(result))
