In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import joblib
from tensorflow.keras.layers import Dense, Input, Dropout, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

2025-04-03 04:49:23.119268: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1743655763.198938    1989 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1743655763.225223    1989 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1743655763.408932    1989 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1743655763.409133    1989 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1743655763.409137    1989 computation_placer.cc:177] computation placer alr

In [2]:
train = pd.read_csv("../DATA/train.csv")

  train = pd.read_csv("../DATA/train.csv")


In [3]:
train = train.dropna()

In [4]:
train['Protocol'] = pd.to_numeric(train['Protocol'], errors='coerce') 
train = train.dropna(subset=['Protocol']) 
train['Protocol'] = train['Protocol'].astype(int)

In [5]:
columns = list(train.columns) 
columns = [col for col in columns if col not in ['Fwd PSH Flags', 'RST Flag Cnt', 'PSH Flag Cnt', 'ACK Flag Cnt', 'Protocol', 'Label']]

In [6]:
x = train.drop(columns=['Label'])
y = train['Label']

In [7]:
x[columns] = x[columns].apply(pd.to_numeric, errors='coerce')
f=['Fwd PSH Flags', 'RST Flag Cnt', 'PSH Flag Cnt', 'ACK Flag Cnt']
x[f] = x[f].apply(pd.to_numeric, errors='coerce')
x = x.fillna(0)

In [8]:
sc = joblib.load('../FEATURE_EXTRACTION/scaler.pkl')  
x[columns] = sc.transform(x[columns])

In [9]:
le = LabelEncoder()
ye = le.fit_transform(y)
class_counts = np.bincount(ye)
class_weights = {i: 1./count for i, count in enumerate(class_counts)}

In [10]:
x_train, x_test, y_train, y_test = train_test_split(x, ye, test_size=0.2, random_state=42,stratify=ye)


In [11]:
def load_dbn_weights(layer_files):
    weights = []
    for file in layer_files:
        data = np.load(file)
        weights.append({
            'W': data['W'],
            'bh': data['bh'],
            'bv': data['bv']
        })
    return weights

In [12]:
dbn_weights = load_dbn_weights([
    "../FEATURE_EXTRACTION/rbm_layer_1_weights.npz",
    "../FEATURE_EXTRACTION/rbm_layer_2_weights.npz",
    "../FEATURE_EXTRACTION/rbm_layer_3_weights.npz"
])

In [13]:
def build_mlp(input_dim, num_classes, dbn_weights):
    inputs = Input(shape=(input_dim,))
    
    # DBN-initialized layers with trainable options
    x = Dense(128, activation='swish',  # Swish often outperforms sigmoid/relu
              kernel_initializer=tf.constant_initializer(dbn_weights[0]['W']),
              bias_initializer=tf.constant_initializer(dbn_weights[0]['bh']),
              kernel_regularizer=l2(1e-4))(inputs)
    x = BatchNormalization()(x)
    x = Dropout(0.3)(x)
    
    x = Dense(64, activation='swish',
              kernel_initializer=tf.constant_initializer(dbn_weights[1]['W']),
              bias_initializer=tf.constant_initializer(dbn_weights[1]['bh']),
              kernel_regularizer=l2(1e-4))(x)
    x = BatchNormalization()(x)
    x = Dropout(0.3)(x)
    
    x = Dense(32, activation='swish',
              kernel_initializer=tf.constant_initializer(dbn_weights[2]['W']),
              bias_initializer=tf.constant_initializer(dbn_weights[2]['bh']),
              kernel_regularizer=l2(1e-4))(x)
    x = BatchNormalization()(x)
    x = Dropout(0.3)(x)
    
    # Additional task-specific layers
    x = Dense(64, activation='swish', kernel_regularizer=l2(1e-4))(x)
    x = BatchNormalization()(x)
    x = Dropout(0.4)(x)
    
    x = Dense(32, activation='swish', kernel_regularizer=l2(1e-4))(x)
    x = BatchNormalization()(x)
    x = Dropout(0.4)(x)
    
    # Output layer
    outputs = Dense(num_classes, activation='softmax')(x)
    
    model = Model(inputs=inputs, outputs=outputs)
    return model


In [14]:
num_classes = len(np.unique(y))
model = build_mlp(x_train.shape[1], num_classes, dbn_weights)

model.compile(optimizer=Adam(learning_rate=0.001, beta_1=0.9,
    beta_2=0.999,
    epsilon=1e-07,
    amsgrad=True),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

I0000 00:00:1743655855.428843    1989 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 20933 MB memory:  -> device: 0, name: NVIDIA A10G, pci bus id: 0000:00:1e.0, compute capability: 8.6


In [15]:
callbacks = [
    EarlyStopping(
        monitor='val_accuracy',
        patience=15,
        restore_best_weights=True,
        mode='max'
    ),
    ReduceLROnPlateau(
        monitor='val_accuracy',
        factor=0.5,
        patience=7,
        min_lr=1e-6,
        mode='max'
    ),
    ModelCheckpoint(
        'best_model.h5',
        monitor='val_accuracy',
        save_best_only=True,
        mode='max'
    )
]

In [None]:
print("\nStarting two-phase training...")

# Phase 1: Train only the new layers (freeze DBN layers)
for layer in model.layers[:6]:  # First 6 layers are DBN-initialized
    layer.trainable = False

print("Phase 1: Training new layers only")
history1 = model.fit(
    x_train, y_train,
    validation_data=(x_test, y_test),
    epochs=50,
    batch_size=2048,  # Larger batch size for stability
    class_weight=class_weights,
    callbacks=callbacks,
    verbose=1
)

# Phase 2: Fine-tune all layers with lower learning rate
for layer in model.layers:
    layer.trainable = True

model.compile(
    optimizer=Adam(learning_rate=1e-5),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

print("\nPhase 2: Fine-tuning all layers")
history2 = model.fit(
    x_train, y_train,
    validation_data=(x_test, y_test),
    epochs=100,
    batch_size=1024,
    class_weight=class_weights,
    callbacks=callbacks,
    verbose=1
)


Starting two-phase training...
Phase 1: Training new layers only


Epoch 1/50


I0000 00:00:1743655864.360011    4394 service.cc:152] XLA service 0x7f11cc018a80 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1743655864.360086    4394 service.cc:160]   StreamExecutor device (0): NVIDIA A10G, Compute Capability 8.6
2025-04-03 04:51:04.459267: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1743655864.840410    4394 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m  20/4756[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m12s[0m 3ms/step - accuracy: 0.0649 - loss: 0.0430   

I0000 00:00:1743655868.252169    4394 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m4756/4756[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.0475 - loss: 0.0064



[1m4756/4756[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 4ms/step - accuracy: 0.0475 - loss: 0.0064 - val_accuracy: 0.0286 - val_loss: 2.7240 - learning_rate: 0.0010
Epoch 2/50
[1m4756/4756[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 3ms/step - accuracy: 0.0395 - loss: 4.4027e-06 - val_accuracy: 1.1212e-04 - val_loss: 2.5512 - learning_rate: 0.0010
Epoch 3/50
[1m4756/4756[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 3ms/step - accuracy: 0.0848 - loss: 3.7848e-06 - val_accuracy: 1.1212e-04 - val_loss: 2.7347 - learning_rate: 0.0010
Epoch 4/50
[1m4756/4756[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 3ms/step - accuracy: 0.0844 - loss: 3.9616e-06 - val_accuracy: 0.0027 - val_loss: 2.6971 - learning_rate: 0.0010
Epoch 5/50
[1m4756/4756[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 3ms/step - accuracy: 0.0736 - loss: 4.8912e-06 - val_accuracy: 0.0178 - val_loss: 2.4603 - learning_rate: 0.0010
Epoch 6/50
[1m4756/4756[0m [32m━━━━━━━━



[1m4756/4756[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 3ms/step - accuracy: 0.0905 - loss: 4.4737e-06 - val_accuracy: 0.5614 - val_loss: 2.3056 - learning_rate: 0.0010
Epoch 8/50
[1m4756/4756[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 3ms/step - accuracy: 0.0734 - loss: 3.9549e-06 - val_accuracy: 1.3347e-04 - val_loss: 2.3923 - learning_rate: 0.0010
Epoch 9/50
[1m4756/4756[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 3ms/step - accuracy: 0.0469 - loss: 5.3678e-06 - val_accuracy: 0.0068 - val_loss: 2.3156 - learning_rate: 0.0010
Epoch 10/50
[1m4756/4756[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 3ms/step - accuracy: 0.0497 - loss: 3.5614e-06 - val_accuracy: 0.0356 - val_loss: 2.5770 - learning_rate: 0.0010
Epoch 11/50
[1m4756/4756[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 2ms/step - accuracy: 0.0453 - loss: 5.3037e-06 - val_accuracy: 0.0177 - val_loss: 2.8566 - learning_rate: 0.0010
Epoch 12/50
[1m4756/4756[0m [32m━━━━━