In [21]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import joblib
import os
import time

# Wall-clock start of this cell; the elapsed time is derived from it at the bottom of the cell
start_time = time.time()

# Suppress TensorFlow warnings (the TensorFlow logger only emits ERROR and above)
tf.get_logger().setLevel('ERROR')

class AdaptiveNIDS:
    """
    Reconstruction-based network intrusion detector.

    Wraps a Conv1D + LSTM autoencoder that is trained to reproduce its own
    input. A sample is flagged as anomalous when its mean squared
    reconstruction error exceeds a threshold derived from validation data.
    """

    def __init__(self, input_dim, latent_dim=32, learning_rate=1e-3):
        """
        Initialize Adaptive Network Intrusion Detection System

        Args:
            input_dim (int): Number of input features
            latent_dim (int): Width of the LSTM layers and of the dense bottleneck
            learning_rate (float): Initial learning rate for Adam optimizer

        Raises:
            ValueError: If input_dim is below 2 or latent_dim is below 1
        """
        # MaxPooling1D(pool_size=2) halves the feature axis, so a single
        # feature would leave the encoder LSTM with an empty sequence.
        if input_dim < 2:
            raise ValueError(f"input_dim must be at least 2, got {input_dim}")
        if latent_dim < 1:
            raise ValueError(f"latent_dim must be positive, got {latent_dim}")

        self.input_dim = input_dim
        self.latent_dim = latent_dim
        self.learning_rate = learning_rate

        # Build and compile the network once; every other method uses self.model
        self.model = self._build_autoencoder_cnn_model()

    def _build_autoencoder_cnn_model(self):
        """
        Construct the Conv1D + LSTM autoencoder

        Data flow: (input_dim,) -> (input_dim, 1) -> Conv1D(64) -> BatchNorm
        -> MaxPool(2) -> Dropout(0.3) -> LSTM(latent_dim) -> Dense(latent_dim)
        -> RepeatVector(input_dim) -> LSTM(latent_dim, sequences)
        -> per-step Dense(1) -> flattened back to (input_dim,).

        Returns:
            keras.Model: Autoencoder compiled with Adam and MSE loss
        """
        inputs = layers.Input(shape=(self.input_dim,))

        # Treat the feature vector as a length-input_dim sequence with one channel
        x = layers.Reshape((self.input_dim, 1))(inputs)

        # CNN feature enhancement
        x = layers.Conv1D(
            filters=64,
            kernel_size=3,
            activation='relu',
            padding='same'
        )(x)
        x = layers.BatchNormalization()(x)
        x = layers.MaxPooling1D(pool_size=2)(x)
        x = layers.Dropout(0.3)(x)

        # LSTM encoder: collapse the pooled sequence into a single vector
        x = layers.LSTM(
            units=self.latent_dim,
            return_sequences=False
        )(x)

        # Latent representation
        encoded = layers.Dense(
            self.latent_dim,
            activation='relu'
        )(x)

        # LSTM decoder: one time step per original feature
        x = layers.RepeatVector(self.input_dim)(encoded)
        x = layers.LSTM(
            units=self.latent_dim,
            return_sequences=True
        )(x)

        # One linear output per time step, i.e. one value per feature
        decoded = layers.TimeDistributed(
            layers.Dense(1, activation='linear')
        )(x)

        # (input_dim, 1) -> (input_dim,) so the output matches the input shape
        decoded = layers.Flatten()(decoded)

        autoencoder = keras.Model(inputs=inputs, outputs=decoded)

        optimizer = keras.optimizers.Adam(learning_rate=self.learning_rate)
        autoencoder.compile(
            optimizer=optimizer,
            loss='mean_squared_error'
        )

        return autoencoder

    def train(self, X_train, X_val=None, epochs=50, batch_size=32):
        """
        Train the Autoencoder model

        Args:
            X_train (np.array): Training data
            X_val (np.array, optional): Validation data
            epochs (int): Number of training epochs
            batch_size (int): Batch size for training

        Returns:
            history: Training history
        """
        # Without validation data Keras never reports 'val_loss', so callbacks
        # watching it would have nothing to act on; fall back to the training loss.
        has_validation = X_val is not None
        monitored = 'val_loss' if has_validation else 'loss'

        # Early stopping to prevent overfitting
        early_stopping = keras.callbacks.EarlyStopping(
            monitor=monitored,
            patience=10,
            restore_best_weights=True
        )

        # Reduce learning rate on plateau
        reduce_lr = keras.callbacks.ReduceLROnPlateau(
            monitor=monitored,
            factor=0.5,
            patience=5,
            min_lr=1e-5
        )

        history = self.model.fit(
            X_train, X_train,  # Autoencoder reconstructs input
            epochs=epochs,
            batch_size=batch_size,
            validation_data=(X_val, X_val) if has_validation else None,
            callbacks=[early_stopping, reduce_lr],
            verbose=1
        )

        return history

    def _reconstruction_errors(self, X):
        """
        Compute the mean squared reconstruction error of every sample

        Args:
            X (array-like): Samples, one row per sample

        Returns:
            np.array: One error value per row of X (empty if X has no rows)
        """
        X = np.asarray(X)
        if X.shape[0] == 0:
            # Nothing to score; skip the model call entirely
            return np.zeros(0, dtype=float)
        reconstructions = self.model.predict(X)
        return np.mean(np.square(X - reconstructions), axis=1)

    def calculate_threshold(self, X_val, percentile=95):
        """
        Calculate reconstruction error threshold

        Args:
            X_val (np.array): Validation data
            percentile (float): Percentile for anomaly threshold

        Returns:
            float: Anomaly detection threshold

        Raises:
            ValueError: If X_val contains no samples
        """
        errors = self._reconstruction_errors(X_val)
        if errors.size == 0:
            raise ValueError("X_val must contain at least one sample")
        # Plain float keeps the value easy to print and serialize
        return float(np.percentile(errors, percentile))

    def detect_anomalies(self, X_test, threshold):
        """
        Detect anomalies in network traffic

        Args:
            X_test (np.array): Test data
            threshold (float): Anomaly detection threshold

        Returns:
            np.array: Boolean mask of anomalies (True where error > threshold)
        """
        return self._reconstruction_errors(X_test) > threshold

    def save_model(self, model_path='autoencoder_lstm_model.h5'):
        """
        Save trained model

        Args:
            model_path (str): Path to save model
        """
        self.model.save(model_path)
        print(f"Model saved to {model_path}")

def preprocess_data(file_path, test_size=0.2, random_state=42,
                    label_column='Attack_label'):
    """
    Load the dataset and split its feature matrix into train/validation parts

    Args:
        file_path (str): Path to the CSV dataset
        test_size (float): Proportion of validation data
        random_state (int): Random seed for reproducibility
        label_column (str): Target column removed before training, since the
            autoencoder only sees features

    Returns:
        tuple: (X_train, X_val) arrays of feature rows

    Raises:
        KeyError: If label_column is not a column of the file
        Exception: Any other loading/splitting error is printed and re-raised
    """
    # NOTE(review): no scaling is applied here although MinMaxScaler is
    # imported at the top of the cell; the training loss recorded below
    # (~3.8e7) suggests the features may not be normalised — confirm upstream.
    try:
        df = pd.read_csv(file_path)

        if label_column not in df.columns:
            raise KeyError(
                f"label column {label_column!r} not found; "
                f"available columns: {list(df.columns)}"
            )

        # Keep only the feature columns
        X = df.drop(columns=[label_column]).values

        X_train, X_val = train_test_split(
            X,
            test_size=test_size,
            random_state=random_state
        )

        return X_train, X_val

    except Exception as e:
        print(f"Error in data preprocessing: {e}")
        raise

def main(dataset_path='training_dataset.csv',
         model_save_path='autoencoder_lstm_model.h5',
         threshold_save_path='anomaly_threshold.pkl'):
    """
    Run the training pipeline: load data, train, derive threshold, persist

    Failures are reported (message plus traceback) but not re-raised, so the
    surrounding cell keeps running.

    Args:
        dataset_path (str): CSV file passed to preprocess_data
        model_save_path (str): Where the trained model is written
        threshold_save_path (str): Where the anomaly threshold is written
    """
    # NOTE(review): the other training cells in this notebook use the same
    # default output paths, so each run overwrites the previous artifacts.
    import traceback  # local import: only needed on the failure path

    try:
        # Preprocess data
        X_train, X_val = preprocess_data(dataset_path)

        # Print data shapes for verification
        print(f"Training data shape: {X_train.shape}")
        print(f"Validation data shape: {X_val.shape}")

        # One input unit per feature column
        nids = AdaptiveNIDS(input_dim=X_train.shape[1])
        nids.train(X_train, X_val)

        # Calculate anomaly threshold
        threshold = nids.calculate_threshold(X_val)
        print(f"Anomaly Threshold: {threshold}")

        # Save model, then the threshold needed at inference time
        nids.save_model(model_save_path)
        joblib.dump({'threshold': threshold}, threshold_save_path)
        print(f"Threshold saved to {threshold_save_path}")

    except Exception as e:
        print(f"An error occurred during NIDS training: {e}")
        # Keep the stack trace so the failing step can be located
        traceback.print_exc()

# Entry point: run the training pipeline when this code executes as the main module
if __name__ == '__main__':
    main()

# Wall-clock seconds elapsed since start_time was recorded at the top of the cell
end_time = time.time()
ex_time = end_time - start_time
# Bare expression; presumably left last so the notebook echoes the elapsed seconds
ex_time

Training data shape: (57120, 44)
Validation data shape: (14281, 44)
Epoch 1/50
[1m1785/1785[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 12ms/step - loss: 38514476.0000 - val_loss: 38364216.0000 - learning_rate: 0.0010
Epoch 2/50
[1m1785/1785[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 11ms/step - loss: 38753532.0000 - val_loss: 38291340.0000 - learning_rate: 0.0010
Epoch 3/50
[1m1785/1785[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 12ms/step - loss: 38246884.0000 - val_loss: 38218564.0000 - learning_rate: 0.0010
Epoch 4/50
[1m1785/1785[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 12ms/step - loss: 38247920.0000 - val_loss: 38145996.0000 - learning_rate: 0.0010
Epoch 5/50
[1m1785/1785[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 12ms/step - loss: 38397080.0000 - val_loss: 38073584.0000 - learning_rate: 0.0010
Epoch 6/50
[1m1785/1785[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 11ms/step - loss: 38432724.0000 - val_loss



Anomaly Threshold: 97150638.76216795
Model saved to autoencoder_lstm_model.h5
Threshold saved to anomaly_threshold.pkl


1033.3725581169128

In [25]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import joblib
import os
import time

# Wall-clock start of this cell; the elapsed time is derived from it at the bottom of the cell
start_time = time.time()

# Suppress TensorFlow warnings (the TensorFlow logger only emits ERROR and above)
tf.get_logger().setLevel('ERROR')

class AdaptiveNIDS:
    """
    Reconstruction-based network intrusion detector.

    Wraps a Conv1D + LSTM autoencoder that is trained to reproduce its own
    input. A sample is flagged as anomalous when its mean squared
    reconstruction error exceeds a threshold derived from validation data.
    """

    def __init__(self, input_dim, latent_dim=32, learning_rate=1e-3):
        """
        Initialize Adaptive Network Intrusion Detection System

        Args:
            input_dim (int): Number of input features
            latent_dim (int): Width of the LSTM layers and of the dense bottleneck
            learning_rate (float): Initial learning rate for Adam optimizer

        Raises:
            ValueError: If input_dim is below 2 or latent_dim is below 1
        """
        # MaxPooling1D(pool_size=2) halves the feature axis, so a single
        # feature would leave the encoder LSTM with an empty sequence.
        if input_dim < 2:
            raise ValueError(f"input_dim must be at least 2, got {input_dim}")
        if latent_dim < 1:
            raise ValueError(f"latent_dim must be positive, got {latent_dim}")

        self.input_dim = input_dim
        self.latent_dim = latent_dim
        self.learning_rate = learning_rate

        # Build and compile the network once; every other method uses self.model
        self.model = self._build_autoencoder_cnn_model()

    def _build_autoencoder_cnn_model(self):
        """
        Construct the Conv1D + LSTM autoencoder

        Data flow: (input_dim,) -> (input_dim, 1) -> Conv1D(64) -> BatchNorm
        -> MaxPool(2) -> Dropout(0.3) -> LSTM(latent_dim) -> Dense(latent_dim)
        -> RepeatVector(input_dim) -> LSTM(latent_dim, sequences)
        -> per-step Dense(1) -> flattened back to (input_dim,).

        Returns:
            keras.Model: Autoencoder compiled with Adam and MSE loss
        """
        inputs = layers.Input(shape=(self.input_dim,))

        # Treat the feature vector as a length-input_dim sequence with one channel
        x = layers.Reshape((self.input_dim, 1))(inputs)

        # CNN feature enhancement
        x = layers.Conv1D(
            filters=64,
            kernel_size=3,
            activation='relu',
            padding='same'
        )(x)
        x = layers.BatchNormalization()(x)
        x = layers.MaxPooling1D(pool_size=2)(x)
        x = layers.Dropout(0.3)(x)

        # LSTM encoder: collapse the pooled sequence into a single vector
        x = layers.LSTM(
            units=self.latent_dim,
            return_sequences=False
        )(x)

        # Latent representation
        encoded = layers.Dense(
            self.latent_dim,
            activation='relu'
        )(x)

        # LSTM decoder: one time step per original feature
        x = layers.RepeatVector(self.input_dim)(encoded)
        x = layers.LSTM(
            units=self.latent_dim,
            return_sequences=True
        )(x)

        # One linear output per time step, i.e. one value per feature
        decoded = layers.TimeDistributed(
            layers.Dense(1, activation='linear')
        )(x)

        # (input_dim, 1) -> (input_dim,) so the output matches the input shape
        decoded = layers.Flatten()(decoded)

        autoencoder = keras.Model(inputs=inputs, outputs=decoded)

        optimizer = keras.optimizers.Adam(learning_rate=self.learning_rate)
        autoencoder.compile(
            optimizer=optimizer,
            loss='mean_squared_error'
        )

        return autoencoder

    def train(self, X_train, X_val=None, epochs=50, batch_size=16):
        """
        Train the Autoencoder model

        Args:
            X_train (np.array): Training data
            X_val (np.array, optional): Validation data
            epochs (int): Number of training epochs
            batch_size (int): Batch size for training

        Returns:
            history: Training history
        """
        # Without validation data Keras never reports 'val_loss', so callbacks
        # watching it would have nothing to act on; fall back to the training loss.
        has_validation = X_val is not None
        monitored = 'val_loss' if has_validation else 'loss'

        # Early stopping to prevent overfitting
        early_stopping = keras.callbacks.EarlyStopping(
            monitor=monitored,
            patience=10,
            restore_best_weights=True
        )

        # Reduce learning rate on plateau
        reduce_lr = keras.callbacks.ReduceLROnPlateau(
            monitor=monitored,
            factor=0.5,
            patience=5,
            min_lr=1e-5
        )

        history = self.model.fit(
            X_train, X_train,  # Autoencoder reconstructs input
            epochs=epochs,
            batch_size=batch_size,
            validation_data=(X_val, X_val) if has_validation else None,
            callbacks=[early_stopping, reduce_lr],
            verbose=1
        )

        return history

    def _reconstruction_errors(self, X):
        """
        Compute the mean squared reconstruction error of every sample

        Args:
            X (array-like): Samples, one row per sample

        Returns:
            np.array: One error value per row of X (empty if X has no rows)
        """
        X = np.asarray(X)
        if X.shape[0] == 0:
            # Nothing to score; skip the model call entirely
            return np.zeros(0, dtype=float)
        reconstructions = self.model.predict(X)
        return np.mean(np.square(X - reconstructions), axis=1)

    def calculate_threshold(self, X_val, percentile=95):
        """
        Calculate reconstruction error threshold

        Args:
            X_val (np.array): Validation data
            percentile (float): Percentile for anomaly threshold

        Returns:
            float: Anomaly detection threshold

        Raises:
            ValueError: If X_val contains no samples
        """
        errors = self._reconstruction_errors(X_val)
        if errors.size == 0:
            raise ValueError("X_val must contain at least one sample")
        # Plain float keeps the value easy to print and serialize
        return float(np.percentile(errors, percentile))

    def detect_anomalies(self, X_test, threshold):
        """
        Detect anomalies in network traffic

        Args:
            X_test (np.array): Test data
            threshold (float): Anomaly detection threshold

        Returns:
            np.array: Boolean mask of anomalies (True where error > threshold)
        """
        return self._reconstruction_errors(X_test) > threshold

    def save_model(self, model_path='autoencoder_lstm_model.h5'):
        """
        Save trained model

        Args:
            model_path (str): Path to save model
        """
        self.model.save(model_path)
        print(f"Model saved to {model_path}")

def preprocess_data(file_path, test_size=0.2, random_state=42,
                    label_column='Attack_label'):
    """
    Load the dataset and split its feature matrix into train/validation parts

    Args:
        file_path (str): Path to the CSV dataset
        test_size (float): Proportion of validation data
        random_state (int): Random seed for reproducibility
        label_column (str): Target column removed before training, since the
            autoencoder only sees features

    Returns:
        tuple: (X_train, X_val) arrays of feature rows

    Raises:
        KeyError: If label_column is not a column of the file
        Exception: Any other loading/splitting error is printed and re-raised
    """
    # NOTE(review): no scaling is applied here although MinMaxScaler is
    # imported at the top of the cell; the training loss recorded below
    # (~3.8e7) suggests the features may not be normalised — confirm upstream.
    try:
        df = pd.read_csv(file_path)

        if label_column not in df.columns:
            raise KeyError(
                f"label column {label_column!r} not found; "
                f"available columns: {list(df.columns)}"
            )

        # Keep only the feature columns
        X = df.drop(columns=[label_column]).values

        X_train, X_val = train_test_split(
            X,
            test_size=test_size,
            random_state=random_state
        )

        return X_train, X_val

    except Exception as e:
        print(f"Error in data preprocessing: {e}")
        raise

def main(dataset_path='training_dataset.csv',
         model_save_path='autoencoder_lstm_model_1.h5',
         threshold_save_path='anomaly_threshold.pkl'):
    """
    Run the training pipeline: load data, train, derive threshold, persist

    Failures are reported (message plus traceback) but not re-raised, so the
    surrounding cell keeps running.

    Args:
        dataset_path (str): CSV file passed to preprocess_data
        model_save_path (str): Where the trained model is written
        threshold_save_path (str): Where the anomaly threshold is written
    """
    # NOTE(review): the model goes to its own '_1' file, but the default
    # threshold path is the one the other training cells in this notebook
    # also write to, so the threshold on disk may belong to a different
    # model — confirm which pairing inference expects.
    import traceback  # local import: only needed on the failure path

    try:
        # Preprocess data
        X_train, X_val = preprocess_data(dataset_path)

        # Print data shapes for verification
        print(f"Training data shape: {X_train.shape}")
        print(f"Validation data shape: {X_val.shape}")

        # One input unit per feature column
        nids = AdaptiveNIDS(input_dim=X_train.shape[1])
        nids.train(X_train, X_val)

        # Calculate anomaly threshold
        threshold = nids.calculate_threshold(X_val)
        print(f"Anomaly Threshold: {threshold}")

        # Save model, then the threshold needed at inference time
        nids.save_model(model_save_path)
        joblib.dump({'threshold': threshold}, threshold_save_path)
        print(f"Threshold saved to {threshold_save_path}")

    except Exception as e:
        print(f"An error occurred during NIDS training: {e}")
        # Keep the stack trace so the failing step can be located
        traceback.print_exc()

# Entry point: run the training pipeline when this code executes as the main module
if __name__ == '__main__':
    main()

# Wall-clock seconds elapsed since start_time was recorded at the top of the cell
end_time = time.time()
ex_time = end_time - start_time
# Bare expression; presumably left last so the notebook echoes the elapsed seconds
ex_time

Training data shape: (57120, 44)
Validation data shape: (14281, 44)
Epoch 1/50
[1m3570/3570[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 8ms/step - loss: 38681420.0000 - val_loss: 38291984.0000 - learning_rate: 0.0010
Epoch 2/50
[1m3570/3570[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 8ms/step - loss: 38536464.0000 - val_loss: 38147552.0000 - learning_rate: 0.0010
Epoch 3/50
[1m3570/3570[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 8ms/step - loss: 38568508.0000 - val_loss: 38003676.0000 - learning_rate: 0.0010
Epoch 4/50
[1m3570/3570[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 8ms/step - loss: 38124660.0000 - val_loss: 37859768.0000 - learning_rate: 0.0010
Epoch 5/50
[1m3570/3570[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 8ms/step - loss: 37992204.0000 - val_loss: 37716492.0000 - learning_rate: 0.0010
Epoch 6/50
[1m3570/3570[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 8ms/step - loss: 37917864.0000 - val_loss: 3757



Anomaly Threshold: 90658422.49167582
Model saved to autoencoder_lstm_model_1.h5
Threshold saved to anomaly_threshold.pkl


1509.0542860031128

In [27]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import joblib
import os
import time

# Wall-clock start of this cell; the elapsed time is derived from it at the bottom of the cell
start_time = time.time()

# Suppress TensorFlow warnings (the TensorFlow logger only emits ERROR and above)
tf.get_logger().setLevel('ERROR')

class AdaptiveNIDS:
    """
    Reconstruction-based network intrusion detector.

    Wraps a Conv1D + LSTM autoencoder that is trained to reproduce its own
    input. A sample is flagged as anomalous when its mean squared
    reconstruction error exceeds a threshold derived from validation data.
    """

    def __init__(self, input_dim, latent_dim=32, learning_rate=1e-3):
        """
        Initialize Adaptive Network Intrusion Detection System

        Args:
            input_dim (int): Number of input features
            latent_dim (int): Width of the LSTM layers and of the dense bottleneck
            learning_rate (float): Initial learning rate for Adam optimizer

        Raises:
            ValueError: If input_dim is below 2 or latent_dim is below 1
        """
        # MaxPooling1D(pool_size=2) halves the feature axis, so a single
        # feature would leave the encoder LSTM with an empty sequence.
        if input_dim < 2:
            raise ValueError(f"input_dim must be at least 2, got {input_dim}")
        if latent_dim < 1:
            raise ValueError(f"latent_dim must be positive, got {latent_dim}")

        self.input_dim = input_dim
        self.latent_dim = latent_dim
        self.learning_rate = learning_rate

        # Build and compile the network once; every other method uses self.model
        self.model = self._build_autoencoder_cnn_model()

    def _build_autoencoder_cnn_model(self):
        """
        Construct the Conv1D + LSTM autoencoder

        Data flow: (input_dim,) -> (input_dim, 1) -> Conv1D(64) -> BatchNorm
        -> MaxPool(2) -> Dropout(0.3) -> LSTM(latent_dim) -> Dense(latent_dim)
        -> RepeatVector(input_dim) -> LSTM(latent_dim, sequences)
        -> per-step Dense(1) -> flattened back to (input_dim,).

        Returns:
            keras.Model: Autoencoder compiled with Adam and MSE loss
        """
        inputs = layers.Input(shape=(self.input_dim,))

        # Treat the feature vector as a length-input_dim sequence with one channel
        x = layers.Reshape((self.input_dim, 1))(inputs)

        # CNN feature enhancement
        x = layers.Conv1D(
            filters=64,
            kernel_size=3,
            activation='relu',
            padding='same'
        )(x)
        x = layers.BatchNormalization()(x)
        x = layers.MaxPooling1D(pool_size=2)(x)
        x = layers.Dropout(0.3)(x)

        # LSTM encoder: collapse the pooled sequence into a single vector
        x = layers.LSTM(
            units=self.latent_dim,
            return_sequences=False
        )(x)

        # Latent representation
        encoded = layers.Dense(
            self.latent_dim,
            activation='relu'
        )(x)

        # LSTM decoder: one time step per original feature
        x = layers.RepeatVector(self.input_dim)(encoded)
        x = layers.LSTM(
            units=self.latent_dim,
            return_sequences=True
        )(x)

        # One linear output per time step, i.e. one value per feature
        decoded = layers.TimeDistributed(
            layers.Dense(1, activation='linear')
        )(x)

        # (input_dim, 1) -> (input_dim,) so the output matches the input shape
        decoded = layers.Flatten()(decoded)

        autoencoder = keras.Model(inputs=inputs, outputs=decoded)

        optimizer = keras.optimizers.Adam(learning_rate=self.learning_rate)
        autoencoder.compile(
            optimizer=optimizer,
            loss='mean_squared_error'
        )

        return autoencoder

    def train(self, X_train, X_val=None, epochs=50, batch_size=64):
        """
        Train the Autoencoder model

        Args:
            X_train (np.array): Training data
            X_val (np.array, optional): Validation data
            epochs (int): Number of training epochs
            batch_size (int): Batch size for training

        Returns:
            history: Training history
        """
        # Without validation data Keras never reports 'val_loss', so callbacks
        # watching it would have nothing to act on; fall back to the training loss.
        has_validation = X_val is not None
        monitored = 'val_loss' if has_validation else 'loss'

        # Early stopping to prevent overfitting
        early_stopping = keras.callbacks.EarlyStopping(
            monitor=monitored,
            patience=10,
            restore_best_weights=True
        )

        # Reduce learning rate on plateau
        reduce_lr = keras.callbacks.ReduceLROnPlateau(
            monitor=monitored,
            factor=0.5,
            patience=5,
            min_lr=1e-5
        )

        history = self.model.fit(
            X_train, X_train,  # Autoencoder reconstructs input
            epochs=epochs,
            batch_size=batch_size,
            validation_data=(X_val, X_val) if has_validation else None,
            callbacks=[early_stopping, reduce_lr],
            verbose=1
        )

        return history

    def _reconstruction_errors(self, X):
        """
        Compute the mean squared reconstruction error of every sample

        Args:
            X (array-like): Samples, one row per sample

        Returns:
            np.array: One error value per row of X (empty if X has no rows)
        """
        X = np.asarray(X)
        if X.shape[0] == 0:
            # Nothing to score; skip the model call entirely
            return np.zeros(0, dtype=float)
        reconstructions = self.model.predict(X)
        return np.mean(np.square(X - reconstructions), axis=1)

    def calculate_threshold(self, X_val, percentile=95):
        """
        Calculate reconstruction error threshold

        Args:
            X_val (np.array): Validation data
            percentile (float): Percentile for anomaly threshold

        Returns:
            float: Anomaly detection threshold

        Raises:
            ValueError: If X_val contains no samples
        """
        errors = self._reconstruction_errors(X_val)
        if errors.size == 0:
            raise ValueError("X_val must contain at least one sample")
        # Plain float keeps the value easy to print and serialize
        return float(np.percentile(errors, percentile))

    def detect_anomalies(self, X_test, threshold):
        """
        Detect anomalies in network traffic

        Args:
            X_test (np.array): Test data
            threshold (float): Anomaly detection threshold

        Returns:
            np.array: Boolean mask of anomalies (True where error > threshold)
        """
        return self._reconstruction_errors(X_test) > threshold

    def save_model(self, model_path='autoencoder_lstm_model.h5'):
        """
        Save trained model

        Args:
            model_path (str): Path to save model
        """
        self.model.save(model_path)
        print(f"Model saved to {model_path}")

def preprocess_data(file_path, test_size=0.2, random_state=42,
                    label_column='Attack_label'):
    """
    Load the dataset and split its feature matrix into train/validation parts

    Args:
        file_path (str): Path to the CSV dataset
        test_size (float): Proportion of validation data
        random_state (int): Random seed for reproducibility
        label_column (str): Target column removed before training, since the
            autoencoder only sees features

    Returns:
        tuple: (X_train, X_val) arrays of feature rows

    Raises:
        KeyError: If label_column is not a column of the file
        Exception: Any other loading/splitting error is printed and re-raised
    """
    # NOTE(review): no scaling is applied here although MinMaxScaler is
    # imported at the top of the cell; the training loss recorded below
    # (~3.8e7) suggests the features may not be normalised — confirm upstream.
    try:
        df = pd.read_csv(file_path)

        if label_column not in df.columns:
            raise KeyError(
                f"label column {label_column!r} not found; "
                f"available columns: {list(df.columns)}"
            )

        # Keep only the feature columns
        X = df.drop(columns=[label_column]).values

        X_train, X_val = train_test_split(
            X,
            test_size=test_size,
            random_state=random_state
        )

        return X_train, X_val

    except Exception as e:
        print(f"Error in data preprocessing: {e}")
        raise

def main(dataset_path='training_dataset.csv',
         model_save_path='autoencoder_lstm_model.h5',
         threshold_save_path='anomaly_threshold.pkl'):
    """
    Run the training pipeline: load data, train, derive threshold, persist

    Failures are reported (message plus traceback) but not re-raised, so the
    surrounding cell keeps running.

    Args:
        dataset_path (str): CSV file passed to preprocess_data
        model_save_path (str): Where the trained model is written
        threshold_save_path (str): Where the anomaly threshold is written
    """
    # NOTE(review): the other training cells in this notebook use the same
    # default output paths, so each run overwrites the previous artifacts.
    import traceback  # local import: only needed on the failure path

    try:
        # Preprocess data
        X_train, X_val = preprocess_data(dataset_path)

        # Print data shapes for verification
        print(f"Training data shape: {X_train.shape}")
        print(f"Validation data shape: {X_val.shape}")

        # One input unit per feature column
        nids = AdaptiveNIDS(input_dim=X_train.shape[1])
        nids.train(X_train, X_val)

        # Calculate anomaly threshold
        threshold = nids.calculate_threshold(X_val)
        print(f"Anomaly Threshold: {threshold}")

        # Save model, then the threshold needed at inference time
        nids.save_model(model_save_path)
        joblib.dump({'threshold': threshold}, threshold_save_path)
        print(f"Threshold saved to {threshold_save_path}")

    except Exception as e:
        print(f"An error occurred during NIDS training: {e}")
        # Keep the stack trace so the failing step can be located
        traceback.print_exc()

# Entry point: run the training pipeline when this code executes as the main module
if __name__ == '__main__':
    main()

# Wall-clock seconds elapsed since start_time was recorded at the top of the cell
end_time = time.time()
ex_time = end_time - start_time
# Bare expression; presumably left last so the notebook echoes the elapsed seconds
ex_time

Training data shape: (57120, 44)
Validation data shape: (14281, 44)
Epoch 1/50
[1m893/893[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 17ms/step - loss: 38557228.0000 - val_loss: 38400276.0000 - learning_rate: 0.0010
Epoch 2/50
[1m893/893[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 16ms/step - loss: 38655144.0000 - val_loss: 38363304.0000 - learning_rate: 0.0010
Epoch 3/50
[1m893/893[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 16ms/step - loss: 38484036.0000 - val_loss: 38326648.0000 - learning_rate: 0.0010
Epoch 4/50
[1m893/893[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 16ms/step - loss: 38300188.0000 - val_loss: 38290116.0000 - learning_rate: 0.0010
Epoch 5/50
[1m893/893[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 16ms/step - loss: 38626332.0000 - val_loss: 38253680.0000 - learning_rate: 0.0010
Epoch 6/50
[1m893/893[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 16ms/step - loss: 38344596.0000 - val_loss: 38217256.0



Anomaly Threshold: 100510342.91244602
Model saved to autoencoder_lstm_model.h5
Threshold saved to anomaly_threshold.pkl


757.7933580875397

In [29]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import joblib
import os
import time

# Wall-clock start of this cell; the elapsed time is derived from it at the bottom of the cell
start_time = time.time()

# Suppress TensorFlow warnings (the TensorFlow logger only emits ERROR and above)
tf.get_logger().setLevel('ERROR')

class AdaptiveNIDS:
    """
    Reconstruction-based network intrusion detector.

    Wraps a Conv1D + LSTM autoencoder that is trained to reproduce its own
    input. A sample is flagged as anomalous when its mean squared
    reconstruction error exceeds a threshold derived from validation data.
    """

    def __init__(self, input_dim, latent_dim=32, learning_rate=1e-3):
        """
        Initialize Adaptive Network Intrusion Detection System

        Args:
            input_dim (int): Number of input features
            latent_dim (int): Width of the LSTM layers and of the dense bottleneck
            learning_rate (float): Initial learning rate for Adam optimizer

        Raises:
            ValueError: If input_dim is below 2 or latent_dim is below 1
        """
        # MaxPooling1D(pool_size=2) halves the feature axis, so a single
        # feature would leave the encoder LSTM with an empty sequence.
        if input_dim < 2:
            raise ValueError(f"input_dim must be at least 2, got {input_dim}")
        if latent_dim < 1:
            raise ValueError(f"latent_dim must be positive, got {latent_dim}")

        self.input_dim = input_dim
        self.latent_dim = latent_dim
        self.learning_rate = learning_rate

        # Build and compile the network once; every other method uses self.model
        self.model = self._build_autoencoder_cnn_model()

    def _build_autoencoder_cnn_model(self):
        """
        Construct the Conv1D + LSTM autoencoder

        Data flow: (input_dim,) -> (input_dim, 1) -> Conv1D(64) -> BatchNorm
        -> MaxPool(2) -> Dropout(0.3) -> LSTM(latent_dim) -> Dense(latent_dim)
        -> RepeatVector(input_dim) -> LSTM(latent_dim, sequences)
        -> per-step Dense(1) -> flattened back to (input_dim,).

        Returns:
            keras.Model: Autoencoder compiled with Adam and MSE loss
        """
        inputs = layers.Input(shape=(self.input_dim,))

        # Treat the feature vector as a length-input_dim sequence with one channel
        x = layers.Reshape((self.input_dim, 1))(inputs)

        # CNN feature enhancement
        x = layers.Conv1D(
            filters=64,
            kernel_size=3,
            activation='relu',
            padding='same'
        )(x)
        x = layers.BatchNormalization()(x)
        x = layers.MaxPooling1D(pool_size=2)(x)
        x = layers.Dropout(0.3)(x)

        # LSTM encoder: collapse the pooled sequence into a single vector
        x = layers.LSTM(
            units=self.latent_dim,
            return_sequences=False
        )(x)

        # Latent representation
        encoded = layers.Dense(
            self.latent_dim,
            activation='relu'
        )(x)

        # LSTM decoder: one time step per original feature
        x = layers.RepeatVector(self.input_dim)(encoded)
        x = layers.LSTM(
            units=self.latent_dim,
            return_sequences=True
        )(x)

        # One linear output per time step, i.e. one value per feature
        decoded = layers.TimeDistributed(
            layers.Dense(1, activation='linear')
        )(x)

        # (input_dim, 1) -> (input_dim,) so the output matches the input shape
        decoded = layers.Flatten()(decoded)

        autoencoder = keras.Model(inputs=inputs, outputs=decoded)

        optimizer = keras.optimizers.Adam(learning_rate=self.learning_rate)
        autoencoder.compile(
            optimizer=optimizer,
            loss='mean_squared_error'
        )

        return autoencoder

    def train(self, X_train, X_val=None, epochs=50, batch_size=128):
        """
        Train the Autoencoder model

        Args:
            X_train (np.array): Training data
            X_val (np.array, optional): Validation data
            epochs (int): Number of training epochs
            batch_size (int): Batch size for training

        Returns:
            history: Training history
        """
        # Without validation data Keras never reports 'val_loss', so callbacks
        # watching it would have nothing to act on; fall back to the training loss.
        has_validation = X_val is not None
        monitored = 'val_loss' if has_validation else 'loss'

        # Early stopping to prevent overfitting
        early_stopping = keras.callbacks.EarlyStopping(
            monitor=monitored,
            patience=10,
            restore_best_weights=True
        )

        # Reduce learning rate on plateau
        reduce_lr = keras.callbacks.ReduceLROnPlateau(
            monitor=monitored,
            factor=0.5,
            patience=5,
            min_lr=1e-5
        )

        history = self.model.fit(
            X_train, X_train,  # Autoencoder reconstructs input
            epochs=epochs,
            batch_size=batch_size,
            validation_data=(X_val, X_val) if has_validation else None,
            callbacks=[early_stopping, reduce_lr],
            verbose=1
        )

        return history

    def _reconstruction_errors(self, X):
        """
        Compute the mean squared reconstruction error of every sample

        Args:
            X (array-like): Samples, one row per sample

        Returns:
            np.array: One error value per row of X (empty if X has no rows)
        """
        X = np.asarray(X)
        if X.shape[0] == 0:
            # Nothing to score; skip the model call entirely
            return np.zeros(0, dtype=float)
        reconstructions = self.model.predict(X)
        return np.mean(np.square(X - reconstructions), axis=1)

    def calculate_threshold(self, X_val, percentile=95):
        """
        Calculate reconstruction error threshold

        Args:
            X_val (np.array): Validation data
            percentile (float): Percentile for anomaly threshold

        Returns:
            float: Anomaly detection threshold

        Raises:
            ValueError: If X_val contains no samples
        """
        errors = self._reconstruction_errors(X_val)
        if errors.size == 0:
            raise ValueError("X_val must contain at least one sample")
        # Plain float keeps the value easy to print and serialize
        return float(np.percentile(errors, percentile))

    def detect_anomalies(self, X_test, threshold):
        """
        Detect anomalies in network traffic

        Args:
            X_test (np.array): Test data
            threshold (float): Anomaly detection threshold

        Returns:
            np.array: Boolean mask of anomalies (True where error > threshold)
        """
        return self._reconstruction_errors(X_test) > threshold

    def save_model(self, model_path='autoencoder_lstm_model.h5'):
        """
        Save trained model

        Args:
            model_path (str): Path to save model
        """
        self.model.save(model_path)
        print(f"Model saved to {model_path}")

def preprocess_data(file_path, test_size=0.2, random_state=42,
                    label_column='Attack_label'):
    """
    Load the dataset and split its feature matrix into train/validation parts

    Args:
        file_path (str): Path to the CSV dataset
        test_size (float): Proportion of validation data
        random_state (int): Random seed for reproducibility
        label_column (str): Target column removed before training, since the
            autoencoder only sees features

    Returns:
        tuple: (X_train, X_val) arrays of feature rows

    Raises:
        KeyError: If label_column is not a column of the file
        Exception: Any other loading/splitting error is printed and re-raised
    """
    # NOTE(review): no scaling is applied here although MinMaxScaler is
    # imported at the top of the cell; the training loss recorded below
    # (~3.8e7) suggests the features may not be normalised — confirm upstream.
    try:
        df = pd.read_csv(file_path)

        if label_column not in df.columns:
            raise KeyError(
                f"label column {label_column!r} not found; "
                f"available columns: {list(df.columns)}"
            )

        # Keep only the feature columns
        X = df.drop(columns=[label_column]).values

        X_train, X_val = train_test_split(
            X,
            test_size=test_size,
            random_state=random_state
        )

        return X_train, X_val

    except Exception as e:
        print(f"Error in data preprocessing: {e}")
        raise

def main(dataset_path='training_dataset.csv',
         model_save_path='autoencoder_lstm_model.h5',
         threshold_save_path='anomaly_threshold.pkl'):
    """
    Run the training pipeline: load data, train, derive threshold, persist

    Failures are reported (message plus traceback) but not re-raised, so the
    surrounding cell keeps running.

    Args:
        dataset_path (str): CSV file passed to preprocess_data
        model_save_path (str): Where the trained model is written
        threshold_save_path (str): Where the anomaly threshold is written
    """
    # NOTE(review): the other training cells in this notebook use the same
    # default output paths, so each run overwrites the previous artifacts.
    import traceback  # local import: only needed on the failure path

    try:
        # Preprocess data
        X_train, X_val = preprocess_data(dataset_path)

        # Print data shapes for verification
        print(f"Training data shape: {X_train.shape}")
        print(f"Validation data shape: {X_val.shape}")

        # One input unit per feature column
        nids = AdaptiveNIDS(input_dim=X_train.shape[1])
        nids.train(X_train, X_val)

        # Calculate anomaly threshold
        threshold = nids.calculate_threshold(X_val)
        print(f"Anomaly Threshold: {threshold}")

        # Save model, then the threshold needed at inference time
        nids.save_model(model_save_path)
        joblib.dump({'threshold': threshold}, threshold_save_path)
        print(f"Threshold saved to {threshold_save_path}")

    except Exception as e:
        print(f"An error occurred during NIDS training: {e}")
        # Keep the stack trace so the failing step can be located
        traceback.print_exc()

# Entry point: run the training pipeline when this code executes as the main module
if __name__ == '__main__':
    main()

# Wall-clock seconds elapsed since start_time was recorded at the top of the cell
end_time = time.time()
ex_time = end_time - start_time
# Bare expression; presumably left last so the notebook echoes the elapsed seconds
ex_time

Training data shape: (57120, 44)
Validation data shape: (14281, 44)
Epoch 1/50
[1m447/447[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 33ms/step - loss: 38723436.0000 - val_loss: 38417708.0000 - learning_rate: 0.0010
Epoch 2/50
[1m447/447[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 28ms/step - loss: 38521992.0000 - val_loss: 38398636.0000 - learning_rate: 0.0010
Epoch 3/50
[1m447/447[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 27ms/step - loss: 38342388.0000 - val_loss: 38379996.0000 - learning_rate: 0.0010
Epoch 4/50
[1m447/447[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 29ms/step - loss: 38777364.0000 - val_loss: 38361532.0000 - learning_rate: 0.0010
Epoch 5/50
[1m447/447[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 29ms/step - loss: 38829800.0000 - val_loss: 38343148.0000 - learning_rate: 0.0010
Epoch 6/50
[1m447/447[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 28ms/step - loss: 38579472.0000 - val_loss: 38324776.0



Anomaly Threshold: 102216439.90409833
Model saved to autoencoder_lstm_model.h5
Threshold saved to anomaly_threshold.pkl


658.4905989170074