In [3]:
import time
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc
from sklearn.preprocessing import label_binarize
from sklearn.utils import class_weight
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from imblearn.over_sampling import SMOTE
import os
from scipy import stats

start_time = time.time()
tf.get_logger().setLevel('ERROR')

class EnhancedAdaptiveNIDS:
    def __init__(self, input_dim, latent_dim=32, learning_rate=1e-4):  # Increased latent_dim
        self.input_dim = input_dim
        self.latent_dim = latent_dim
        self.learning_rate = learning_rate
        self.model = self._build_autoencoder()
        self.threshold = None  # Will be set dynamically

    def _build_autoencoder(self):
        # Simplified architecture with better normalization
        inputs = layers.Input(shape=(self.input_dim,))
        x = layers.BatchNormalization()(inputs)
        
        # Encoder
        x = layers.Dense(128, activation='relu')(x)
        x = layers.BatchNormalization()(x)
        x = layers.Dropout(0.2)(x)
        x = layers.Dense(64, activation='relu')(x)
        x = layers.BatchNormalization()(x)
        x = layers.Dropout(0.2)(x)
        
        # Latent representation
        encoded = layers.Dense(self.latent_dim, activation='relu')(x)
        
        # Decoder
        x = layers.Dense(64, activation='relu')(encoded)
        x = layers.BatchNormalization()(x)
        x = layers.Dropout(0.2)(x)
        x = layers.Dense(128, activation='relu')(x)
        x = layers.BatchNormalization()(x)
        x = layers.Dropout(0.2)(x)
        
        # Output
        decoded = layers.Dense(self.input_dim, activation='linear')(x)
        
        # Model
        autoencoder = keras.Model(inputs=inputs, outputs=decoded)
        autoencoder.compile(
            optimizer=keras.optimizers.Adam(learning_rate=self.learning_rate),
            loss='mean_squared_error'
        )
        return autoencoder

    def train(self, X_train, X_val, epochs=50, batch_size=64):
        # Add callbacks for better training
        early_stopping = keras.callbacks.EarlyStopping(
            monitor='val_loss', 
            patience=10, 
            restore_best_weights=True
        )
        
        reduce_lr = keras.callbacks.ReduceLROnPlateau(
            monitor='val_loss',
            factor=0.5,
            patience=5,
            min_lr=1e-6
        )
        
        history = self.model.fit(
            X_train, X_train,
            epochs=epochs,
            batch_size=batch_size,
            validation_data=(X_val, X_val),
            callbacks=[early_stopping, reduce_lr],
            verbose=1
        )
        
        # Visualize training progress
        plt.figure(figsize=(12, 4))
        plt.subplot(1, 2, 1)
        plt.plot(history.history['loss'], label='Training Loss')
        plt.plot(history.history['val_loss'], label='Validation Loss')
        plt.title('Autoencoder Loss')
        plt.legend()
        plt.savefig('autoencoder_loss.png')
        plt.close()
        
        # Set threshold based on reconstruction errors
        self._set_dynamic_threshold(X_train)
        
        return history

    def _set_dynamic_threshold(self, X_data):
        # Calculate reconstruction errors
        reconstructed = self.model.predict(X_data)
        mse = np.mean(np.square(X_data - reconstructed), axis=1)
        
        # Set threshold using percentile instead of fixed value
        # 95th percentile means 5% of data will be flagged as anomalies
        self.threshold = np.percentile(mse, 95)
        print(f"Dynamic threshold set to: {self.threshold}")
        
        # Visualize the error distribution
        plt.figure(figsize=(10, 6))
        plt.hist(mse, bins=50)
        plt.axvline(self.threshold, color='r', linestyle='--', label=f'Threshold: {self.threshold:.6f}')
        plt.title('Reconstruction Error Distribution')
        plt.xlabel('Mean Squared Error')
        plt.ylabel('Frequency')
        plt.legend()
        plt.savefig('error_distribution.png')
        plt.close()

    def detect_anomalies(self, X_data):
        if self.threshold is None:
            raise ValueError("Model hasn't been trained yet. Call train() first.")
        
        reconstructed = self.model.predict(X_data)
        errors = np.mean(np.square(X_data - reconstructed), axis=1)
        
        anomaly_indices = np.where(errors > self.threshold)[0]
        
        # Define confidence as normalized error (optional)
        confidence = errors / np.max(errors) if len(errors) > 0 else np.array([])

        return X_data[anomaly_indices], anomaly_indices, errors, confidence


    def get_encoded_features(self, X_data):
        # Create a model that outputs the encoded features
        encoder = keras.Model(inputs=self.model.input, 
                             outputs=self.model.layers[6].output)  # Layer index may need adjustment
        return encoder.predict(X_data)


class AdaptiveNIDSLayer2:
    def __init__(self, input_dim, num_classes, seq_length=10):
        self.input_dim = input_dim
        self.num_classes = num_classes
        self.seq_length = seq_length
        self.model = self._build_model()
        self.class_weights = None  # For handling class imbalance

    # Fix for the error in AdaptiveNIDSLayer2._build_model
# The issue is with the Dot layer - the dimensions don't match between x and attention_weights

    def _build_model(self):
        from tensorflow.keras import layers, regularizers
        
        inputs = layers.Input(shape=(self.seq_length, self.input_dim))
        
        # CNN layers with L2 regularization
        x = layers.Conv1D(64, 3, activation='relu', padding='same',
                        kernel_regularizer=regularizers.l2(1e-5))(inputs)
        x = layers.BatchNormalization()(x)
        x = layers.MaxPooling1D(2)(x)
        
        x = layers.Conv1D(128, 3, activation='relu', padding='same',
                        kernel_regularizer=regularizers.l2(1e-5))(x)
        x = layers.BatchNormalization()(x)
        x = layers.MaxPooling1D(2)(x)  # Added another pooling layer
        
        # Add residual connection
        shortcut = layers.Conv1D(128, 1)(inputs)
        shortcut = layers.BatchNormalization()(shortcut)
        shortcut = layers.MaxPooling1D(4)(shortcut)  # Match the shape after two MaxPooling1D(2)
        x = layers.add([x, shortcut])
        x = layers.Activation('relu')(x)
        
        # BiLSTM layers with increased complexity
        x = layers.Bidirectional(layers.LSTM(64, return_sequences=True,
                                        kernel_regularizer=regularizers.l2(1e-5)))(x)
        x = layers.Dropout(0.4)(x)  # Increased dropout
        x = layers.Bidirectional(layers.LSTM(32,
                                        kernel_regularizer=regularizers.l2(1e-5)))(x)
        
        # Fix: Removing the attention mechanism that caused the error
        # Original problematic code:
        # attention = layers.Dense(1, activation='tanh')(x)
        # attention = layers.Flatten()(attention)
        # attention_weights = layers.Activation('softmax')(attention)
        # context_vector = layers.Dot(axes=1)([x, attention_weights])
        
        # Replacement: Use a simpler approach without the Dot layer
        context_vector = x  # Just use the BiLSTM output directly
        
        # Dense layers with residual connections
        dense1 = layers.Dense(64, activation='relu',
                            kernel_regularizer=regularizers.l2(1e-5))(context_vector)
        dense1 = layers.BatchNormalization()(dense1)
        dense1 = layers.Dropout(0.4)(dense1)
        
        # Output with temperature scaling for better calibration
        logits = layers.Dense(self.num_classes)(dense1)
        temperature = 1.5  # Temperature parameter (>1 makes predictions smoother)
        scaled_logits = layers.Lambda(lambda x: x / temperature)(logits)
        outputs = layers.Activation('softmax')(scaled_logits)
        
        from tensorflow import keras
        model = keras.Model(inputs=inputs, outputs=outputs)
        
        # Use weighted categorical crossentropy for class imbalance
        model.compile(
            optimizer=keras.optimizers.Adam(1e-3),
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy']
        )
        return model

    def compute_class_weights(self, y_train):
        """
        Calculate class weights to handle imbalance, handling float labels by converting to int
        """
        # Convert float labels to integers
        y_train_int = y_train.astype(int)
        
        # Calculate class weights
        unique_classes = np.unique(y_train_int)
        class_counts = np.bincount(y_train_int)
        total = len(y_train_int)
        
        # Create class weights dictionary
        self.class_weights = {i: total / (len(unique_classes) * count) 
                            for i, count in enumerate(class_counts)}
        print("Class weights:", self.class_weights)
        return self.class_weights

    def train(self, X_train, y_train, X_val=None, y_val=None, epochs=50, batch_size=64):
        # Compute class weights if not already done
        if self.class_weights is None:
            self.compute_class_weights(y_train)
        
        callbacks = [
            keras.callbacks.EarlyStopping(
                monitor='val_loss',
                patience=15,  # Increased patience
                restore_best_weights=True
            ),
            keras.callbacks.ReduceLROnPlateau(
                monitor='val_loss',
                factor=0.2,  # More aggressive reduction
                patience=7,
                min_lr=1e-7
            ),
            # Add model checkpoint to save best model
            keras.callbacks.ModelCheckpoint(
                'best_layer2_model.h5',
                save_best_only=True,
                monitor='val_accuracy',
                mode='max'
            ),
            # Add TensorBoard callback
            keras.callbacks.TensorBoard(
                log_dir=f'./logs/layer2_{time.strftime("%Y%m%d-%H%M%S")}',
                histogram_freq=1
            )
        ]
        
        # Use validation data if provided, otherwise use validation_split
        if X_val is not None and y_val is not None:
            history = self.model.fit(
                X_train, y_train,
                epochs=epochs,
                batch_size=batch_size,
                validation_data=(X_val, y_val),
                callbacks=callbacks,
                class_weight=self.class_weights  # Use class weights
            )
        else:
            history = self.model.fit(
                X_train, y_train,
                epochs=epochs,
                batch_size=batch_size,
                validation_split=0.2,
                callbacks=callbacks,
                class_weight=self.class_weights
            )
        
        # Plot training metrics
        plt.figure(figsize=(12, 8))
        
        # Plot losses
        plt.subplot(2, 2, 1)
        plt.plot(history.history['loss'], label='Training Loss')
        plt.plot(history.history['val_loss'], label='Validation Loss')
        plt.title('Model Loss')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.legend()
        plt.grid(True, alpha=0.3)
        
        # Plot accuracy
        plt.subplot(2, 2, 2)
        plt.plot(history.history['accuracy'], label='Training Accuracy')
        plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
        plt.title('Model Accuracy')
        plt.xlabel('Epoch')
        plt.ylabel('Accuracy')
        plt.legend()
        plt.grid(True, alpha=0.3)
        
        # Plot learning rate if available
        if 'lr' in history.history:
            plt.subplot(2, 2, 3)
            plt.plot(history.history['lr'], label='Learning Rate')
            plt.title('Learning Rate')
            plt.xlabel('Epoch')
            plt.ylabel('Learning Rate')
            plt.yscale('log')
            plt.grid(True, alpha=0.3)
        
        plt.tight_layout()
        plt.savefig('layer2_training_metrics.png')
        plt.close()
        
        return history

    def evaluate(self, X_test, y_test):
        # Evaluate the model
        test_loss, test_accuracy = self.model.evaluate(X_test, y_test, verbose=1)
        print(f"Test accuracy: {test_accuracy:.4f}")
        print(f"Test loss: {test_loss:.4f}")
        
        # Generate predictions
        y_pred_probs = self.model.predict(X_test)
        y_pred = np.argmax(y_pred_probs, axis=1)
        
        # Generate classification report
        report = classification_report(y_test, y_pred, output_dict=True)
        print("Classification Report:")
        print(classification_report(y_test, y_pred))
        
        # Create confusion matrix
        cm = confusion_matrix(y_test, y_pred)
        plt.figure(figsize=(10, 8))
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                   xticklabels=np.unique(y_test),
                   yticklabels=np.unique(y_test))
        plt.title('Confusion Matrix')
        plt.xlabel('Predicted')
        plt.ylabel('True')
        plt.savefig('confusion_matrix_iteration_2.png')
        plt.close()
        
        # Plot ROC curve for multi-class
        plt.figure(figsize=(10, 8))
        
        # One-vs-Rest ROC curves
        if self.num_classes > 2:
            y_test_bin = label_binarize(y_test, classes=np.unique(y_test))
            fpr = dict()
            tpr = dict()
            roc_auc = dict()
            
            for i in range(self.num_classes):
                fpr[i], tpr[i], _ = roc_curve(y_test_bin[:, i], y_pred_probs[:, i])
                roc_auc[i] = auc(fpr[i], tpr[i])
                plt.plot(fpr[i], tpr[i], label=f'Class {i} (AUC = {roc_auc[i]:.2f})')
        else:
            # Binary classification
            fpr, tpr, _ = roc_curve(y_test, y_pred_probs[:, 1])
            roc_auc = auc(fpr, tpr)
            plt.plot(fpr, tpr, label=f'ROC curve (AUC = {roc_auc:.2f})')
        
        plt.plot([0, 1], [0, 1], 'k--')
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('Receiver Operating Characteristic (ROC) Curve')
        plt.legend(loc="lower right")
        plt.grid(True, alpha=0.3)
        plt.savefig('roc_curve_iteration_2.png')
        plt.close()
        
        return report, y_pred, y_pred_probs

    def save_model(self, filepath):
        """Save the model to disk"""
        self.model.save(filepath)
        print(f"Model saved to {filepath}")
        
    def load_model(self, filepath):
        """Load a saved model from disk"""
        self.model = keras.models.load_model(filepath)
        print(f"Model loaded from {filepath}")
        return self.model


def preprocess_data(file_path, test_size=0.2, random_state=42, use_smote=True, use_standard_scaler=False):
    # Load the dataset
    df = pd.read_csv(file_path)
    
    # Separate target from features
    y = df['label'] if 'label' in df.columns else df.iloc[:, -1]
    X = df.drop('label', axis=1) if 'label' in df.columns else df.iloc[:, :-1]
    
    # Identify columns with IP addresses or other string values that can't be converted to float
    non_numeric_columns = X.select_dtypes(include=['object']).columns.tolist()
    
    # Create a copy of the dataframe without non-numeric columns for scaling
    X_numeric = X.drop(columns=non_numeric_columns)
    
    # Simple IP encoding for the non-numeric columns
    # For example, convert '192.168.0.128' to components or just use the position in a list of unique IPs
    X_encoded = X.copy()
    for col in non_numeric_columns:
        # Create a mapping of unique values to integers
        unique_values = X[col].unique()
        value_to_int = {val: i for i, val in enumerate(unique_values)}
        X_encoded[col] = X[col].map(value_to_int)
    
    # Train-test split with the encoded data
    X_train, X_test, y_train, y_test = train_test_split(
        X_encoded, y, test_size=test_size, random_state=random_state, stratify=y
    )
    
    # Apply SMOTE to address class imbalance
    if use_smote:
        smote = SMOTE(random_state=random_state)
        X_train, y_train = smote.fit_resample(X_train, y_train)
    
    # Initialize and fit scaler on numeric columns only
    if use_standard_scaler:
        scaler = StandardScaler()  # Standardization (zero mean, unit variance)
        scaler_filename = 'standard_scaler_iteration_2.pkl'
    else:
        scaler = MinMaxScaler()  # Normalization (0-1 range)
        scaler_filename = 'minmax_scaler_iteration_2.pkl'
    
    # Save the mapping for future inference
    mapping_filename = 'ip_encoding_mapping.pkl'
    joblib.dump({col: {val: i for i, val in enumerate(X[col].unique())} 
                 for col in non_numeric_columns}, mapping_filename)
    
    # Save scaler for inference
    joblib.dump(scaler, scaler_filename)
    
    return X_train, X_test, y_train, y_test, scaler


def create_sequences(data, labels, seq_length=10, stride=1):
    """
    Enhanced sequence creation with stride option for better coverage
    """
    sequences, seq_labels = [], []
    labels = np.array(labels)
    
    for i in range(0, len(data) - seq_length + 1, stride):
        sequences.append(data[i:i + seq_length])
        seq_labels.append(labels[i + seq_length - 1])
    
    return np.array(sequences), np.array(seq_labels)


def evaluate_model(model, X_test, y_test, class_names=None):
    """
    Comprehensive model evaluation function
    """
    # Get predictions
    y_pred = np.argmax(model.predict(X_test), axis=1)
    
    # Generate classification report
    report = classification_report(y_test, y_pred, output_dict=True)
    print("Classification Report:")
    print(classification_report(y_test, y_pred))
    
    # Generate confusion matrix
    cm = confusion_matrix(y_test, y_pred)
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names if class_names else None,
                yticklabels=class_names if class_names else None)
    plt.title('Confusion Matrix')
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.tight_layout()
    plt.savefig('confusion_matrix_iteration_2.png')
    plt.close()
    
    # Calculate and plot ROC curves if binary classification
    if len(np.unique(y_test)) == 2:
        from sklearn.metrics import roc_curve, auc
        y_pred_prob = model.predict(X_test)[:, 1]
        fpr, tpr, _ = roc_curve(y_test, y_pred_prob)
        roc_auc = auc(fpr, tpr)
        
        plt.figure(figsize=(8, 6))
        plt.plot(fpr, tpr, label=f'ROC curve (area = {roc_auc:.3f})')
        plt.plot([0, 1], [0, 1], 'k--')
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('Receiver Operating Characteristic')
        plt.legend(loc="lower right")
        plt.savefig('roc_curve_iteration_2.png')
        plt.close()
    
    return report


# Main execution
if __name__ == "__main__":
    # Create logs and model directories if they don't exist
    os.makedirs('./logs', exist_ok=True)
    os.makedirs('./models', exist_ok=True)
    
    # Configure GPU for TensorFlow (if available)
    gpus = tf.config.experimental.list_physical_devices('GPU')
    if gpus:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print(f"Using GPU: {gpus}")
    else:
        print("No GPU found, using CPU.")
    
    # Set random seeds for reproducibility
    np.random.seed(42)
    tf.random.set_seed(42)
    
    # Record start time
    start_time = time.time()
    tf.get_logger().setLevel('ERROR')
    
    # Dataset path - use relative path for better portability
    dataset_path = "/Users/siddhantgond/Desktop/6THSEM/Project_Elective/Adaptive-Network-Intrusion-Detection-System/Implementation_Layer_2/df_fs_scaled.csv"
    
    # Preprocess data with SMOTE and MinMaxScaler
    X_train, X_test, y_train, y_test, scaler = preprocess_data(
        dataset_path, test_size=0.2, use_smote=True, use_standard_scaler=False
    )
    
    # Layer 1: Autoencoder for anomaly detection
    print("\nTraining Layer 1: Autoencoder...")
    layer1 = EnhancedAdaptiveNIDS(input_dim=X_train.shape[1])
    layer1.train(X_train, X_test, epochs=50, batch_size=64)
    
    # Detect anomalies using trained autoencoder
    anomalies, anomaly_indices, errors, confidence = layer1.detect_anomalies(X_test)
    
    print(f"Detected {len(anomaly_indices)} anomalies out of {len(X_test)} test samples.")
    print(f"Anomaly detection rate: {len(anomaly_indices)/len(X_test)*100:.2f}%")
    
    # Get encoded features for anomalies
    encoded_features = layer1.get_encoded_features(anomalies)
    
    # If anomalies array is empty, use a subset of the data
    if len(anomalies) == 0:
        print("No anomalies detected. Using top 10% of highest error samples.")
        top_n = int(len(X_test) * 0.1)
        sorted_indices = np.argsort(errors)[-top_n:]
        anomalies = X_test[sorted_indices]
        anomaly_indices = sorted_indices
        encoded_features = layer1.get_encoded_features(anomalies)
    
    # Get original labels for anomalies
    if isinstance(y_test, pd.Series):
        y_anomalies = y_test.iloc[anomaly_indices]
    else:
        y_anomalies = y_test[anomaly_indices]
    
    # Create sequences for Layer 2 with stride=2 for better coverage
    X_layer2, y_layer2 = create_sequences(encoded_features, y_anomalies, seq_length=10, stride=2)
    
    # Split data for Layer 2
    if len(np.unique(y_layer2)) > 1:  # Only stratify if multiple classes
        X_train_l2, X_test_l2, y_train_l2, y_test_l2 = train_test_split(
            X_layer2, y_layer2, test_size=0.2, random_state=42, stratify=y_layer2
        )
    else:
        X_train_l2, X_test_l2, y_train_l2, y_test_l2 = train_test_split(
            X_layer2, y_layer2, test_size=0.2, random_state=42
        )
    
    # Layer 2: CNN-BiLSTM Classification
    print("\nTraining Layer 2: CNN-BiLSTM...")
    layer2 = AdaptiveNIDSLayer2(
        input_dim=X_train_l2.shape[2],
        num_classes=len(np.unique(y_train_l2)),
        seq_length=10
    )
    layer2.train(X_train_l2, y_train_l2, X_test_l2, y_test_l2, epochs=50, batch_size=32)
    
    # Evaluate Layer 2 with comprehensive metrics
    class_names = [f"Class {i}" for i in range(len(np.unique(y_train_l2)))]
    evaluate_model(layer2.model, X_test_l2, y_test_l2, class_names)
    
    # Save models with timestamp
    timestamp = time.strftime("%Y%m%d-%H%M%S")
    layer1.model.save(f'models/autoencoder_model_{timestamp}.h5')
    layer2.model.save(f'models/cnn_bilstm_model_{timestamp}.h5')
    
    # Save model architecture as image
    try:
        tf.keras.utils.plot_model(layer1.model, to_file='models/autoencoder_architecture_iteration_2.png', 
                                  show_shapes=True, show_layer_names=True)
        tf.keras.utils.plot_model(layer2.model, to_file='models/cnn_bilstm_architecture_iteration_2.png', 
                                  show_shapes=True, show_layer_names=True)
    except:
        print("Couldn't save model architecture images. Make sure pydot is installed.")
    
    # Save training configuration
    config = {
        'dataset_path': dataset_path,
        'preprocessing': {
            'use_smote': True,
            'use_standard_scaler': False,
            'test_size': 0.2
        },
        'layer1': {
            'latent_dim': layer1.latent_dim,
            'learning_rate': layer1.learning_rate,
            'threshold': layer1.threshold
        },
        'layer2': {
            'seq_length': layer2.seq_length,
            'class_weights': layer2.class_weights
        },
        'timestamp': timestamp
    }
    
    with open(f'models/training_config_{timestamp}.json', 'w') as f:
        import json
        json.dump(config, f, indent=4)
    
    elapsed_time = time.time() - start_time
    print(f"\nTotal execution time: {elapsed_time:.2f} seconds ({elapsed_time/60:.2f} minutes)")
    print(f"Models saved with timestamp: {timestamp}")

No GPU found, using CPU.


  df = pd.read_csv(file_path)


TypeError: '<' not supported between instances of 'float' and 'str'