# In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, LSTM, Dense, Dropout, BatchNormalization
import matplotlib.pyplot as plt
import seaborn as sns

class CLSTMAnomalyDetector:
    """
    Build, train and inspect a Convolutional-LSTM (CLSTM) binary classifier
    for anomaly detection on windowed (sequence) data.

    Attributes:
    -----------
    lookback : int
        The number of previous timesteps (rows) in each input window.
    model : tf.keras.Model or None
        The compiled CLSTM model; ``None`` until ``build_model`` is called.
    history : History object or None
        Training history returned by ``fit``; ``None`` until ``train`` is called.
    input_shape : tuple or None
        The per-sample input shape given to ``build_model``.
    """

    def __init__(self, lookback=10):
        """Initialize the CLSTMAnomalyDetector with the given lookback period."""
        self.lookback = lookback
        self.model = None
        self.history = None
        self.input_shape = None

    def prepare_sequences(self, X, y=None):
        """
        Convert row-wise data into overlapping windows of ``lookback`` rows.

        Window ``i`` holds rows ``i .. i + lookback - 1`` of ``X``; its target
        is the label at position ``i + lookback`` (the row right after the
        window). All indexing is positional.

        Parameters:
        -----------
        X : array-like
            The input features, one row per timestep.
        y : array-like or None, optional
            The per-row targets (if available).

        Returns:
        --------
        np.ndarray
            Windows of shape ``(len(X) - lookback, lookback, ...)`` when ``y``
            is None.
        tuple of (np.ndarray, np.ndarray)
            ``(windows, targets)`` when ``y`` is given.

        Raises:
        -------
        IndexError
            If ``y`` is shorter than ``X``.
        """
        # Work on plain arrays: indexing a pandas Series with an integer is a
        # *label* lookup, which is wrong (or raises KeyError) once the index
        # has been shuffled, e.g. by train_test_split.
        X_arr = np.asarray(X)
        n_sequences = max(len(X_arr) - self.lookback, 0)

        if n_sequences:
            sequences_X = np.stack(
                [X_arr[start:start + self.lookback] for start in range(n_sequences)]
            )
        else:
            # Too few rows for a single window: keep a well-formed empty batch
            # so callers can still inspect shape[1:] safely.
            sequences_X = np.empty(
                (0, self.lookback) + X_arr.shape[1:], dtype=X_arr.dtype
            )

        if y is None:
            return sequences_X

        y_arr = np.asarray(y)
        # Fancy indexing (rather than slicing) so a too-short y still raises.
        sequences_y = y_arr[self.lookback + np.arange(n_sequences)]
        return sequences_X, sequences_y

    def build_model(self, input_shape):
        """
        Build and compile the CLSTM model architecture.

        Parameters:
        -----------
        input_shape : tuple
            Shape of the input data (timesteps, features).

        Returns:
        --------
        tf.keras.Model
            The compiled CLSTM model (also stored on ``self.model``).
        """
        self.input_shape = input_shape

        model = Sequential([
            # Explicit input spec; passing input_shape= to the first layer is
            # deprecated in recent Keras versions.
            tf.keras.Input(shape=input_shape),

            # First Convolutional Layer
            Conv1D(filters=64, kernel_size=3, activation='relu', padding='same'),
            BatchNormalization(),

            # Second Convolutional Layer
            Conv1D(filters=128, kernel_size=3, activation='relu', padding='same'),
            BatchNormalization(),

            # LSTM Layers
            LSTM(128, return_sequences=True),
            Dropout(0.3),
            LSTM(64),
            Dropout(0.3),

            # Dense Layers for classification
            Dense(64, activation='relu'),
            BatchNormalization(),
            Dropout(0.3),
            Dense(32, activation='relu'),
            Dense(1, activation='sigmoid')  # Output layer for binary classification
        ])

        # Compile the model with Adam optimizer and binary crossentropy loss
        model.compile(optimizer='adam',
                      loss='binary_crossentropy',
                      metrics=['accuracy'])

        self.model = model
        return model

    def train(self, X_train, y_train, X_val, y_val, epochs=50, batch_size=64):
        """
        Train the CLSTM model with training and validation data.

        Parameters:
        -----------
        X_train : np.ndarray
            Training features.
        y_train : np.ndarray
            Training labels.
        X_val : np.ndarray
            Validation features.
        y_val : np.ndarray
            Validation labels.
        epochs : int, optional
            Maximum number of training epochs.
        batch_size : int, optional
            Batch size for training.

        Returns:
        --------
        History object
            The training history (also stored on ``self.history``).

        Raises:
        -------
        RuntimeError
            If ``build_model`` has not been called yet.
        """
        if self.model is None:
            raise RuntimeError("Model has not been built; call build_model() first.")

        # Stop once validation loss has not improved for 5 epochs and roll
        # back to the best weights seen.
        early_stopping = tf.keras.callbacks.EarlyStopping(
            monitor='val_loss', patience=5, restore_best_weights=True
        )
        self.history = self.model.fit(
            X_train, y_train,
            epochs=epochs,
            batch_size=batch_size,
            validation_data=(X_val, y_val),
            callbacks=[early_stopping]
        )
        return self.history

    def plot_training_history(self):
        """
        Plot the training and validation loss and accuracy over epochs.

        Raises:
        -------
        RuntimeError
            If ``train`` has not been called yet.
        """
        if self.history is None:
            raise RuntimeError("No training history available; call train() first.")

        metrics = self.history.history
        plt.figure(figsize=(12, 4))

        # Plot loss over epochs
        plt.subplot(1, 2, 1)
        plt.plot(metrics['loss'], label='Training Loss')
        plt.plot(metrics['val_loss'], label='Validation Loss')
        plt.title('Model Loss')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.legend()

        # Plot accuracy over epochs
        plt.subplot(1, 2, 2)
        plt.plot(metrics['accuracy'], label='Training Accuracy')
        plt.plot(metrics['val_accuracy'], label='Validation Accuracy')
        plt.title('Model Accuracy')
        plt.xlabel('Epoch')
        plt.ylabel('Accuracy')
        plt.legend()

        plt.tight_layout()
        plt.show()

    def plot_confusion_matrix(self, y_true, y_pred):
        """
        Plot the confusion matrix for the true and predicted labels.

        Parameters:
        -----------
        y_true : array-like
            Ground-truth labels.
        y_pred : array-like
            Predicted labels or probabilities; values are rounded to the
            nearest integer before comparison.
        """
        # Accept lists, Series or (n, 1) model outputs alike.
        true_labels = np.asarray(y_true).ravel()
        predicted_labels = np.rint(np.asarray(y_pred)).astype(int).ravel()

        cm = confusion_matrix(true_labels, predicted_labels)
        plt.figure(figsize=(8, 6))
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
        plt.title('Confusion Matrix')
        plt.ylabel('True Label')
        plt.xlabel('Predicted Label')
        plt.show()

def main(data_path="preprocessed_ddos_dataset_1.csv",
         model_path="clstm_anomaly_detector.h5"):
    """
    Load data, train the CLSTM model, evaluate it and save it.

    Parameters:
    -----------
    data_path : str, optional
        CSV file to read; it must contain a 'Label' column, and every other
        column is used as a feature.
    model_path : str, optional
        Destination file for the trained model.
    """
    # Load preprocessed data
    print("Loading data...")
    df = pd.read_csv(data_path)

    # Separate features (X) and target (y)
    X = df.drop('Label', axis=1).values
    # Binary target as a plain NumPy array: a pandas Series would keep its
    # (shuffled) index after the split, turning the integer indexing in
    # prepare_sequences into label lookups instead of positional ones.
    y = (df['Label'] != 0).astype(int).to_numpy()

    # Split the data into training (70%), validation (15%) and test (15%) sets
    # NOTE(review): train_test_split shuffles rows by default, so the windows
    # built below are not made of consecutive rows. If row order is temporal,
    # consider shuffle=False — confirm against how the dataset was produced.
    X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
    X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

    # Initialize the anomaly detector
    print("Preparing sequences...")
    detector = CLSTMAnomalyDetector(lookback=10)

    # Prepare sequences for the CLSTM model
    X_train_seq, y_train_seq = detector.prepare_sequences(X_train, y_train)
    X_val_seq, y_val_seq = detector.prepare_sequences(X_val, y_val)
    X_test_seq, y_test_seq = detector.prepare_sequences(X_test, y_test)

    # Build and train the CLSTM model
    print("Building and training model...")
    detector.build_model(input_shape=(X_train_seq.shape[1], X_train_seq.shape[2]))
    detector.model.summary()  # Display model architecture

    # Train the model (the history is kept on the detector for plotting)
    detector.train(X_train_seq, y_train_seq, X_val_seq, y_val_seq, epochs=50, batch_size=64)

    # Evaluate the model on test data
    print("\nEvaluating model...")
    _test_loss, test_accuracy = detector.model.evaluate(X_test_seq, y_test_seq)
    print(f"\nTest Accuracy: {test_accuracy:.4f}")

    # Make predictions on test data
    y_pred = detector.model.predict(X_test_seq)

    # Print classification report
    print("\nClassification Report:")
    print(classification_report(y_test_seq, y_pred.round()))

    # Plot the training history and confusion matrix
    detector.plot_training_history()
    detector.plot_confusion_matrix(y_test_seq, y_pred)

    # Save the trained model
    detector.model.save(model_path)
    print(f"\nModel saved as '{model_path}'")

# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
