In [None]:
# Required libraries
from datetime import datetime

import numpy as np
import pandas as pd
from pycaret.anomaly import (
    assign_model,
    create_model,
    load_model,
    predict_model,
    save_model,
    setup,
)

# pycaret.anomaly does not ship compare_models (it exists only in the
# supervised modules), so an unguarded import aborts the whole script.
# The name is kept bound for any code that still refers to it.
try:
    from pycaret.anomaly import compare_models
except ImportError:
    compare_models = None

# Read the captured packets and coerce the Time column to a numeric dtype
# (a no-op when the CSV parser already produced numbers).
df = pd.read_csv("packet_data.csv").assign(
    Time=lambda packets: pd.to_numeric(packets['Time'])
)

# Define feature engineering functions
def feature_extraction(df):
    """Aggregate raw packets into one feature row per destination.

    Args:
        df: Packet-level DataFrame with at least the columns
            'Destination', 'Source' and 'Length'.

    Returns:
        DataFrame with the columns 'Destination', 'Packet_Count' (rows seen
        for that destination), 'Unique_Sources' (distinct non-null sources)
        and 'Avg_Length' (mean packet length). Destinations are listed in
        order of first appearance. Packets without a destination are ignored.

    Raises:
        KeyError: If one of the required columns is missing.
    """
    columns = ['Destination', 'Packet_Count', 'Unique_Sources', 'Avg_Length']

    # Selecting up front surfaces a missing column as a KeyError immediately.
    packets = df[['Destination', 'Source', 'Length']]
    if packets.empty:
        return pd.DataFrame(columns=columns)

    # One grouped pass instead of re-filtering the full frame per destination.
    # sort=False keeps destinations in first-seen order; groupby also drops
    # rows with a missing destination, which could never match a filter and
    # would otherwise yield a meaningless zero-count row.
    feature_df = (
        packets.groupby('Destination', sort=False)
        .agg(
            Packet_Count=('Source', 'size'),
            Unique_Sources=('Source', 'nunique'),
            Avg_Length=('Length', 'mean'),
        )
        .reset_index()
    )
    return feature_df[columns]

# Extract features for training
feature_df = feature_extraction(df)

# PyCaret setup
from pycaret.anomaly import setup, create_model, assign_model, tune_model

# Only the numeric aggregates are fed to the detector; 'Destination' is an
# identifier, not a feature.
feature_columns = ['Packet_Count', 'Unique_Sources', 'Avg_Length']

# Setup PyCaret for anomaly detection
exp = setup(data=feature_df[feature_columns], silent=True, session_id=123)

# Train the detector. Anomaly detection is unsupervised, so pycaret.anomaly
# offers no compare_models() to rank candidates; a model is created
# explicitly instead (Isolation Forest here).
best_model = create_model('iforest')

# Display the best model
print("Best model:", best_model)

# Assign labels to data based on best model
predictions = assign_model(best_model)
# PyCaret flags outliers with Anomaly == 1 and inliers with 0 (not the
# scikit-learn -1/1 convention). Compare on raw values so the labels are
# attached positionally rather than by index alignment.
feature_df['Anomaly_Label'] = np.where(
    predictions['Anomaly'].to_numpy() == 1, 'DDoS', 'Normal'
)

# Save the best model for real-time usage
save_model(best_model, 'best_ddos_model')

# Real-time detection simulation
def real_time_detection(new_packets_df, model_path='best_ddos_model'):
    """Score a batch of new packets with the saved detector and print alerts.

    Args:
        new_packets_df: Packet-level DataFrame in the same layout that
            feature_extraction() expects.
        model_path: Path (without extension) of the model written by
            save_model().

    Returns:
        The per-destination feature DataFrame with an extra 'Anomaly_Label'
        column holding 'DDoS' or 'Normal'.
    """
    feature_columns = ['Packet_Count', 'Unique_Sources', 'Avg_Length']

    # Extract features from new data in real-time
    new_features = feature_extraction(new_packets_df)
    if new_features.empty:
        # Nothing to score; avoid handing the model a zero-row frame.
        new_features['Anomaly_Label'] = []
        return new_features

    # Load the best saved model
    best_model = load_model(model_path)

    # assign_model() only labels the data given to setup(); unseen data has
    # to be scored with predict_model().
    new_predictions = predict_model(best_model, data=new_features[feature_columns])

    # PyCaret marks outliers with Anomaly == 1 (inliers are 0).
    is_outlier = new_predictions['Anomaly'].to_numpy() == 1
    new_features['Anomaly_Label'] = np.where(is_outlier, 'DDoS', 'Normal')

    # Display real-time alerts
    for destination in new_features.loc[is_outlier, 'Destination']:
        print(f"Real-time Alert: Potential DDoS attack detected on destination {destination}")
    return new_features

# Replay a captured traffic file through the detector as a stand-in for a
# live packet feed.
new_packet_data = pd.read_csv("ddos_file.csv")
real_time_detection(new_packets_df=new_packet_data)