In [None]:
import numpy as np
import pandas as pd
import warnings
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow.keras import layers, models, regularizers
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
from collections import deque
import random
import zipfile
import os

# Display every DataFrame column when printing, and silence all warnings.
pd.set_option('display.max_columns', None)
warnings.filterwarnings('ignore')

# Step 1: Unpack the downloaded archive into ./extracted_files
data_path = "C:/Users/singa/Downloads/archive (6).zip"
with zipfile.ZipFile(data_path) as zip_ref:  # default mode is read-only
    zip_ref.extractall('extracted_files')

# Step 2: Column names for the header-less data files, then load both splits.
# The last two names ('outcome', 'level') are excluded from the feature
# matrix further down in this cell.
columns = [
    'duration',
    'protocol_type',
    'service',
    'flag',
    'src_bytes',
    'dst_bytes',
    'land',
    'wrong_fragment',
    'urgent',
    'hot',
    'num_failed_logins',
    'logged_in',
    'num_compromised',
    'root_shell',
    'su_attempted',
    'num_root',
    'num_file_creations',
    'num_shells',
    'num_access_files',
    'num_outbound_cmds',
    'is_host_login',
    'is_guest_login',
    'count',
    'srv_count',
    'serror_rate',
    'srv_serror_rate',
    'rerror_rate',
    'srv_rerror_rate',
    'same_srv_rate',
    'diff_srv_rate',
    'srv_diff_host_rate',
    'dst_host_count',
    'dst_host_srv_count',
    'dst_host_same_srv_rate',
    'dst_host_diff_srv_rate',
    'dst_host_same_src_port_rate',
    'dst_host_srv_diff_host_rate',
    'dst_host_serror_rate',
    'dst_host_srv_serror_rate',
    'dst_host_rerror_rate',
    'dst_host_srv_rerror_rate',
    'outcome',
    'level',
]

train_data_path = 'extracted_files/KDDTrain+.txt'
test_data_path = 'extracted_files/KDDTest+.txt'

# header=None: the first line of each file is data, not column names.
data_train = pd.read_csv(
    train_data_path,
    header=None,
    names=columns,
)
data_test = pd.read_csv(
    test_data_path,
    header=None,
    names=columns,
)

# Step 3: Data Preprocessing
def preprocess_data(data, scaler=None, feature_columns=None, fit=True):
    """One-hot encode the categorical columns and standardise the features.

    Args:
        data: DataFrame containing at least 'protocol_type', 'service' and
            'flag'. Columns named 'outcome' and 'level' are left unscaled.
        scaler: Optional scaler object to use. When omitted a new
            ``StandardScaler`` is created, so a call with only ``data``
            fits a fresh scaler on that frame.
        feature_columns: Optional column layout to enforce after one-hot
            encoding. Columns missing from ``data`` are added filled with 0,
            columns not listed are dropped, and the order is taken from this
            argument. Pass the columns of the preprocessed training frame so
            another split gets the same feature matrix layout.
        fit: When True (default) the scaler is fitted on ``data`` before
            transforming. When False the scaler is only applied, which
            requires an already fitted ``scaler`` to be passed in.

    Returns:
        A new DataFrame with dummy-encoded categoricals and scaled features.

    Raises:
        ValueError: If ``fit`` is False but no ``scaler`` was supplied.
    """
    if scaler is None:
        if not fit:
            raise ValueError("fit=False requires an already fitted scaler")
        scaler = StandardScaler()

    categorical_features = ['protocol_type', 'service', 'flag']
    data = pd.get_dummies(data, columns=categorical_features)

    if feature_columns is not None:
        # Different splits can contain different category values, which would
        # otherwise produce feature matrices of different widths.
        data = data.reindex(columns=feature_columns, fill_value=0)

    numerical_features = data.columns.difference(['outcome', 'level'])
    # Dummy columns may be boolean; give the scaler a uniform float input.
    features = data[numerical_features].astype(float)
    if fit:
        scaled = scaler.fit_transform(features)
    else:
        scaled = scaler.transform(features)
    data[numerical_features] = scaled

    return data


# Fit the column layout and the scaler on the training split only, then apply
# both unchanged to the test split so the two feature matrices are comparable.
feature_scaler = StandardScaler()
data_train = preprocess_data(data_train, scaler=feature_scaler)
data_test = preprocess_data(
    data_test,
    scaler=feature_scaler,
    feature_columns=data_train.columns,
    fit=False,
)

# Encode the labels against one shared, sorted class list so that column k of
# y_train and column k of y_test always refer to the same outcome, even when a
# class occurs in only one of the splits.
outcome_classes = sorted(set(data_train['outcome']) | set(data_test['outcome']))
outcome_dtype = pd.CategoricalDtype(categories=outcome_classes)

X_train = data_train.drop(['outcome', 'level'], axis=1).values
y_train = pd.get_dummies(data_train['outcome'].astype(outcome_dtype)).values
X_test = data_test.drop(['outcome', 'level'], axis=1).values
y_test = pd.get_dummies(data_test['outcome'].astype(outcome_dtype)).values

# Step 4: Define the DDQN Agent
class DDQNAgent:
    """Double deep Q-network agent with epsilon-greedy exploration.

    Holds an online network (``model``), a target network (``target_model``)
    and a bounded replay memory of ``(state, action, reward, next_state,
    done)`` transitions.

    Args:
        state_size: Number of input features of the Q-network.
        action_size: Number of discrete actions (network outputs).
    """

    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)  # oldest transitions are dropped first
        self.gamma = 0.95  # Discount rate
        self.epsilon = 1.0  # Exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.model = self._build_model()
        self.target_model = self._build_model()
        # Start with both networks holding identical weights.
        self.update_target_model()

    def _build_model(self):
        """Build and compile the Q-network (one linear output per action)."""
        model = models.Sequential()
        model.add(layers.Dense(128, input_dim=self.state_size, activation='relu'))
        model.add(layers.Dropout(0.2))
        model.add(layers.Dense(64, activation='relu'))
        model.add(layers.Dropout(0.2))
        model.add(layers.Dense(self.action_size, activation='linear'))
        model.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(learning_rate=0.001))
        return model

    def update_target_model(self):
        """Copy the online network's weights into the target network."""
        self.target_model.set_weights(self.model.get_weights())

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay memory."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Choose an action for ``state`` with an epsilon-greedy policy.

        With probability ``epsilon`` a uniformly random action is returned;
        otherwise the action with the highest predicted Q-value.

        Args:
            state: Array the online network can predict on; callers in this
                file pass a single row of shape (1, state_size).

        Returns:
            The chosen action index as an int.
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        # verbose=0: avoid printing a progress bar for every single decision.
        q_values = self.model.predict(state, verbose=0)
        return int(np.argmax(q_values[0]))

    def replay(self, batch_size):
        """Train the online network on one random minibatch from memory.

        Uses the Double DQN target: the online network selects the best next
        action and the target network supplies that action's value. The whole
        minibatch is predicted and fitted in one call each instead of one
        network round trip per transition.

        Does nothing when the memory holds fewer than ``batch_size``
        transitions. Decays ``epsilon`` after each training step until it
        reaches ``epsilon_min``.

        Args:
            batch_size: Number of transitions to sample.
        """
        if len(self.memory) < batch_size:
            return

        minibatch = random.sample(self.memory, batch_size)
        # States are stored as single rows; stack them into (batch, features).
        states = np.vstack([transition[0] for transition in minibatch])
        actions = np.array([transition[1] for transition in minibatch], dtype=int)
        rewards = np.array([transition[2] for transition in minibatch], dtype=np.float32)
        next_states = np.vstack([transition[3] for transition in minibatch])
        dones = np.array([transition[4] for transition in minibatch], dtype=bool)

        targets = self.model.predict(states, verbose=0)
        next_q_online = self.model.predict(next_states, verbose=0)
        next_q_target = self.target_model.predict(next_states, verbose=0)

        rows = np.arange(batch_size)
        best_next_actions = np.argmax(next_q_online, axis=1)
        bootstrap = self.gamma * next_q_target[rows, best_next_actions]
        # Terminal transitions get the bare reward, without a bootstrap term.
        targets[rows, actions] = rewards + np.where(dones, 0.0, bootstrap)

        self.model.fit(states, targets, batch_size=batch_size, epochs=1, verbose=0)

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

# Step 5: Train the Agent
def train_agent(episodes):
    """Train a DDQNAgent on the module-level ``X_train`` / ``y_train`` arrays.

    Every training row is presented as a state; the agent's action is read as
    a predicted class index and rewarded +1 when it equals the row's label
    (argmax of the one-hot ``y_train`` row) and -1 otherwise. The target
    network is synchronised once per episode.

    Args:
        episodes: Number of full passes over ``X_train``.

    Returns:
        The trained DDQNAgent, so callers can evaluate and save it.
    """
    agent = DDQNAgent(state_size=X_train.shape[1], action_size=y_train.shape[1])
    batch_size = 32

    # Loop invariants: decode the one-hot labels once instead of per step.
    labels = np.argmax(y_train, axis=1)
    last_index = len(X_train) - 1

    for e in range(episodes):
        for i, (features, label) in enumerate(zip(X_train, labels)):
            state = features.reshape(1, -1)
            action = agent.act(state)
            reward = 1 if label == action else -1
            # The stored next state is the same sample as the state.
            # NOTE(review): presumably because rows are independent samples
            # with no natural successor - confirm this is intended.
            next_state = state
            done = i == last_index
            agent.remember(state, action, reward, next_state, done)
            if len(agent.memory) > batch_size:
                agent.replay(batch_size)
        agent.update_target_model()
        print(f"Episode {e+1}/{episodes} completed.")

    return agent

# Step 6: Evaluate the Agent
def evaluate_agent(agent):
    """Report accuracy, classification report and confusion matrix on the test set.

    Predictions are the greedy (argmax) outputs of ``agent.model`` for the
    module-level ``X_test``; the exploration rate is not involved, so the
    reported numbers reflect the learned network only.

    Args:
        agent: Object exposing a Keras-style ``model`` with ``predict``.
    """
    true_labels = np.argmax(y_test, axis=1)
    # One batched forward pass instead of one predict call per test row.
    q_values = agent.model.predict(X_test, verbose=0)
    predictions = np.argmax(q_values, axis=1)

    correct_predictions = int(np.sum(predictions == true_labels))
    accuracy = correct_predictions / len(X_test)
    print(f"Test Accuracy: {accuracy * 100:.2f}%")
    print("Classification Report:")
    print(classification_report(true_labels, predictions))
    print("Confusion Matrix:")
    sns.heatmap(confusion_matrix(true_labels, predictions), annot=True, cmap='Blues')
    plt.show()

# Train and Evaluate the Agent
# train_agent returns the agent; keep it so it can be evaluated and saved.
agent = train_agent(episodes=100)
evaluate_agent(agent)
# Save the DDQN agent
agent.model.save('ddqn_agent.h5')

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from scapy.all import rdpcap, TCP, IP
from collections import deque
import random
import tensorflow as tf
from tensorflow.keras import layers, models

# Define the function to extract features from a PCAP file
def extract_features_from_pcap(pcap_file):
    """Read a capture file and build one feature row per packet with an IP layer.

    Packets without an IP layer are skipped. Only a handful of example
    columns are produced; the remaining KDD-style features are not derived.

    Args:
        pcap_file: Path of the capture file handed to scapy's ``rdpcap``.

    Returns:
        DataFrame with the columns 'duration', 'protocol_type', 'src_bytes',
        'dst_bytes' and 'flag' (no columns at all when no packet qualifies).
    """
    rows = [
        {
            # Filled with the packet's ``time`` attribute as a simple stand-in.
            'duration': packet.time,
            'protocol_type': packet[IP].proto,
            # Both byte counts are taken from the same packet: the IP payload
            # length and the length of the whole IP layer respectively.
            'src_bytes': len(packet[IP].payload),
            'dst_bytes': len(packet[IP]),
            # Packets without a TCP layer get 0 in place of TCP flags.
            'flag': packet[TCP].flags if TCP in packet else 0,
            # Add other features based on the KDD dataset structure
        }
        for packet in rdpcap(pcap_file)
        if IP in packet
    ]
    return pd.DataFrame(rows)

# Preprocessing for PCAP data
def preprocess_pcap_data(pcap_df):
    """One-hot encode and standardise the features extracted from a capture.

    'protocol_type' and 'flag' are dummy-encoded; every resulting column
    except 'outcome' and 'level' (if present) is then scaled with a
    ``StandardScaler`` fitted on this very frame.

    Args:
        pcap_df: DataFrame containing 'protocol_type' and 'flag' columns.

    Returns:
        A new DataFrame with encoded and scaled columns.
    """
    encoded = pd.get_dummies(pcap_df, columns=['protocol_type', 'flag'])

    # Label-style columns are excluded from scaling when they exist.
    to_scale = encoded.columns.difference(['outcome', 'level'])
    encoded[to_scale] = StandardScaler().fit_transform(encoded[to_scale])

    return encoded

# Define the DDQN Agent
class DDQNAgent:
    """Inference-side Q-network agent whose weights are loaded from disk.

    Args:
        state_size: Number of input features of the network.
        action_size: Number of discrete actions (network outputs).
    """

    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95    # discount rate
        self.epsilon = 1.0   # exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.model = self._build_model()

    def _build_model(self):
        """Build and compile the Q-network (one linear output per action).

        NOTE(review): ``load`` only restores weights, so these layer sizes
        must match the network that produced the weight file. The training
        cell in this file builds 128/64-unit layers with dropout - confirm
        which model the weights come from.
        """
        # Neural Network for Deep Q-learning
        model = models.Sequential()
        model.add(layers.Dense(24, input_dim=self.state_size, activation='relu'))
        model.add(layers.Dense(24, activation='relu'))
        model.add(layers.Dense(self.action_size, activation='linear'))
        # `learning_rate` is the supported keyword; `lr` is a deprecated alias
        # that newer Keras optimizers no longer accept.
        optimizer = tf.keras.optimizers.Adam(learning_rate=self.learning_rate)
        model.compile(optimizer=optimizer, loss='mse')
        return model

    def load(self, name):
        """Load network weights from the file ``name`` into ``model``."""
        self.model.load_weights(name)

    def act(self, state):
        """Choose an action for ``state`` with an epsilon-greedy policy.

        With probability ``epsilon`` a uniformly random action is returned;
        otherwise the action with the highest predicted Q-value.

        Args:
            state: Array the network can predict on; callers in this file
                pass a single row of shape (1, n_features).

        Returns:
            The chosen action index.
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        # verbose=0: avoid printing a progress bar for every single decision.
        act_values = self.model.predict(state, verbose=0)
        return np.argmax(act_values[0])  # returns action

# Integrate with DDQN agent for prediction
def test_on_pcap(pcap_file, agent):
    """Predict an action (class index) for every IP packet in a capture file.

    Predictions are the greedy argmax of ``agent.model``'s outputs. The
    agent's epsilon-greedy ``act`` is deliberately not used: a freshly
    constructed agent has ``epsilon = 1.0``, which would make every
    prediction a random draw.

    Args:
        pcap_file: Path of the capture file to analyse.
        agent: Object exposing ``state_size`` and a Keras-style ``model``.

    Returns:
        List of int action indices, one per extracted packet row; an empty
        list when the capture yields no rows.

    Raises:
        ValueError: If the number of preprocessed feature columns differs
            from ``agent.state_size``.
    """
    # Step 1: Extract features
    pcap_df = extract_features_from_pcap(pcap_file)
    if pcap_df.empty:
        # Nothing to classify; preprocessing would fail on a column-less frame.
        return []

    # Step 2: Preprocess data
    pcap_df = preprocess_pcap_data(pcap_df)

    # Step 3: Prepare features (assuming no 'outcome' column in pcap data)
    X_pcap = pcap_df.values.astype(np.float32)
    if X_pcap.shape[1] != agent.state_size:
        # Fail with a readable message instead of a shape error from Keras.
        raise ValueError(
            f"PCAP features have {X_pcap.shape[1]} columns but the agent "
            f"expects {agent.state_size}"
        )

    # Step 4: Predict using the trained DDQN model (one batched forward pass)
    q_values = agent.model.predict(X_pcap, verbose=0)
    return np.argmax(q_values, axis=1).tolist()

# Load the trained DDQN model (ensure it's loaded or trained before using)
# NOTE(review): both sizes are hard-coded placeholders. state_size has to
# equal the number of feature columns fed to the network and action_size the
# number of outputs of the network whose weights are loaded - confirm both
# against the model that produced the weight file.
state_size = 41  # Adjust this according to your input feature size
action_size = 5  # Adjust this according to your action space size

# Build an agent with untrained weights, then overwrite them from disk.
agent = DDQNAgent(state_size, action_size)
# NOTE(review): placeholder path; the training cell in this file saves its
# model as 'ddqn_agent.h5' - verify which file is meant here.
agent.load("path_to_trained_ddqn_model.h5")  # Replace with your model's file path

# Test the agent on a new PCAP file
pcap_file = "C:/Users/singa/OneDrive/ドキュメント/wireshark_capture.pcapng"
predictions = test_on_pcap(pcap_file, agent)

# Example output: one predicted action index per packet row
print("Predictions for the PCAP file:")
print(predictions)