In [51]:
import pandas as pd
import numpy as np
import gym

class DDoSEnv(gym.Env):
    def __init__(self, dataset):
        super(DDoSEnv, self).__init__()
        self.data = dataset
        self.current_step = 0
        self.max_steps = len(self.data) - 1
        
        # Define action and observation space
        self.action_space = gym.spaces.Discrete(2)  # Normal or DDoS
        self.observation_space = gym.spaces.Box(low=0, high=1, shape=(11,), dtype=np.float32)  
        
    def reset(self):
        self.current_step = 0
        return self._next_observation()
    
    def _next_observation(self):
        obs = pd.get_dummies(self.data.iloc[self.current_step][['Source IP']])
        obs = obs.values.astype(np.float32)
        return obs

    
    def step(self, action):
        self.current_step += 1
        done = self.current_step >= self.max_steps
        reward = self._get_reward(action)
        obs = self._next_observation()
        return obs, reward, done, {}
    
    def _get_reward(self, action):
        label = self.data.iloc[self.current_step]['Label']  # Assuming the column name is DDoS_Label
        if action == label:
            return 1.0  # Correct detection
        else:
            return -1.0  # Incorrect detection

# Load dataset
dataset_path = ('/Users/selmael-korchi/db2.csv' )
dataset = pd.read_csv(dataset_path)
# Remove leading spaces from column names
df.columns = df.columns.str.strip()


# Create environment
env = DDoSEnv(dataset)

# Verify environment setup
observation = env.reset()
print("Observation space:", env.observation_space)
print("Action space:", env.action_space)


Observation space: Box(0.0, 1.0, (11,), float32)
Action space: Discrete(2)


In [59]:
import pandas as pd
import numpy as np
import gym

class DDoSEnv(gym.Env):
    def __init__(self, dataset):
        super(DDoSEnv, self).__init__()
        self.data = dataset
        self.current_step = 0
        self.max_steps = len(self.data) - 1
        
        # Define action and observation space
        self.action_space = gym.spaces.Discrete(2)  # Normal or DDoS
        self.observation_space = gym.spaces.Box(low=0, high=1, shape=(11,), dtype=np.float32)  
        
    def reset(self):
        self.current_step = 0
        return self._next_observation()
    
    def _next_observation(self):
        obs = pd.get_dummies(self.data.iloc[self.current_step][['Source IP']])
        obs = obs.values.astype(np.float32)
        return obs
    
    def step(self, action):
        correct_action = self.data.iloc[self.current_step]['Label']
        correct_action_value = 1 if correct_action == "Portmap" else 0
        reward = 1 if action == correct_action_value else -1
        self.current_step += 1
        done = self.current_step >= self.max_steps
        next_state = self._next_observation() if not done else None
        return next_state, reward, done, {}

# Load dataset
dataset_path = ('/Users/selmael-korchi/db2.csv')
dataset = pd.read_csv(dataset_path)
dataset.columns = dataset.columns.str.strip()  # Remove leading spaces from column names

# Create environment
env = DDoSEnv(dataset)

# Define Q-learning parameters
alpha = 0.1  # Learning rate
gamma = 0.6  # Discount factor
epsilon = 1.0  # Exploration rate (start with exploration)

# Initialize Q-table
num_states = env.observation_space.shape[0]
num_actions = env.action_space.n
q_table = np.zeros([num_states, num_actions])

# Define training parameters
num_episodes = 1000
max_steps_per_episode = len(dataset)

# Q-learning algorithm
for episode in range(num_episodes):
    state = env.reset()
    done = False
    total_reward = 0
    
    for step in range(max_steps_per_episode):
        # Convert state to integer index
        state_index = np.argmax(state)
        
        # Exploration-exploitation trade-off
        if np.random.uniform(0, 1) < epsilon:
            action = env.action_space.sample()  # Explore action space
        else:
            action = np.argmax(q_table[state_index, :])  # Exploit learned values
        
        # Take action, observe next state and reward
        next_state, reward, done, _ = env.step(action)
        
        # Convert next_state to integer index
        next_state_index = np.argmax(next_state)
        
        # Update Q-table
        q_table[state_index, action] = (1 - alpha) * q_table[state_index, action] + alpha * (reward + gamma * np.max(q_table[next_state_index, :]))
        
        total_reward += reward
        state = next_state
        
        if done:
            break
    
    # Decay epsilon
    epsilon = max(0.1, epsilon * 0.99)  # Ensure epsilon doesn't go below 0.1
    
    # Print episode statistics
    print(f"Episode {episode + 1}: Total Reward = {total_reward}, Exploration Rate = {epsilon}")

print("Training completed.")

Episode 1: Total Reward = 104, Exploration Rate = 0.99
Episode 2: Total Reward = 198, Exploration Rate = 0.9801
Episode 3: Total Reward = -38, Exploration Rate = 0.9702989999999999
Episode 4: Total Reward = 242, Exploration Rate = 0.96059601
Episode 5: Total Reward = 312, Exploration Rate = 0.9509900498999999
Episode 6: Total Reward = 202, Exploration Rate = 0.9414801494009999
Episode 7: Total Reward = 330, Exploration Rate = 0.9320653479069899
Episode 8: Total Reward = 560, Exploration Rate = 0.92274469442792
Episode 9: Total Reward = 614, Exploration Rate = 0.9135172474836407
Episode 10: Total Reward = 708, Exploration Rate = 0.9043820750088043
Episode 11: Total Reward = 734, Exploration Rate = 0.8953382542587163
Episode 12: Total Reward = 960, Exploration Rate = 0.8863848717161291
Episode 13: Total Reward = 956, Exploration Rate = 0.8775210229989678
Episode 14: Total Reward = 1032, Exploration Rate = 0.8687458127689781
Episode 15: Total Reward = 1138, Exploration Rate = 0.8600583546

KeyboardInterrupt: 

In [54]:
import pandas as pd
import numpy as np
import gym

class DDoSEnv(gym.Env):
    def __init__(self, dataset):
        super(DDoSEnv, self).__init__()
        self.data = dataset
        self.current_step = 0
        self.max_steps = len(self.data) - 1
        
        # Define action and observation space
        self.action_space = gym.spaces.Discrete(2)  # Normal or DDoS
        self.observation_space = gym.spaces.Box(low=0, high=1, shape=(11,), dtype=np.float32)  
    
    def reset(self):
        self.current_step = 0
        return self._next_observation()
    
    def _next_observation(self):
        obs = pd.get_dummies(self.data.iloc[self.current_step][['Source IP']])
        obs = obs.values.astype(np.float32)
        return obs
    
    def step(self, action):
        self.current_step += 1
        done = self.current_step >= self.max_steps
        reward = self._get_reward(action)
        obs = self._next_observation()
        return obs, reward, done, {}
    
    def _get_reward(self, action):
        label = self.data.iloc[self.current_step]['Label']  # Assuming the column name is DDoS_Label
        if action == label:
            return 1.0  # Correct detection
        else:
            return -1.0  # Incorrect detection

# Load dataset
dataset_path = ('/Users/selmael-korchi/db2.csv' )
dataset = pd.read_csv(dataset_path)
# Remove leading spaces from column names
df.columns = df.columns.str.strip()

# Create environment
env = DDoSEnv(dataset)

# Define Q-learning parameters
alpha = 0.1  # Learning rate
gamma = 0.6  # Discount factor
epsilon = 0.1  # Exploration rate

# Initialize Q-table
num_states = env.observation_space.shape[0]
num_actions = env.action_space.n
q_table = np.zeros([num_states, num_actions])

# Define training parameters
num_episodes = 1000
max_steps_per_episode = len(dataset)

# Q-learning algorithm

for episode in range(num_episodes):
    state = env.reset()
    done = False
    total_reward = 0

    for step in range(max_steps_per_episode):

        # Convert state to integer index
        state_index = np.argmax(state)

        # Exploration-exploitation trade-off

        if np.random.uniform(0, 1) < epsilon:
            action = env.action_space.sample()  # Explore action space
        else:
            action = np.argmax(q_table[state_index, :])  # Exploit learned values

        # Take action, observe next state and reward

        next_state, reward, done, _ = env.step(action)


        # Convert next_state to integer index
        next_state_index = np.argmax(next_state)
        

        # Update Q-table

        q_table[state_index, action] = (1 - alpha) * q_table[state_index, action] + alpha * (reward + gamma * np.max(q_table[next_state_index, :]))
        total_reward += reward
        state = next_state
        if done:

            break

    # Decay epsilon
    epsilon = max(0.1, epsilon * 0.99)
    # Print episode statistics
    print(f"Episode {episode + 1}: Total Reward = {total_reward}, Exploration Rate = {epsilon}")


Episode 1: Total Reward = -9998.0, Exploration Rate = 0.1
Episode 2: Total Reward = -9998.0, Exploration Rate = 0.1
Episode 3: Total Reward = -9998.0, Exploration Rate = 0.1
Episode 4: Total Reward = -9998.0, Exploration Rate = 0.1
Episode 5: Total Reward = -9998.0, Exploration Rate = 0.1
Episode 6: Total Reward = -9998.0, Exploration Rate = 0.1
Episode 7: Total Reward = -9998.0, Exploration Rate = 0.1
Episode 8: Total Reward = -9998.0, Exploration Rate = 0.1
Episode 9: Total Reward = -9998.0, Exploration Rate = 0.1
Episode 10: Total Reward = -9998.0, Exploration Rate = 0.1
Episode 11: Total Reward = -9998.0, Exploration Rate = 0.1
Episode 12: Total Reward = -9998.0, Exploration Rate = 0.1
Episode 13: Total Reward = -9998.0, Exploration Rate = 0.1
Episode 14: Total Reward = -9998.0, Exploration Rate = 0.1
Episode 15: Total Reward = -9998.0, Exploration Rate = 0.1


KeyboardInterrupt: 

In [10]:
import pandas as pd

# Load dataset
dataset_path = "/Users/selmael-korchi/Dataset-DDoS.csv"
dataset = pd.read_csv(dataset_path)

# Verify column names
print(dataset.columns)


Index([' Source IP', ' Source Port', ' Destination IP', ' Destination Port',
       ' Timestamp', ' Total Fwd Packets', ' Total Backward Packets'],
      dtype='object')


In [41]:
import pandas as pd

# Load the dataset from CSV
dataset_path = "/Users/selmael-korchi/Dataset-DDoS.csv"
dataset = pd.read_csv(dataset_path)

# Specify the path where you want to save the Excel file
excel_path = "/Users/selmael-korchi/Dataset-DDoS-new.xlsx"

# Convert the dataset to Excel format and save it
dataset.to_excel(excel_path, index=False)

print(dataset)

           Source IP   Source Port  Destination IP   Destination Port  \
0     192.168.50.254             0       224.0.0.5                  0   
1     192.168.50.253             0       224.0.0.5                  0   
2       192.168.50.6         54799   172.217.10.98                443   
3       192.168.50.6         54800     172.217.7.2                443   
4       192.168.50.6         54801   172.217.10.98                443   
...              ...           ...             ...                ...   
9994      172.16.0.5           648    192.168.50.4              38265   
9995      172.16.0.5           648    192.168.50.4              20343   
9996      172.16.0.5           642    192.168.50.4              48659   
9997      172.16.0.5           648    192.168.50.4              14536   
9998      172.16.0.5           648    192.168.50.4              46858   

                       Timestamp   Total Fwd Packets   Total Backward Packets  
0     2018-11-03 09:18:16.964447           

In [27]:
import pandas as pd
import numpy as np
import gym
from sklearn.preprocessing import OneHotEncoder

class DDoSEnv(gym.Env):
    def __init__(self, dataset_path):
        super(DDoSEnv, self).__init__()
        self.dataset_path = dataset_path
        self.load_dataset()
    
    def load_dataset(self):
        self.dataset = pd.read_csv(self.dataset_path)
    def reset(self):
        # Reset environment state
        pass
    
    def step(self, action):
        # Perform one step in the environment
        pass
    
    def render(self):
        # Render environment state
        pass

# Example usage:
dataset_path = "/Users/selmael-korchi/Dataset-DDoS.csv"


In [30]:
import gym

class DDoSEnv(gym.Env):
    def __init__(self, dataset):
        super(DDoSEnv, self).__init__()
        self.action_space = gym.spaces.Discrete(2)  


    def step(self, action):
        observation = ...  # Compute the observation after taking the action
        reward = ...  # Compute the reward based on the action and new state
        done = ...  # Determine if the episode is finished
        info = {}  # Additional information, if needed

        return observation, reward, done, info



dataset_path = "/Users/selmael-korchi/Dataset-DDoS.csv"  # Replace with your dataset path
env = DDoSEnv(dataset_path)

# Reset the environment to its initial state
observation = env.reset()

# Perform some steps in the environment and observe the results
for _ in range(10):
    # Sample a random action
    action = env.action_space.sample()
    
    # Perform the action and observe the next state, reward, and whether the episode is done
    observation, reward, done, info = env.step(action)
    
    # Print the results
    print("Observation:", observation)
    print("Reward:", reward)
    print("Done:", done)
    print("Info:", info)
    
    # Check if the episode is done
    if done:
        print("Episode finished after", _ + 1, "steps")
        break


Observation: Ellipsis
Reward: Ellipsis
Done: Ellipsis
Info: {}
Episode finished after 1 steps


In [33]:
import gym
import numpy as np

class DDoSEnv(gym.Env):
    def __init__(self, observation_size, num_actions, max_steps):
        # Define observation space
        self.observation_space = gym.spaces.Box(low=0, high=1, shape=(observation_size,), dtype=np.float32)
        
        # Define action space
        self.action_space = gym.spaces.Discrete(num_actions)
        
        # Define other attributes as needed
        self.max_steps = max_steps
        self.current_step = 0
        self.observation = np.random.rand(observation_size)  # Initialize with random observation

    def reset(self):
        # Reset the environment to its initial state
        self.current_step = 0
        self.observation = np.random.rand(self.observation_space.shape[0])  # Reset to random observation
        
        return self.observation

    def step(self, action):
        # Execute one time step within the environment
        self.current_step += 1
        
        # Update the environment based on the action
        # Calculate the next observation, reward, and whether the episode is done
        
        # Example:
        next_observation = self.observation + action  # Replace with actual environment dynamics
        reward = self._calculate_reward()  # Implement reward calculation
        done = self.current_step >= self.max_steps
        
        self.observation = next_observation
        
        return next_observation, reward, done, {}
    
    def _calculate_reward(self):
        # Implement reward calculation based on the current observation
        # Example:
        return np.sum(self.observation)

# Example usage:
observation_size = 10
num_actions = 2
max_steps = 100

env = DDoSEnv(observation_size, num_actions, max_steps)
observation = env.reset()
for _ in range(10):
    action = env.action_space.sample()  # Sample a random action
    observation, reward, done, info = env.step(action)
    print("Observation:", observation, "Reward:", reward, "Done:", done)
    if done:
        break


Observation: [0.55648471 0.43039136 0.48305208 0.92300214 0.44471455 0.48877233
 0.94000009 0.25164366 0.42336469 0.99787281] Reward: 5.939298431718971 Done: False
Observation: [0.55648471 0.43039136 0.48305208 0.92300214 0.44471455 0.48877233
 0.94000009 0.25164366 0.42336469 0.99787281] Reward: 5.939298431718971 Done: False
Observation: [1.55648471 1.43039136 1.48305208 1.92300214 1.44471455 1.48877233
 1.94000009 1.25164366 1.42336469 1.99787281] Reward: 5.939298431718971 Done: False
Observation: [2.55648471 2.43039136 2.48305208 2.92300214 2.44471455 2.48877233
 2.94000009 2.25164366 2.42336469 2.99787281] Reward: 15.939298431718973 Done: False
Observation: [3.55648471 3.43039136 3.48305208 3.92300214 3.44471455 3.48877233
 3.94000009 3.25164366 3.42336469 3.99787281] Reward: 25.93929843171897 Done: False
Observation: [4.55648471 4.43039136 4.48305208 4.92300214 4.44471455 4.48877233
 4.94000009 4.25164366 4.42336469 4.99787281] Reward: 35.93929843171897 Done: False
Observation: [5

In [22]:
import pandas as pd

# Load your dataset
file_path = "/Users/selmael-korchi/Dataset-DDoS-new.xlsx"
df = pd.read_excel(file_path, engine='openpyxl')

# Remove leading spaces from column names
df.columns = df.columns.str.strip()

# Manually label instances based on criteria
# For example, let's say instances with more than 100 total forward packets are labeled as attacks
df['label'] = df['Total Fwd Packets'].apply(lambda x: 1 if x > 100 else 0)

# Save the labeled dataset
labeled_file_path = "/Users/selmael-korchi/Labeled-Dataset-DDoS.xlsx"
df.to_excel(labeled_file_path, index=False)

In [49]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Step 1: Load and prepare the data

db_path = '/Users/selmael-korchi/db2.csv'  # Adjust the file path
data = pd.read_csv(db_path)

# Assuming there's a 'Label' column indicating 1 for DDOS attack and 0 for normal
# Simplified data preprocessing
features = ['Source Port', 'Destination Port', 'Total Fwd Packets', 'Total Backward Packets']  
X = data[features]
y = data['Label']  

# Normalize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

X_train = pd.DataFrame(X_train).reset_index(drop=True)
X_test = pd.DataFrame(X_test).reset_index(drop=True)
y_train = pd.Series(y_train).reset_index(drop=True)
y_test = pd.Series(y_test).reset_index(drop=True)


# Step 2: Define the reinforcement learning environment

class Environment:
    def __init__(self, X, y):
        self.X = X
        self.y = y
        self.current_index = 0
    
    def reset(self):
        self.current_index = 0
        return self.X.iloc[self.current_index]  
    
    def step(self, action):
        correct_action = self.y.iloc[self.current_index]  
        correct_action_value = 1 if correct_action == "Portmap" else 0
        reward = 1 if action == correct_action_value else -1
        self.current_index += 1
        done = self.current_index >= len(self.X)  
        next_state = self.X.iloc[self.current_index] if not done else None  
        return next_state, reward, done


# Step 3: Implementation of the agent using Q-learning with ε-greedy

class Agent:
    def __init__(self, n_states, n_actions, learning_rate=0.1, discount_rate=0.95, exploration_rate=1.0, min_exploration_rate=0.01, exploration_decay_rate=0.99):
        self.q_table = np.zeros((n_states, n_actions))
        self.learning_rate = learning_rate
        self.discount_rate = discount_rate
        self.exploration_rate = exploration_rate
        self.min_exploration_rate = min_exploration_rate
        self.exploration_decay_rate = exploration_decay_rate
        self.n_actions = n_actions
        self.state_to_index_mapping = {}  
        self.next_state_index = 0 
    
    def choose_action(self, state):
        state_index = self.state_to_index(state)  
        if np.random.rand() < self.exploration_rate:
            return np.random.randint(self.n_actions)
        else:
            return np.argmax(self.q_table[state_index])
    
    def update_q_table(self, state, action, reward, next_state):
        state_index = self.state_to_index(state)
        next_state_index = self.state_to_index(next_state) if next_state is not None else None
        next_max = np.max(self.q_table[next_state_index]) if next_state_index is not None else 0
        self.q_table[state_index, action] += self.learning_rate * (reward + self.discount_rate * next_max - self.q_table[state_index, action])
    
    def update_exploration_rate(self):
        self.exploration_rate = max(self.min_exploration_rate, self.exploration_rate * self.exploration_decay_rate)

    def state_to_index(self, state):
        state_key = str(state)
        if state_key not in self.state_to_index_mapping:
            self.state_to_index_mapping[state_key] = self.next_state_index
            self.next_state_index += 1
        return self.state_to_index_mapping[state_key]

# Step 4: Simulation of the training

def train(agent, environment, episodes=10):
    total_rewards = []  
    correct_actions_count = []  
    exploration_rate_progress = []  

    for episode in range(episodes):
        state = environment.reset()
        done = False
        total_reward = 0
        correct_action_count = 0

        while not done:
            action = agent.choose_action(state)
            next_state, reward, done = environment.step(action)
            if reward > 0:
                correct_action_count += 1
            agent.update_q_table(state, action, reward, next_state)
            total_reward += reward
            state = next_state
        
        agent.update_exploration_rate()
        total_rewards.append(total_reward)
        correct_actions_count.append(correct_action_count)
        exploration_rate_progress.append(agent.exploration_rate)

        print(f"Episode {episode+1}: Total Reward = {total_reward}, Correct Actions = {correct_action_count}, Exploration Rate = {agent.exploration_rate}")

    return total_rewards, correct_actions_count, exploration_rate_progress

# Initialize the environment and the agent
env = Environment(X_train, y_train)
agent = Agent(len(X_train), 2)  

# Start the training
total_rewards, correct_actions_count, exploration_rate_progress = train(agent, env)

print("Training completed.")

Episode 1: Total Reward = -141, Correct Actions = 3929, Exploration Rate = 0.99
Episode 2: Total Reward = 283, Correct Actions = 4141, Exploration Rate = 0.9801
Episode 3: Total Reward = 279, Correct Actions = 4139, Exploration Rate = 0.9702989999999999
Episode 4: Total Reward = 253, Correct Actions = 4126, Exploration Rate = 0.96059601
Episode 5: Total Reward = 189, Correct Actions = 4094, Exploration Rate = 0.9509900498999999
Episode 6: Total Reward = 451, Correct Actions = 4225, Exploration Rate = 0.9414801494009999
Episode 7: Total Reward = 415, Correct Actions = 4207, Exploration Rate = 0.9320653479069899
Episode 8: Total Reward = 637, Correct Actions = 4318, Exploration Rate = 0.92274469442792
Episode 9: Total Reward = 687, Correct Actions = 4343, Exploration Rate = 0.9135172474836407
Episode 10: Total Reward = 619, Correct Actions = 4309, Exploration Rate = 0.9043820750088043
Training completed.
