In [None]:
%pip install gymnasium

Collecting gymnasium
  Downloading gymnasium-0.29.1-py3-none-any.whl (953 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/953.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m235.5/953.9 kB[0m [31m6.9 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━[0m [32m788.5/953.9 kB[0m [31m11.4 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m953.9/953.9 kB[0m [31m10.9 MB/s[0m eta [36m0:00:00[0m
Collecting farama-notifications>=0.0.1 (from gymnasium)
  Downloading Farama_Notifications-0.0.4-py3-none-any.whl (2.5 kB)
Installing collected packages: farama-notifications, gymnasium
Successfully installed farama-notifications-0.0.4 gymnasium-0.29.1


In [None]:
import gymnasium as gym
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import RandomizedSearchCV, train_test_split
import random
from collections import deque
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam


The action-value function (Q-function) can be viewed as a table that maps
every state–action pair to its expected long-term return. In our
case this table is very large, and filling it in explicitly would be
computationally expensive, so it is approximated with a neural network
instead.

In [None]:
class Detector:
    """Thin wrapper around a scikit-learn-style classifier.

    Attributes:
        classifier: The wrapped estimator; it must provide ``fit(X, y)`` and
            ``predict(X)``.
        x_test: Held-out features from the most recent ``fit`` call
            (``None`` until ``fit`` has been called).
        y_test: Held-out labels from the most recent ``fit`` call
            (``None`` until ``fit`` has been called).
    """

    def __init__(self, classifier):
        self.classifier = classifier
        self.x_test = None
        self.y_test = None

    def predict(self, x):
        """Return the wrapped classifier's predictions for ``x``."""
        return self.classifier.predict(x)

    def fit(self, data, test_size=0.2, random_state=None):
        """Train the classifier on a random subset of ``data``.

        Args:
            data: DataFrame holding the feature columns plus a ``'Label'``
                column with the target.
            test_size: Fraction of rows withheld from training (default 0.2,
                the value that used to be hard-coded).
            random_state: Optional seed forwarded to ``train_test_split`` to
                make the split reproducible.

        The withheld rows are kept in ``self.x_test`` / ``self.y_test`` so the
        detector can be evaluated afterwards; previously they were computed
        and thrown away.
        """
        x = data.drop('Label', axis=1)
        y = data['Label']
        x_train, x_test, y_train, y_test = train_test_split(
            x, y, test_size=test_size, random_state=random_state
        )
        self.x_test, self.y_test = x_test, y_test
        self.classifier.fit(x_train, y_train)

In [None]:
def normalize(data):
    """Standardise ``data`` column-wise to zero mean and unit variance.

    Args:
        data: A pandas DataFrame/Series or a NumPy array; statistics are
            computed along axis 0 using the object's own ``mean``/``std``.

    Returns:
        A new object of the same kind holding ``(data - mean) / std``.
        The input is left untouched (the earlier version modified it in
        place, which also failed for integer NumPy arrays).

    Columns whose standard deviation is zero or undefined (constant column,
    single row) are divided by 1 instead, so they come out as zeros rather
    than NaN/inf.
    """
    mean = data.mean(axis=0)
    std = data.std(axis=0)
    degenerate = (std == 0) | pd.isna(std)
    if isinstance(std, (pd.Series, pd.DataFrame)):
        # Keep the pandas index so the division stays label-aligned.
        safe_std = std.mask(degenerate, 1.0)
    else:
        safe_std = np.where(degenerate, 1.0, std)
    return (data - mean) / safe_std

In [None]:
class Enviroment(gym.Env):
    """Environment in which an agent perturbs traffic features to evade a detector.

    Every action adds or subtracts a fixed amount to one named feature of the
    current state. The reward is 1 when the detector's prediction for the
    perturbed state equals 0, and 0 otherwise.

    Args:
        detector: Object exposing ``predict(state)``.
        attack_traffic: Initial state. It is indexed by the feature names in
            ``space`` and copied with ``.copy()`` — presumably a one-row
            pandas DataFrame; confirm against the caller.
        step_size: Magnitude of a single perturbation (default 0.01, the
            value that used to be hard-coded).

    Note:
        ``reset`` returns only the observation and ``step`` returns a
        ``(state, reward, done)`` triple rather than gymnasium's
        ``(obs, info)`` / five-tuple, because the training loop in this
        notebook unpacks them that way.
    """

    FEATURES = (
        "Flow Duration",
        "TotLen Fwd Pkts",
        "TotLen Bwd Pkts",
        "Flow Byts/s",
        "Flow Pkts/s",
        "Bwd/Fwd Ratio",
        "Pkt Size Avg",
    )

    def __init__(self, detector, attack_traffic, step_size=0.01):
        super().__init__()
        self.detector = detector
        # Independent copies: `step` mutates `self.state`, and without the
        # copies it would silently rewrite `self.start` (and the caller's data).
        self.start = self._copy_state(attack_traffic)
        self.state = self._copy_state(attack_traffic)
        # Two actions per feature, in the order "+feature", "-feature".
        self.space = [
            {"Name": name, "Action": sign + name, "Value": delta}
            for name in self.FEATURES
            for sign, delta in (("+", step_size), ("-", -step_size))
        ]
        self.action_space = gym.spaces.Discrete(len(self.space))

    @staticmethod
    def _copy_state(state):
        """Return an independent copy of ``state`` (``None`` passes through)."""
        return None if state is None else state.copy()

    def reset(self, *, seed=None, options=None):
        """Restore the initial state and return a copy of it.

        ``seed`` is forwarded to ``gym.Env.reset``; ``options`` is accepted for
        signature compatibility and ignored.
        """
        super().reset(seed=seed)
        self.state = self._copy_state(self.start)
        return self._copy_state(self.state)

    def step(self, action_n):
        """Apply action number ``action_n`` and query the detector.

        Returns:
            ``(state, reward, done)`` where ``state`` is a copy of the new
            state (so transitions stored by the caller are not altered by
            later steps), ``reward`` is 1 if every detector prediction equals
            0 and 0 otherwise, and ``done`` is always ``False``.
        """
        action = self.space[action_n]
        self.state[action["Name"]] += action["Value"]
        result = self.detector.predict(self.state)
        reward = 1 if np.all(np.asarray(result) == 0) else 0
        return (self._copy_state(self.state), reward, False)

In [None]:
class DQLAgent:
    """Deep Q-learning agent with an epsilon-greedy policy and replay memory.

    Args:
        state_size: Number of input features of the Q-network.
        action_size: Number of discrete actions. Defaults to
            ``state_size * 2`` (one increase and one decrease per feature).
            The previous default expression referenced ``state_size`` in the
            signature itself, which raised ``NameError`` when the class was
            defined.
    """

    def __init__(self, state_size, action_size=None):
        self.state_size = state_size  # how many features are modified
        # how many distinct modifications (actions) are available
        self.action_size = state_size * 2 if action_size is None else action_size
        self.memory = deque(maxlen=2000)  # replay buffer; oldest entries are dropped
        self.gamma = 0.95          # discount factor
        self.epsilon = 1.0         # current exploration rate
        self.epsilon_decay = 0.995
        self.epsilon_min = 0.01
        self.model = self.build_model()

    def build_model(self):
        """Build and compile the Q-network: state in, one value per action out."""
        model = Sequential()
        model.add(Dense(512, input_dim=self.state_size, activation='relu'))
        model.add(Dense(256, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        # `Adam` is imported directly; the bare `keras` name is not in scope.
        opt = Adam(use_ema=True)
        model.compile(loss='mse', optimizer=opt)
        return model

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay memory."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Pick an action: random with probability ``epsilon``, otherwise greedy."""
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        act_values = self.model.predict(state, verbose=0)
        return int(np.argmax(act_values[0]))

    def replay(self, batch_size):
        """Fit the Q-network on ``batch_size`` randomly sampled transitions.

        Does nothing (and does not decay ``epsilon``) while the memory holds
        fewer than ``batch_size`` transitions; sampling from a too-small
        memory used to raise ``ValueError``.
        """
        if len(self.memory) < batch_size:
            return
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                # Bellman target: reward plus discounted best next-state value.
                next_q = self.model.predict(next_state, verbose=0)[0]
                target = reward + self.gamma * np.amax(next_q)
            target_f = self.model.predict(state, verbose=0)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

In [None]:
class Learner:
    """Runs the interaction loop between an agent and an environment.

    Args:
        agent: Object exposing ``act(state)``, ``remember(...)`` and
            ``replay(batch_size)``.
        env: Object exposing ``reset()`` and ``step(action)`` returning
            ``(state, reward, done)``.
    """

    def __init__(self, agent, env):
        self.agent = agent
        self.env = env

    def fit(self, epoch_n, batch_size=10):
        """Run ``epoch_n`` environment steps, training the agent along the way.

        Args:
            epoch_n: Number of environment steps to execute.
            batch_size: Replay batch size (default 10, the value that used to
                be hard-coded).
        """
        state = self.env.reset()
        for _ in range(epoch_n):
            action = self.agent.act(state)
            new_state, reward, done = self.env.step(action)
            self.agent.remember(state, action, reward, new_state, done)

            # Only replay once enough transitions exist to fill a batch.
            memory = getattr(self.agent, "memory", None)
            if memory is None or len(memory) >= batch_size:
                self.agent.replay(batch_size)

            # Advance to the next state; previously `state` stayed frozen at
            # the initial observation for every step.
            state = self.env.reset() if done else new_state

In [None]:
# Load the labelled traffic data and train the detector.
raw_data = pd.read_csv('data.csv')
# Standardise the feature columns only. Passing the whole frame through
# `normalize` would also rescale 'Label', turning the class labels into
# continuous values that a classifier cannot be trained on.
features = normalize(raw_data.drop(columns='Label'))
data = features.assign(Label=raw_data['Label'])
forest = RandomForestClassifier()
detector = Detector(forest)
detector.fit(data)

In [None]:
# Wire the detector-backed environment to a DQL agent and run the training loop.
# NOTE(review): attack_traffic is passed as None here, so Enviroment.step has no
# state to modify — supply the attack sample to perturb before running this cell.
env = Enviroment(detector, None)
agent = DQLAgent(7)  # 7 = number of distinct feature names in Enviroment.space
learner = Learner(agent, env)
learner.fit(100)  # 100 environment steps (Learner.fit loops over range(epoch_n))

In [None]:
# class CEM():
#     def __init__(self, state_n, action_n):
#       self.state_n = state_n
#       self.action_n = action_n
#       self.policy = np.ones((self.state_n, self.action_n)) / self.action_n

#     def get_action(self, state):
#       return int(np.random.choice(np.arange(self.action_n), p=self.policy[state]))

#     def update_policy(self, elite_tr):
#       pre_policy = np.zeros((state_n, action_n))

#       for tr in elite_tr:
#         for state, action in zip(tr['states'], tr['actions']):
#           pre_policy[state][action] += 1

#           for state in range(self.action_n):
#             if sum(pre_policy[state]) == 0:
#               self.policy[state] = np.ones(self.action_n) / self.action_n
#             else:
#               self.policy[state] = pre_policy[state] / sum(pre_policy[state])

In [None]:
# class CEMAgent():
#     def __init__(self, env, agent_logic):
#       self.env = env
#       self.agent_logic = agent_logic

#     def get_state(self):
#       return None

#     def get_trajectory(self, tr_len):
#       tr = {'states': [], 'actions': [], 'total_reward': 0}

#       obs = self.env.reset()
#       state = get_state(obs)
#       tr['states'].append(state)

#       for _ in range(tr_len):
#           action = self.agent_logic.get_action(state)
#           tr['actions'].append(action)

#           obs, reward, done = env.step(action)
#           state = get_state(obs)
#           tr['total_reward'] += reward

#           if done:
#               break

#           tr['states'].append(state)

#         return tr
#       def get_elite_trajectories(self, trajectories, q):
#           total = [tr['total_reward'] for tr in trajectories]
#           quantile = np.quantile(total, q=q)
#           return [tr for tr in trajectories if tr['total_reward'] > quantile]

#       def fit(self, episode_n, trajectory_n, trajectory_len, q):
#           for _ in range(episode_n):
#               trajectories = [get_trajectory(trajectory_len) for _ in range(trajectory_n)]

#               mean_total = np.mean([trajectory['total_reward'] for trajectory in trajectories])
#               print(mean_total)
#               elite = get_elite_trajectories(trajectories, q)

#               if len(elite) > 0:
#                   self.agent_logic.update_policy(elite)