# Installing Requirements

In [None]:
# Uncomment to install the dependencies. The environment cell below imports
# `gymnasium` (not the legacy `gym` package), so that is the package to install.
# !pip install stable-baselines3
# !pip install gymnasium

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
# NOTE(review): this silences every warning for the whole session, including
# deprecation warnings that may point at real problems further down.
warnings.filterwarnings('ignore')

# Enable inline plotting in Jupyter Notebook
%matplotlib inline

# Format pandas output to show 3 decimal places
pd.set_option('display.float_format', lambda x: '%.3f' % x)

# Set default plot size for all matplotlib figures
plt.rcParams["figure.figsize"] = (10, 6)

# View all columns when printing
pd.set_option("display.max_columns", None) 

# Importing Dataset

In [None]:
# The NSL-KDD text files contain no header row. Without header=None pandas
# would consume the first record of each file as column names, silently
# dropping one sample per split. Column names are assigned in the next cell.
df_train = pd.read_csv("/kaggle/input/nslkdd/KDDTrain+.txt", header=None)
df_test = pd.read_csv("/kaggle/input/nslkdd/KDDTest+.txt", header=None)

In [None]:
# Names for the 43 columns of the raw files, in file order. The last two
# entries are the class label and the difficulty score.
col_names = [
    "duration", "protocol_type", "service", "flag",
    "src_bytes", "dst_bytes", "land_f", "wrong_fragment",
    "urgent", "hot", "num_failed_logins", "logged_in",
    "num_compromised", "root_shell", "su_attempted", "num_root",
    "num_file_creations", "num_shells", "num_access_files", "num_outbound_cmds",
    "is_host_login", "is_guest_login", "count", "srv_count",
    "serror_rate", "srv_serror_rate", "rerror_rate", "srv_rerror_rate",
    "same_srv_rate", "diff_srv_rate", "srv_diff_host_rate", "dst_host_count",
    "dst_host_srv_count", "dst_host_same_srv_rate", "dst_host_diff_srv_rate",
    "dst_host_same_src_port_rate", "dst_host_srv_diff_host_rate",
    "dst_host_serror_rate", "dst_host_srv_serror_rate",
    "dst_host_rerror_rate", "dst_host_srv_rerror_rate",
    "labels", "dificulty",
]

# Both splits share the same schema, so they receive the same names.
df_train.columns = df_test.columns = col_names

In [None]:
# (rows, columns) of each split. The second line previously printed the
# train shape again, so the test split was never reported.
print(df_train.shape)
print(df_test.shape)

In [None]:
# Show three randomly chosen training rows (no seed is set, so the rows
# differ on every run).
df_train.sample(3)

In [None]:
# Show three randomly chosen test rows (no seed is set, so the rows differ
# on every run).
df_test.sample(3)

In [None]:
# df_train.drop(["dificulty"], axis=1, inplace=True)
# df_test.drop(["dificulty"], axis=1, inplace=True)

### One-hot Encoding

In [2]:
# df_train.select_dtypes(include='object').columns

In [None]:
# The three string-valued features are expanded into indicator columns;
# each split is encoded independently, so their column sets can differ.
categorical_columns = ['protocol_type', 'service', 'flag']
df_train, df_test = (
    pd.get_dummies(frame, columns=categorical_columns)
    for frame in (df_train, df_test)
)

##### making train and test the same shape

In [None]:
df_train_column_set = set(df_train.columns)
df_test_column_set = set(df_test.columns)

# Give the test frame exactly the training layout: indicator columns that
# only occur in the training split are added as all-False, and the column
# order follows df_train. The earlier loop over a set difference appended
# the missing columns in a hash-dependent order, so the test layout differed
# from the training layout and could change between runs.
df_test = df_test.reindex(columns=df_train.columns, fill_value=False)

#### Feature Selection

1 - Features based on related articles

In [None]:
# f = ["dst_host_serror_rate", "service_private", "count","dst_host_count", "service_domain_u", "flag_REJ", "dst_host_diff_srv_rate"]
# important_features = f + ["labels"]
# important_features = list(set(important_features))

In [3]:
# df_train = df_train[important_features]
# df_test = df_test[important_features]

### Converting label (actions) to Normal/Attack

In [None]:
# Collapse the label column to a binary target:
# 'normal' -> 0.0, every other label (an attack) -> 1.0, stored as float32.
atk_idx = df_train.index[df_train['labels'].ne('normal')]
df_train['labels'] = df_train.index.isin(atk_idx).astype(np.float32)

In [None]:
# Same binary mapping for the test split:
# 'normal' -> 0.0, every other label (an attack) -> 1.0, stored as float32.
atk_idx = df_test.index[df_test['labels'].ne('normal')]
df_test['labels'] = df_test.index.isin(atk_idx).astype(np.float32)

### Separating the Dataset into State and Action, and Splitting Train and Test

In [None]:
# Target column and feature columns for each split.
train_col = 'labels'
train_cols = df_train.columns[df_train.columns != train_col]
test_col = 'labels'

# The test split must expose the same features in the same order as the
# training split. The previous expression masked df_train's columns with a
# boolean array built from df_test's columns, which only gave the right
# answer when 'labels' sat at the same position in both frames.
test_cols = train_cols
assert set(test_cols) <= set(df_test.columns), "df_test is missing training feature columns"

In [None]:
# Features (states) and targets (expected actions) for each split.
x_train, y_train = df_train[train_cols], df_train[train_col]
x_test, y_test = df_test[test_cols], df_test[test_col]

In [None]:
# Targets are kept as single-column frames. The isinstance guard lets this
# cell be re-run: a DataFrame has no to_frame(), so the unguarded call failed
# on a second execution.
if isinstance(y_train, pd.Series):
    y_train = y_train.to_frame()
if isinstance(y_test, pd.Series):
    y_test = y_test.to_frame()

# Replace whatever index each frame carries with a fresh 0..n-1 index.
x_train = x_train.reset_index(drop=True)
y_train = y_train.reset_index(drop=True)
x_test = x_test.reset_index(drop=True)
y_test = y_test.reset_index(drop=True)

# Environment

In [None]:
import gymnasium as gym
from gymnasium.utils.env_checker import check_env
import random
from typing import Optional

In [None]:
class IdsEnv(gym.Env):
    """Intrusion-detection classification exposed as a Gymnasium environment.

    Every observation is one feature row of the dataset. The agent answers
    with action 0 (normal) or 1 (malicious) and receives a reward of 1 when
    the action equals the row's label, 0 otherwise.

    Parameters
    ----------
    dataset : tuple
        ``(features, labels)``. ``features`` must be convertible to a 2-D
        float array (rows x features); ``labels`` must hold one 0/1 value per
        feature row.
    random : bool, default True
        If True, rows are drawn uniformly at random (with replacement) from
        the environment's seeded generator, so ``reset(seed=...)`` is
        reproducible. If False, rows are served once, in dataset order.
    images_per_episode : int, default 1
        Number of rows classified before an episode ends. The attribute name
        is kept because ``step`` has always referred to it.

    Raises
    ------
    ValueError
        If the dataset is empty, is not 2-D, has mismatched lengths, or
        ``images_per_episode`` is smaller than 1.
    """

    def __init__(self, dataset=(x_train, y_train), random=True, images_per_episode=1):
        self.x, self.y = dataset

        # Convert once: indexing a float32 array per step is much cheaper than
        # DataFrame.iloc, and it yields observations of the declared dtype.
        self._features = np.asarray(self.x, dtype=np.float32)
        self._labels = np.asarray(self.y).reshape(-1).astype(np.int64)

        if self._features.ndim != 2 or self._features.shape[0] == 0:
            raise ValueError("features must be a non-empty 2-D table (rows x features)")
        if self._labels.shape[0] != self._features.shape[0]:
            raise ValueError("labels must contain exactly one value per feature row")
        if images_per_episode < 1:
            raise ValueError("images_per_episode must be at least 1")

        self.action_space = gym.spaces.Discrete(2)  # normal or malicious
        # One entry per feature column of the dataset that was passed in
        # (not per row, and not taken from the global x_train).
        self.observation_space = gym.spaces.Box(
            low=-np.inf,
            high=np.inf,
            shape=(self._features.shape[1],),
            dtype=np.float32,
        )

        self.random = random
        self.images_per_episode = images_per_episode
        self.step_count = 0
        self.dataset_idx = 0
        # Label of the row currently shown to the agent; None until reset()
        # has been called, and again once sequential data is used up.
        self.expected_action = None

    def step(self, action):
        """Score ``action`` against the current row and advance to the next one.

        Returns the Gymnasium five-tuple
        ``(obs, reward, terminated, truncated, info)``. ``terminated`` becomes
        True after ``images_per_episode`` steps, or when sequential mode has
        just scored the last row. ``info['label']`` is the true label of the
        row that was scored.

        Raises
        ------
        StopIteration
            In sequential mode, when called after every row has been scored.
        RuntimeError
            When called before ``reset()``.
        """
        if self.expected_action is None:
            if not self.random and self.dataset_idx >= len(self._labels):
                raise StopIteration()
            raise RuntimeError("reset() must be called before step()")

        reward = int(action == self.expected_action)
        current_label = self.expected_action

        self.step_count += 1
        terminated = self.step_count >= self.images_per_episode

        try:
            obs = self._next_obs()
        except StopIteration:
            # The row just scored was the last one. End the episode instead
            # of losing this reward; the placeholder observation is unused.
            obs = np.zeros(self.observation_space.shape, dtype=np.float32)
            self.expected_action = None
            terminated = True

        # The episode length is part of the task, so its end is reported as
        # termination; `truncated` is always a plain False.
        return obs, reward, terminated, False, {'label': current_label}

    def _next_obs(self):
        """Select the next row, remember its label and return its features.

        Raises
        ------
        StopIteration
            In sequential mode, when no unread row is left.
        """
        if self.random:
            idx = int(self.np_random.integers(len(self._labels)))
        else:
            # Check before reading so that the final row is served as well.
            if self.dataset_idx >= len(self._labels):
                raise StopIteration()
            idx = self.dataset_idx
            self.dataset_idx += 1

        self.expected_action = int(self._labels[idx])
        # Copy so that callers cannot modify the stored dataset in place.
        return self._features[idx].copy()

    def reset(self, seed: Optional[int] = None, options: Optional[dict] = None):
        """Start a new episode and return ``(first_observation, info)``.

        ``seed`` re-seeds the generator used for random row selection. In
        sequential mode the read position is not rewound, so ``StopIteration``
        is raised once every row has been served.
        """
        super().reset(seed=seed)

        self.step_count = 0
        obs = self._next_obs()
        return obs, {}

In [None]:
# Build the environment with its defaults: the (x_train, y_train) dataset
# and random row selection.
env = IdsEnv()

In [None]:
# Run Gymnasium's environment checker on the instance; it reports problems
# with the declared spaces and with the values returned by reset()/step().
check_env(env)

# Algorithms

All policies consist of a feature extractor followed by a feed-forward neural network (FNN) layer.

`BaseFeaturesExtractor` is the base class for all other feature extractors.

stable_baselines3.common.policies

['ABC',
 'ActorCriticCnnPolicy', == stable_baselines3.ppo.CnnPolicy
 'ActorCriticPolicy', == stable_baselines3.ppo.MlpPolicy
 'Any',
 'BaseFeaturesExtractor',
 'BaseModel',
 'BasePolicy',
 'BernoulliDistribution',
 'CategoricalDistribution',
 'CombinedExtractor',
 'ContinuousCritic',
 'DiagGaussianDistribution',
 'Dict',
 'Distribution',
 'FlattenExtractor',
 'List',
 'MlpExtractor',
 'MultiCategoricalDistribution',
 'MultiInputActorCriticPolicy', == stable_baselines3.ppo.MultiInputPolicy
 'NatureCNN',
 'Optional',
 'Schedule',
 'SelfBaseModel',
 'StateDependentNoiseDistribution',
 'Tuple',
 'Type',
 'TypeVar',
 'Union',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__spec__',
 'abstractmethod',
 'collections',
 'copy',
 'create_mlp',
 'get_action_dim',
 'get_device',
 'is_image_space',
 'is_vectorized_observation',
 'make_proba_distribution',
 'maybe_transpose',
 'nn',
 'np',
 'obs_as_tensor',
 'partial',
 'preprocess_obs',
 'spaces',
 'th',
 'warnings']

## PPO

## DQN

In [1]:
# from stable_baselines3 import DQN

# Check random sample

In [None]:

    # Sanity check: compare the true labels of three random training rows
    # with the agent's predictions for them.
    # NOTE(review): `model` must be a trained agent created by an earlier
    # cell; none is defined in the cells shown above.
    s = df_train.sample(3)
    print(s.labels.to_numpy(dtype=np.int64))
    # predict() takes NumPy observations directly, so the torch tensor (and
    # the `torch` import this notebook never performs) is not needed.
    f = s.drop(columns=["labels"]).to_numpy(dtype=np.float32)
    model.predict(f)[0]