In [1]:
import os
import random
import warnings
from typing import Union, List

import numpy as np
import pandas as pd

from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score, f1_score, confusion_matrix

from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

import matplotlib.pyplot as plt

import torch

In [2]:
# Root directory under which the per-scenario CSV folders are looked up.
folder: str = "/data/experience/CSV"

# Path suffix of every scenario folder, appended to `folder` as-is.
# Each entry has the form "/<position>.<name>", with positions counted from 1
# in the order the names are listed below.
sub_folders: list = [
    f"/{position}.{scenario}"
    for position, scenario in enumerate(
        (
            "Deauth",
            "Disas",
            "(Re)Assoc",
            "Rogue_AP",
            "Krack",
            "Kr00k",
            "SSH",
            "Botnet",
            "Malware",
            "SQL_Injection",
            "SSDP",
            "Evil_Twin",
            "Website_spoofing",
        ),
        start=1,
    )
]

In [3]:
def load_dataset(num: int = 1, all: bool = False) -> Union[pd.DataFrame, None]:
    """Load the CSV data of one scenario folder into a DataFrame.

    Parameters
    ----------
    num : int
        1-based position of the scenario in ``sub_folders``.
    all : bool
        When True, every data file in the folder is read and the frames are
        concatenated in sorted file-name order with a fresh index. When False,
        only the first file (by sorted name) is read.

    Returns
    -------
    pd.DataFrame or None
        The loaded data, or None when ``num`` does not address an entry of
        ``sub_folders``.

    Raises
    ------
    FileNotFoundError
        If the scenario folder contains no data files.
    """
    if num < 1 or num > len(sub_folders):
        return None

    # Entries of `sub_folders` start with '/', so plain concatenation is
    # required here: os.path.join would discard `folder`.
    path: str = folder + sub_folders[num - 1]

    # os.listdir returns entries in arbitrary order and also lists directories
    # and hidden entries (e.g. '.ipynb_checkpoints'). Sorting makes both the
    # "first file" and the concatenation order reproducible, which matters
    # because callers draw seeded samples from the result.
    files: list = sorted(
        name
        for name in os.listdir(path)
        if not name.startswith('.') and os.path.isfile(os.path.join(path, name))
    )
    if not files:
        raise FileNotFoundError(f"no data files found in {path!r}")

    selected = files if all else files[:1]

    # Mixed-type columns only trigger a DtypeWarning; silence it for the reads.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=pd.errors.DtypeWarning)
        frames = [pd.read_csv(os.path.join(path, name)) for name in selected]

    return pd.concat(frames, ignore_index=True) if all else frames[0]

In [8]:
# Write small training sets for the selected scenarios: for each total size
# `num`, roughly one row in 33 (and always at least one) has a label other
# than 'Normal'; the remaining rows are labelled 'Normal'.
train_dir = '/data/experience/wireless/CSV/train/'
# DataFrame.to_csv does not create missing directories, so make sure the
# target exists before the (slow) loading loop starts.
os.makedirs(train_dir, exist_ok=True)

for idx in [1, 2, 3, 4, 5, 6, 12]:
    df = load_dataset(idx, True)
    # Rows without a label cannot be assigned to either class.
    df = df.dropna(subset=['Label'])

    normal_packets = df[df['Label'] == 'Normal']
    attack_packets = df[df['Label'] != 'Normal']

    # '/3.(Re)Assoc' -> '(Re)Assoc'; does not depend on `num`, so it is
    # computed once per scenario instead of once per output file.
    scenario = sub_folders[idx-1].split('.')[1]

    for num in [3, 5, 10, 20, 50, 100, 150, 200, 300, 500, 800, 1000]:
        num_attacked = max(1, num // 33)

        # Fixed seed so repeated runs select the same rows.
        normal_sample = normal_packets.sample(num - num_attacked, random_state=42)
        attack_sample = attack_packets.sample(num_attacked, random_state=42)

        # Normal rows first, then attack rows, with a fresh 0..num-1 index.
        df_splitted = pd.concat([normal_sample, attack_sample], ignore_index=True)

        df_splitted.to_csv(train_dir + scenario + '_' + str(num) + '_train.csv', index=False)

In [None]:
# Write one 50000-row test set per selected scenario, with one tenth of the
# rows labelled something other than 'Normal'.
#
# The training-split cell samples the very same pools with random_state=42.
# Sampling the test rows from the unmodified pools with that seed would put
# the training rows into the test set as well, so the rows used for training
# are removed from the pools first.

# Sizes and attack ratio used by the training-split cell; keep these in sync
# with that cell so the excluded rows match what was actually written.
train_sizes = [3, 5, 10, 20, 50, 100, 150, 200, 300, 500, 800, 1000]
train_attack_divisor = 33

test_dir = '/data/experience/wireless/CSV/test/'
# DataFrame.to_csv does not create missing directories.
os.makedirs(test_dir, exist_ok=True)

for idx in [1, 2, 3, 4, 5, 6, 12]:
    df = load_dataset(idx, True)
    # Rows without a label cannot be assigned to either class.
    df = df.dropna(subset=['Label'])

    normal_packets = df[df['Label'] == 'Normal']
    attack_packets = df[df['Label'] != 'Normal']

    # Re-derive the index labels of every row drawn for any training set.
    # The labels are unique because load_dataset concatenates with
    # ignore_index=True and dropna keeps the surviving labels.
    train_normal_idx = normal_packets.index[:0]
    train_attack_idx = attack_packets.index[:0]
    for train_num in train_sizes:
        train_attacked = max(1, train_num // train_attack_divisor)
        train_normal_idx = train_normal_idx.union(
            normal_packets.sample(train_num - train_attacked, random_state=42).index
        )
        train_attack_idx = train_attack_idx.union(
            attack_packets.sample(train_attacked, random_state=42).index
        )

    num = 50000
    num_attacked = max(1, num // 10)

    # Sample only from rows that were never used for training.
    normal_sample = normal_packets.drop(index=train_normal_idx).sample(num - num_attacked, random_state=42)
    attack_sample = attack_packets.drop(index=train_attack_idx).sample(num_attacked, random_state=42)

    # Normal rows first, then attack rows, with a fresh 0..num-1 index.
    df_splitted = pd.concat([normal_sample, attack_sample], ignore_index=True)

    df_splitted.to_csv(test_dir + sub_folders[idx-1].split('.')[1] + '_' + str(num) + '_test.csv', index=False)