In [None]:
import math
import numpy as np
import pandas as pd
import os
import re
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.svm import OneClassSVM

from sklearn.preprocessing import StandardScaler, MinMaxScaler

from sklearn.model_selection import train_test_split
from tqdm.notebook import tqdm

from joblib import dump, load

from sklearn.metrics import roc_curve, roc_auc_score
from sklearn.metrics import confusion_matrix

from keras.models import Model, Sequential
from keras.layers import Input, LSTM, RepeatVector, TimeDistributed, Dense

import tensorflow as tf
from tensorflow.keras import layers, models

import torch
import torch.nn as nn
import torch.optim as optim

from torchsummary import summary

In [None]:
# Plot styling and global seeding.
sns.set_theme()
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
# Seeding NumPy alone does not make the Keras model reproducible: weight
# initialisation and batch shuffling draw from Python's `random` and from
# TensorFlow. This helper seeds Python, NumPy and TensorFlow in one call.
tf.keras.utils.set_random_seed(RANDOM_SEED)

# Carregando os dados

Os valores são números muito pequenos, com muitas casas decimais; por isso, configuramos o pandas para exibir 20 casas decimais, de modo que o dataframe não os mostre arredondados.

In [None]:
# Render every float with 20 fixed decimal places when pandas displays it.
pd.options.display.float_format = '{:.20f}'.format

In [None]:
# Read the benign-message capture from disk, then show it as the cell output.
df_benign = pd.read_csv(
    filepath_or_buffer="data/dados benignos/mensagens_benignas.csv",
)
df_benign

In [None]:
# Read the three attack captures (random DoS, spoofing with zero payload,
# zero DoS), one DataFrame per CSV file.
df_malicious_random_dos = pd.read_csv(
    filepath_or_buffer="data/ataques/mensagens_maliciosas_random_dos.csv",
)
df_malicious_spoofing_zero_payload = pd.read_csv(
    filepath_or_buffer="data/ataques/mensagens_maliciosas_spoofing_zero_payload.csv",
)
df_malicious_zero_dos = pd.read_csv(
    filepath_or_buffer="data/ataques/mensagens_maliciosas_zero_dos.csv",
)

In [None]:
# Display the random-DoS attack frame for visual inspection.
df_malicious_random_dos

In [None]:
# Display the spoofing (zero payload) attack frame for visual inspection.
df_malicious_spoofing_zero_payload

In [None]:
# Display the zero-DoS attack frame for visual inspection.
df_malicious_zero_dos

# Tratando dados

## Normalização dos dados

In [None]:
# Learn the per-column min/max from the benign frame only; the same fitted
# scaler is then applied to the benign and to every attack frame.
scaler_minmax = MinMaxScaler().fit(df_benign)


def _scale_frame(frame):
    """Return `frame` transformed by `scaler_minmax`, keeping its columns and index."""
    return pd.DataFrame(
        scaler_minmax.transform(frame),
        columns=frame.columns,
        index=frame.index,
    )


df_benign_scaled = _scale_frame(df_benign)
df_malicious_random_dos_scaled = _scale_frame(df_malicious_random_dos)
df_malicious_spoofing_zero_payload_scaled = _scale_frame(df_malicious_spoofing_zero_payload)
df_malicious_zero_dos_scaled = _scale_frame(df_malicious_zero_dos)

In [None]:
# The unscaled frames are no longer used; drop the names so the memory can be reclaimed.
del (
    df_benign,
    df_malicious_random_dos,
    df_malicious_spoofing_zero_payload,
    df_malicious_zero_dos,
)

In [None]:
# Display the scaled benign frame to check the result of the normalisation.
df_benign_scaled

## Criando Labels

In [None]:
# One integer class code per row: 1 for benign traffic, and a distinct
# negative code for each attack capture (-1, -2, -3).
list_labels_benign = len(df_benign_scaled) * [1]
list_labels_random_dos = len(df_malicious_random_dos_scaled) * [-1]
list_labels_spoofing_zero_payload = len(df_malicious_spoofing_zero_payload_scaled) * [-2]
list_labels_zero_dos = len(df_malicious_zero_dos_scaled) * [-3]

## Criação de Janelas Temporais

In [None]:
def create_dataset(data, labels, time_step=1):
    """Cut `data` into overlapping sliding windows with one label per window.

    Window ``i`` holds rows ``i`` to ``i + time_step - 1`` of `data`; its label
    is ``labels[i + time_step]``, i.e. the label of the row right after the
    window. Consecutive windows are shifted by one row.

    Args:
        data: Array-like of rows (e.g. a DataFrame or an ndarray).
        labels: Sequence with at least one label per row of `data`.
        time_step: Number of rows per window; must be >= 1.

    Returns:
        Tuple ``(X, Y)``. ``X`` has shape
        ``(len(data) - time_step, time_step, *row_shape)`` and ``Y`` has shape
        ``(len(data) - time_step,)``. When `data` has no more than `time_step`
        rows, both arrays are empty but ``X`` keeps its trailing dimensions, so
        it can still be stacked with the windows of other captures.

    Raises:
        ValueError: If `time_step` is smaller than 1.
        IndexError: If `labels` has fewer entries than `data` has rows.
    """
    if time_step < 1:
        raise ValueError(f"time_step must be >= 1, got {time_step}")

    # Convert once up front: slicing an ndarray is far cheaper than slicing a
    # DataFrame on every iteration.
    values = np.asarray(data)
    label_values = np.asarray(labels)

    n_windows = len(values) - time_step
    if n_windows <= 0:
        # Not enough rows for a single window: return correctly shaped empties
        # instead of a shapeless (0,) array.
        empty_windows = np.empty((0, time_step) + values.shape[1:], dtype=values.dtype)
        empty_labels = np.empty((0,), dtype=label_values.dtype)
        return empty_windows, empty_labels

    X = np.stack([values[start:start + time_step] for start in range(n_windows)])
    # Integer-array indexing raises IndexError when `labels` is too short.
    Y = label_values[np.arange(time_step, time_step + n_windows)]
    return X, Y

In [None]:
# Number of consecutive rows per window; passed as `time_step` to
# create_dataset and used as the sequence length of the LSTM input.
WINDOW_SIZE = 100

In [None]:
# Turn each scaled capture into sliding windows of WINDOW_SIZE rows, with one
# label per window.
benign_windows, benign_labels = create_dataset(
    data=df_benign_scaled,
    labels=list_labels_benign,
    time_step=WINDOW_SIZE,
)
malicious_random_dos_windows, malicious_random_dos_labels = create_dataset(
    data=df_malicious_random_dos_scaled,
    labels=list_labels_random_dos,
    time_step=WINDOW_SIZE,
)
malicious_spoofing_zero_payload_windows, malicious_spoofing_zero_payload_labels = create_dataset(
    data=df_malicious_spoofing_zero_payload_scaled,
    labels=list_labels_spoofing_zero_payload,
    time_step=WINDOW_SIZE,
)
malicious_zero_dos_windows, malicious_zero_dos_labels = create_dataset(
    data=df_malicious_zero_dos_scaled,
    labels=list_labels_zero_dos,
    time_step=WINDOW_SIZE,
)

In [None]:
# The windows now hold the data; drop the scaled frames ...
del (
    df_benign_scaled,
    df_malicious_random_dos_scaled,
    df_malicious_spoofing_zero_payload_scaled,
    df_malicious_zero_dos_scaled,
)

# ... and the per-row label lists.
del (
    list_labels_benign,
    list_labels_random_dos,
    list_labels_spoofing_zero_payload,
    list_labels_zero_dos,
)

In [None]:
# Display the benign window array for visual inspection.
benign_windows

## Dividindo dados em Treino, Validação e Teste

In [None]:
# Stack the windows of all four captures along the first (window) axis,
# benign first and then the three attack captures.
data = np.concatenate(
    [
        benign_windows,
        malicious_random_dos_windows,
        malicious_spoofing_zero_payload_windows,
        malicious_zero_dos_windows,
    ],
    axis=0,
)

In [None]:
# Join the per-window labels in the same order used for `data`, so that
# position i of `data_labels` belongs to window i of `data`.
data_labels = np.concatenate(
    [
        benign_labels,
        malicious_random_dos_labels,
        malicious_spoofing_zero_payload_labels,
        malicious_zero_dos_labels,
    ],
    axis=0,
)

In [None]:
# Everything now lives in `data` / `data_labels`; drop the per-capture windows ...
del (
    benign_windows,
    malicious_random_dos_windows,
    malicious_spoofing_zero_payload_windows,
    malicious_zero_dos_windows,
)

# ... and the per-capture label arrays.
del (
    benign_labels,
    malicious_random_dos_labels,
    malicious_spoofing_zero_payload_labels,
    malicious_zero_dos_labels,
)

In [None]:
# Total number of windows across all captures.
len(data)

In [None]:
# Total number of labels; should equal the number of windows shown above.
len(data_labels)

In [None]:
# Hold out 25% of the windows for validation + test. Stratifying on the class
# codes keeps the benign/attack proportions the same on both sides of the split.
# NOTE(review): windows from create_dataset overlap (consecutive windows are
# shifted by one row), so a shuffled split places near-identical windows in
# different splits. Consider splitting each capture by time *before*
# windowing to avoid this leakage.
train_data, val_test_data, train_labels, val_test_labels = train_test_split(
    data,
    data_labels,
    test_size=0.25,
    random_state=RANDOM_SEED,
    stratify=data_labels,
)

In [None]:
# Split the held-out part in half: one half for validation, one for test.
# Stratifying keeps the class proportions equal in both halves.
val_data, test_data, val_labels, test_labels = train_test_split(
    val_test_data,
    val_test_labels,
    test_size=0.5,
    random_state=RANDOM_SEED,
    stratify=val_test_labels,
)

In [None]:
# The intermediate validation+test pool has been split; drop both names.
del val_test_data
del val_test_labels

In [None]:
# Shape of the training set array.
train_data.shape

# IAs

In [None]:
# Number of features per time step, read from the training windows instead of
# being hard-coded, so the model input always matches the loaded data.
FEATURES_COUNT = train_data.shape[-1]

In [None]:
# Build the LSTM model: a 50-unit LSTM over each window followed by a single
# linear output unit.
model = Sequential()
# Declare the input shape with an explicit Input layer; passing `input_shape`
# to the first layer is deprecated in Keras 3.
model.add(Input(shape=(WINDOW_SIZE, FEATURES_COUNT)))
model.add(LSTM(50, activation='relu'))
model.add(Dense(1))

# Compile the model with the Adam optimizer and mean-squared-error loss.
model.compile(optimizer='adam', loss='mse')

In [None]:
# Train the model for a single epoch, evaluating on the validation split at
# the end of the epoch. The returned History object is kept in `historyy`.
historyy = model.fit(
    x=train_data,
    y=train_labels,
    epochs=1,
    batch_size=8,
    validation_data=(val_data, val_labels),
)

In [None]:
# Evaluate the trained model on the test split and report the resulting loss.
loss = model.evaluate(x=test_data, y=test_labels)
print("Test Loss:", loss)

In [None]:
# Run the trained model over every test window and keep the raw outputs.
predicts = model.predict(test_data)

In [None]:
# Display the raw model outputs for the test windows.
predicts