In [None]:
import sys

# Show the interpreter build so the run environment is on record.
print(sys.version)

3.10.12 (main, Nov 20 2023, 15:14:05) [GCC 11.4.0]


## Importações, carregamento, rotulação e tratamento dos dados

In [None]:
import numpy as np
import pandas as pd
from google.colab import drive
import os
import re
import matplotlib.pyplot as plt # Gráficos
import seaborn as sns # Gráficos
from tqdm.notebook import tqdm # Barras de Progresso

In [None]:
import tensorflow as tf

from sklearn.preprocessing import MinMaxScaler, LabelEncoder, StandardScaler
from keras.models import Sequential, load_model
from keras.layers import LSTM, Dense, RepeatVector, TimeDistributed, Dropout

from sklearn.metrics import silhouette_score
from sklearn.metrics import roc_curve, roc_auc_score

from sklearn.metrics import confusion_matrix
from sklearn.decomposition import PCA

In [None]:
sns.set_theme() # Apply seaborn's default theme to the plots

In [None]:
# Mount Google Drive at /content/drive so files stored there can be read.
# `drive` is provided by the `from google.colab import drive` line in the
# notebook's import cell, so it is not imported a second time here.

drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Read the training CSV from the mounted Drive.
# NOTE(review): the file name suggests attack-free ("normal") traffic — confirm.
normal_csv_path = "/content/drive/My Drive/UFPE/UFPE 2024.1/PET/Imersão/Network Databases/normal_network.csv"
X_train = pd.read_csv(normal_csv_path)

In [None]:
# Normalise the timestamps: parse them as epoch seconds, then express each one
# as the number of seconds elapsed since the earliest record.

timestamps = pd.to_datetime(X_train['Timestamp'], unit='s')
elapsed = timestamps - timestamps.min()
X_train['Timestamp'] = elapsed.dt.total_seconds()

In [None]:
# Cast the integer-valued columns to float.
# `astype` converts each listed column in one vectorised pass instead of
# calling `float` on every element from Python.

columns = ['Arbitration ID', 'DLC', 'B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8']
X_train = X_train.astype({column: float for column in columns})

In [None]:
# Encode the flag columns as floats (False -> 0.0, True -> 1.0).
# Converting the existing values, rather than assigning a constant 0.0,
# keeps any True flag present in the data instead of silently erasing it.
# NOTE(review): assumes both columns are parsed as booleans by read_csv — confirm.

X_train['RTR'] = X_train['RTR'].astype(float)
X_train['Extended ID'] = X_train['Extended ID'].astype(float)

In [None]:
# Locate every missing value as a (row position, column position) pair.
nan_mask = np.isnan(X_train)
nan_indices = np.argwhere(nan_mask)
print(nan_indices)

[[2461907       5]
 [2461907       6]
 [2461907       7]
 [2461907       8]
 [2461907       9]
 [2461907      10]
 [2461907      11]
 [2461907      12]]


In [None]:
# Drop every row that contains at least one missing value (NaN/Null/NA)
# and report how many rows were removed.
initial_len = len(X_train)
X_train = X_train.dropna(axis=0, how='any')
print(f'Tamanho inicial: {initial_len}, tamanho final {X_train.shape[0]} | Descartados {initial_len - X_train.shape[0]} registros com valores NA')

Tamanho inicial: 2713011, tamanho final 2713010 | Descartados 1 registros com valores NA


## Definição do Modelo

In [None]:
# Fit a min-max scaler on the training data, then rescale the data with it.
# The fitted scaler stays available in `scaler`; X_train becomes the scaled array.
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)

In [None]:
# Serialise the fitted scaler to 'scaler.pkl' in the current working directory.
# NOTE(review): presumably so the same scaling can be reapplied later — confirm.
import joblib
joblib.dump(scaler, 'scaler.pkl')

['scaler.pkl']

In [None]:
def create_sequences(data, steps):
  """Slice `data` into consecutive, overlapping windows of `steps` items.

  Window k is `data[k:k + steps]`, for k from 0 up to (but not including)
  `len(data) - steps`; the window starting at `len(data) - steps` is therefore
  not produced.

  Args:
    data: Sliceable sequence (e.g. a NumPy array) to cut into windows.
    steps: Number of consecutive items in each window.

  Returns:
    A NumPy array stacking all windows along the first axis. When
    `len(data) - steps` is zero or negative the result is an empty array.
  """
  window_count = len(data) - steps
  windows = [data[start:start + steps] for start in range(window_count)]
  return np.array(windows)

In [None]:
# Build the sequences: cut the scaled data into overlapping windows of `timesteps` rows

timesteps = 5
X_train = create_sequences(X_train, timesteps)

In [None]:
# Model architecture: stacked LSTMs that compress each window into a single
# 64-unit vector, then stacked LSTMs that expand it back into a full window.

n_features = X_train.shape[2]

model = Sequential([
  # Encoder: the second LSTM returns only its final state (return_sequences=False)
  LSTM(128, activation='relu', input_shape=(timesteps, n_features), return_sequences=True),
  Dropout(0.2),
  LSTM(64, activation='relu', return_sequences=False),
  Dropout(0.2),

  # Repeat the encoded vector once per timestep to feed the decoder
  RepeatVector(timesteps),

  # Decoder: both LSTMs emit one output per timestep
  LSTM(64, activation='relu', return_sequences=True),
  Dropout(0.2),
  LSTM(128, activation='relu', return_sequences=True),
  Dropout(0.2),

  # Dense layer applied at every timestep, mapping back to the input feature count
  TimeDistributed(Dense(n_features)),
])

In [None]:
# Configure training: Adam optimizer with mean squared error as the loss.
model.compile(optimizer='adam', loss='mse')

# Print the layer-by-layer summary (output shapes and parameter counts).
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 5, 128)            72704     
                                                                 
 dropout (Dropout)           (None, 5, 128)            0         
                                                                 
 lstm_1 (LSTM)               (None, 64)                49408     
                                                                 
 dropout_1 (Dropout)         (None, 64)                0         
                                                                 
 repeat_vector (RepeatVecto  (None, 5, 64)             0         
 r)                                                              
                                                                 
 lstm_2 (LSTM)               (None, 5, 64)             33024     
                                                        

In [None]:
# Train the model to reproduce its own input: X_train is both input and target.
# 5% of the samples are held out for validation and shuffling is disabled.
model.fit(
  X_train,
  X_train,
  epochs=20,
  batch_size=32,
  validation_split=0.05,
  shuffle=False,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20

In [None]:
# Save the trained model to 'lstm.keras' in the current working directory.
model.save('lstm.keras')