In [24]:
import pandas as pd
import os
import sys
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import dill
from typing import List
import scipy.stats as sts
import pickle
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from sklearn.preprocessing import RobustScaler
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import ModelCheckpoint

import util
from pathlib import Path
from sk.replace_column_dataframe import ReplaceColumnDataFrame
from sk.norm_standard_scaler import NormStandardScaler
import constants.columns_dataframe as const
from class_manipulates_path import ManipulatePath
from class_preprocessing_refactor import Preprocessing
from class_format_data import FormatData
# Project-specific initialisation from the local `util` module (its effect is not visible here)
util.init()

# First entry of the interpreter's module search path.
# NOTE(review): not referenced by any other cell visible in this notebook — confirm it is still needed.
current_path = sys.path[0]

In [25]:
import tensorflow as tf

# List the GPU devices visible to TensorFlow (an empty list means none were found)
print("GPU disponível:", tf.config.list_physical_devices('GPU'))

# NOTE: is_built_with_cuda() reports whether this TensorFlow build was compiled with CUDA
# support; it does not tell whether a GPU is actually being used at runtime.
print("TensorFlow está usando a GPU:", tf.test.is_built_with_cuda())

GPU disponível: []
TensorFlow está usando a GPU: False


In [26]:
# Print a short GPU report: visible GPU devices, whether the TensorFlow build has CUDA
# support, and how many GPU devices were found.
print("Configurações do TensorFlow relacionadas à GPU:")
print(tf.config.list_physical_devices('GPU'))
# NOTE: is_built_with_cuda() describes the TensorFlow build, not runtime GPU usage.
print("TensorFlow está usando a GPU:", tf.test.is_built_with_cuda())
print("Número de GPUs disponíveis:", len(tf.config.experimental.list_physical_devices('GPU')))

Configurações do TensorFlow relacionadas à GPU:
[]
TensorFlow está usando a GPU: False
Número de GPUs disponíveis: 0


In [27]:
def plot_variable(df, column_name, intervalos=None):
    """
    Draw a scatter plot of one DataFrame column against the DataFrame index.

    Parameters:
        - df: pandas DataFrame holding the data.
        - column_name: Name of the column plotted on the y-axis.
        - intervalos: Optional iterable of x positions (list, tuple, NumPy array,
          pandas Index, ...). A dashed red vertical line is drawn at each position.
          ``None`` (the default) or an empty iterable draws no lines.
    """
    plt.figure(figsize=(20, 10))
    plt.scatter(df.index.values, df[column_name].values, color='darkcyan', alpha=0.5)
    plt.xlabel('Índice')
    plt.ylabel(column_name)
    plt.title(f'Gráfico de dispersão para a variável "{column_name}"')

    # Compare against None instead of relying on truthiness: `if intervalos:` raises
    # ValueError for a NumPy array or pandas object holding more than one element.
    if intervalos is not None:
        for intervalo in intervalos:
            plt.axvline(x=intervalo, linestyle='--', color='red')

    plt.show()

In [28]:
def mapping_labels(df: pd.DataFrame):
    """
    Encode the values of the 'class' column as consecutive integers.

    The distinct values of ``df['class']`` are sorted in ascending order and numbered
    0, 1, 2, ... in that order. The input DataFrame is not modified.

    Parameters:
        - df: DataFrame containing a 'class' column.

    Returns:
        A tuple ``(encoded_df, label_mapping)``: ``encoded_df`` is a copy of ``df`` whose
        'class' column holds the integer codes, and ``label_mapping`` is the
        ``{original_label: integer_code}`` dictionary that was applied.
    """
    encoded_df = df.copy()

    # Distinct labels, sorted in place so the codes follow ascending label order
    sorted_labels = encoded_df['class'].unique()
    sorted_labels.sort()

    label_mapping = dict(zip(sorted_labels, range(len(sorted_labels))))

    # Swap every original label for its integer code
    encoded_df['class'] = encoded_df['class'].map(label_mapping)
    return encoded_df, label_mapping

def inverse_mapping_labels(df: pd.DataFrame, label_mapping: dict):
    """
    Turn integer class codes back into their original labels.

    Parameters:
        - df: DataFrame whose 'class' column holds integer codes.
        - label_mapping: ``{original_label: integer_code}`` dictionary, as returned by
          ``mapping_labels``.

    Returns:
        A copy of ``df`` whose 'class' column holds the original labels. Codes that are
        not present in ``label_mapping`` become NaN (behaviour of ``Series.map``).
    """
    # Flip the dictionary so it goes from integer code to original label
    code_to_label = dict((code, label) for label, code in label_mapping.items())

    restored_df = df.copy()
    restored_df['class'] = restored_df['class'].map(code_to_label)
    return restored_df

In [29]:
# Project helper that supplies the file-system paths used in this notebook
manipulate_path = ManipulatePath()

# NOTE(review): `preprocessing` and `format_data` are not used by any cell visible here — confirm they are needed
preprocessing = Preprocessing()

format_data = FormatData()

# Location of the raw data, as reported by ManipulatePath
path_raw_data = manipulate_path.get_path_raw_data()

In [30]:
# Load the preprocessed dataset from the parquet file whose path ManipulatePath provides;
# the cells below rely on it having a 'class' column next to the feature columns.
df_preprocessing = pd.read_parquet(manipulate_path.get_path_preprocessing_real_data_all_classes())

In [31]:
# Number of rows per class label (value_counts orders them from most to least frequent)
contagem_classes = df_preprocessing['class'].value_counts()

# Share of each class over all rows of the dataset, in percent
porcentagem_classes_real = contagem_classes.div(len(df_preprocessing)).mul(100)
porcentagem_classes_real

class
0.0    32.679781
5.0    30.983203
7.0    29.009351
1.0     4.797527
6.0     1.810310
2.0     0.719828
Name: count, dtype: float64

Aqui é possível ver que há pouquíssimas classes normais!

# Rede 1

In [32]:
# Replace the original class labels with consecutive integer codes and keep the mapping.
# NOTE(review): this rebinds df_preprocessing, so re-running the cell encodes the already
# encoded labels again and label_mapping no longer holds the original labels.
df_preprocessing, label_mapping = mapping_labels(df_preprocessing)

In [33]:
# Display the {original label: integer code} dictionary returned by mapping_labels
label_mapping

{0.0: 0, 1.0: 1, 2.0: 2, 5.0: 3, 6.0: 4, 7.0: 5}

In [34]:
# Number of rows per class, now keyed by the integer codes produced by mapping_labels
contagem_classes = df_preprocessing['class'].value_counts()

# Share of each encoded class over all rows of the dataset, in percent
porcentagem_classes_real = contagem_classes.div(len(df_preprocessing)).mul(100)
porcentagem_classes_real

class
0    32.679781
3    30.983203
5    29.009351
1     4.797527
4     1.810310
2     0.719828
Name: count, dtype: float64

In [35]:
# Separar as features (X) e os rótulos (y)
X = df_preprocessing.drop('class', axis=1)
y = df_preprocessing['class']

In [36]:
# Display the feature matrix (Jupyter renders a truncated preview of the DataFrame)
X

Unnamed: 0_level_0,P-TPT,T-TPT,P-MON-CKP,T-JUS-CKP
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2014-01-24 09:33:03,18433410.0,116.8718,9397031.0,74.80031
2014-01-24 09:33:04,18433410.0,116.8718,9397031.0,74.80031
2014-01-24 09:33:05,18433410.0,116.8718,9397032.0,74.80031
2014-01-24 09:33:06,18433410.0,116.8718,9397033.0,74.80031
2014-01-24 09:33:07,18433410.0,116.8718,9397033.0,74.80031
...,...,...,...,...
2019-04-03 14:59:56,8489354.0,109.7213,1496222.0,73.38219
2019-04-03 14:59:57,8489349.0,109.7212,1495828.0,73.38310
2019-04-03 14:59:58,8489344.0,109.7210,1495433.0,73.38401
2019-04-03 14:59:59,8489338.0,109.7209,1495039.0,73.38493


In [37]:
# Hold out 30% of the rows as the test set, stratified by class, with a fixed seed.
# The remaining 70% (X_train_k / y_train_k) is what the K-fold loop later iterates over.
X_train_k, X_test, y_train_k, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)

In [38]:
# Split the non-test rows again: 20% of them become a validation set, stratified by class
X_train, X_val, y_train, y_val = train_test_split(X_train_k, y_train_k, test_size=0.2, stratify=y_train_k, random_state=42)

In [39]:
# Fit the project's NormStandardScaler on the inner training split only, then reuse the
# fitted scaler to transform every other split.
scaler = NormStandardScaler(X_train.columns)
X_train_norm = scaler.fit_transform(X_train)

# Full non-test set (inner training + validation rows); this is the input of the K-fold loop.
# NOTE(review): the scaler was fitted on X_train only, a subset of these rows — confirm
# this is the intended statistics source for the cross-validation folds.
X_train_k_norm = scaler.transform(X_train_k)

X_val_norm = scaler.transform(X_val)

X_test_norm = scaler.transform(X_test)

In [40]:
# with open('scaler.pkl', 'wb') as file:
#     pickle.dump(scaler, file)

In [41]:
from sklearn.model_selection import KFold
# Number of cross-validation folds
k = 5
# Shuffled K-fold splitter with a fixed seed, so the fold assignment is reproducible.
# NOTE(review): plain KFold does not stratify by class, unlike the train_test_split calls
# earlier in this notebook; given the rare classes shown above, StratifiedKFold may be
# worth considering.
kf = KFold(n_splits=k, shuffle=True, random_state=42)

In [43]:
# Per-fold validation accuracy and per-fold Keras training history (dict of metric lists)
accuracies = []
histories = []

# K-fold cross-validation: a fresh network is built and trained for every fold.
# NOTE(review): after the loop, `model_2` refers to the network trained on the LAST fold
# (with its final-epoch weights), which is what any later cell using `model_2` receives.
for train_index, val_index in kf.split(X_train_k_norm):
    # Split the data into the training and validation parts of this fold
    X_train_fold, X_val_fold = X_train_k_norm.iloc[train_index], X_train_k_norm.iloc[val_index]
    y_train_fold, y_val_fold = y_train_k.iloc[train_index], y_train_k.iloc[val_index]

    model_2 = Sequential()

    # Hidden layers
    model_2.add(Dense(32, input_dim=4, activation='relu'))  # First hidden layer: 32 units, ReLU; expects 4 input features
    model_2.add(Dense(16, activation='relu'))  # Second hidden layer: 16 units, ReLU
    model_2.add(Dense(8, activation='relu'))  # Third hidden layer: 8 units, ReLU

    # Output layer
    model_2.add(Dense(6, activation='softmax'))  # 6 units (one per class) with softmax for multiclass classification

    # Compile the model; the sparse loss takes integer class codes as targets
    model_2.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    # Checkpoint that writes to disk whenever val_loss improves within this fold.
    # NOTE(review): every fold uses the same path, so each fold's checkpoints overwrite the
    # previous fold's. Despite the file name, save_weights_only is not set, so the callback
    # is expected to store the full model rather than only the weights — confirm.
    checkpoint = ModelCheckpoint("weights_only.h5", monitor='val_loss', save_best_only=True)

    # Train on this fold, validating on the held-out part and running the checkpoint callback
    history = model_2.fit(X_train_fold, y_train_fold, epochs=100, batch_size=64, validation_data=(X_val_fold, y_val_fold), callbacks=[checkpoint])
    histories.append(history.history)

    # Evaluate the in-memory model (state after the last epoch) on the validation part
    _, accuracy = model_2.evaluate(X_val_fold, y_val_fold, verbose=0)
    accuracies.append(accuracy)
    print(f'Acurácia do fold: {accuracy*100:.2f}%')

Epoch 1/100
Epoch 2/100
 135/8556 [..............................] - ETA: 9s - loss: 0.1964 - accuracy: 0.9243

  saving_api.save_model(


Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 

  saving_api.save_model(


Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 

  saving_api.save_model(


Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 

  saving_api.save_model(


Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 

  saving_api.save_model(


Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 

In [44]:
# Summarise the per-fold validation accuracies: mean and (population) standard deviation
fold_scores = np.asarray(accuracies)
mean_accuracy = fold_scores.mean()
std_accuracy = fold_scores.std()
print(f'\nAcurácia média: {mean_accuracy*100:.2f}%')
print(f'Desvio padrão da acurácia: {std_accuracy*100:.2f}%')


Acurácia média: 98.04%
Desvio padrão da acurácia: 0.28%


In [45]:
import json

# Write the per-fold accuracies and the per-fold training histories to JSON files,
# one file per result, in this order.
for caminho_json, conteudo in (
    ('accuracies_rede_unica.json', accuracies),
    ('histories_rede_unica.json', histories),
):
    with open(caminho_json, 'w') as f:
        json.dump(conteudo, f)

In [21]:
#model_2 = Sequential()
#
## Adicione as camadas ocultas
#model_2.add(Dense(12, input_dim=4, activation='relu'))  # Camada de entrada com 8 neurônios e ativação ReLU
#model_2.add(Dense(12, activation='relu'))  # Segunda camada oculta com 3 neurônios e ativação ReLU (opcional)
#model_2.add(Dense(8, activation='relu'))
#model_2.add(Dense(8, activation='relu'))
#
## Adicione a camada de saída
#model_2.add(Dense(6, activation='softmax'))  # Camada de saída com 5 neurônios (um para cada classe) e ativação Softmax para classificação multiclasse
#
## Compile o model_2o
#model_2.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
#
## Defina o checkpoint para salvar os pesos
#checkpoint = ModelCheckpoint("weights_only.h5", monitor='val_loss', save_best_only=True)
#
## Treine o model_2o com conjunto de validação e o checkpoint
#model_2.fit(X_train_norm, y_train, epochs=100, batch_size=10, validation_data=(X_val_norm, y_val), callbacks=[checkpoint])

Epoch 1/100
Epoch 2/100
   99/47909 [..............................] - ETA: 1:14 - loss: 0.1617 - accuracy: 0.9273

  saving_api.save_model(


Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 

<keras.src.callbacks.History at 0x1ddca8d8d00>

In [46]:
# Predicted class probabilities for the scaled test set (one column per output unit).
# NOTE(review): `model_2` is whatever the last executed training cell left in the kernel —
# in the visible cells, the network from the final cross-validation fold, not a model
# trained on the whole training set nor the best checkpoint. Confirm this is intended.
y_pred = model_2.predict(X_test_norm)
# Keep, for every row, the index of the most probable class
y_pred = np.argmax(y_pred, axis=1)

# Overall accuracy on the test set (displayed as the cell output)
accuracy_score(y_test, y_pred)



0.9807957179871812

In [47]:
# Per-class precision, recall and F1 on the test set.
# With average=None scikit-learn returns one score per label, ordered by the sorted union
# of the labels found in y_test and y_pred. The label list is therefore built from both
# arrays and passed explicitly, so every printed score is paired with its own class even
# when a predicted class never occurs in y_test.
class_labels = np.union1d(y_test, y_pred)
precision = precision_score(y_test, y_pred, labels=class_labels, average=None)
recall = recall_score(y_test, y_pred, labels=class_labels, average=None)
f1 = f1_score(y_test, y_pred, labels=class_labels, average=None)
for class_label, class_precision, class_recall, class_f1 in zip(class_labels, precision, recall, f1):
    print(f'Classe {class_label}:')
    print(f'Precision: {class_precision}')
    print(f'Recall: {class_recall}')
    print(f'F1-score: {class_f1}\n')

Classe 0:
Precision: 0.9643059315143219
Recall: 0.9774140377232515
F1-score: 0.970815739709349

Classe 1:
Precision: 1.0
Recall: 1.0
F1-score: 1.0

Classe 2:
Precision: 1.0
Recall: 0.9919507575757576
F1-score: 0.9959591157594485

Classe 3:
Precision: 0.9891425694459646
Recall: 0.9944322183098592
F1-score: 0.9917803408579613

Classe 4:
Precision: 0.9925182122465053
Recall: 0.9493408662900188
F1-score: 0.9704495139089422

Classe 5:
Precision: 0.9864151575722031
Recall: 0.968550946057116
F1-score: 0.9774014314601012

