In [16]:
import pandas as pd
import os
import sys
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import dill
from typing import List
import scipy.stats as sts
import pickle
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from sklearn.preprocessing import RobustScaler
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.models import load_model

import util
from pathlib import Path
from sk.replace_column_dataframe import ReplaceColumnDataFrame
from sk.norm_standard_scaler import NormStandardScaler
import constants.columns_dataframe as const
from class_manipulates_path import ManipulatePath
from class_preprocessing_refactor import Preprocessing
from class_format_data import FormatData
# Project-local initialisation from the `util` module; what it configures is not
# visible in this notebook.
util.init()

# First entry of the module search path.
# NOTE(review): not referenced again in the visible cells — confirm it is still needed.
current_path = sys.path[0]

In [17]:
import tensorflow as tf

# Physical GPUs that TensorFlow can see on this machine (empty list when there are none).
print("GPU disponível:", tf.config.list_physical_devices('GPU'))

# is_built_with_cuda() only tells whether this TensorFlow build was compiled with CUDA
# support; it says nothing about a GPU actually being used, so the label states exactly
# what is being reported.
print("TensorFlow compilado com suporte a CUDA:", tf.test.is_built_with_cuda())

GPU disponível: []
TensorFlow está usando a GPU: False


In [18]:
# Summary of TensorFlow's GPU configuration.
# The device list is queried once and reused, through the stable tf.config API
# (tf.config.experimental.list_physical_devices is an alias of the same call).
gpu_devices = tf.config.list_physical_devices('GPU')

print("Configurações do TensorFlow relacionadas à GPU:")
print(gpu_devices)
# is_built_with_cuda() reports build-time CUDA support, not run-time GPU usage.
print("TensorFlow compilado com suporte a CUDA:", tf.test.is_built_with_cuda())
print("Número de GPUs disponíveis:", len(gpu_devices))

Configurações do TensorFlow relacionadas à GPU:
[]
TensorFlow está usando a GPU: False
Número de GPUs disponíveis: 0


In [19]:
def plot_variable(df, column_name, intervalos=None):
    """
    Draw a scatter plot of one DataFrame column against the DataFrame index.

    Parameters:
        - df: pandas DataFrame holding the data.
        - column_name: name of the column plotted on the y axis.
        - intervalos: optional iterable of x positions (list, tuple, NumPy array, ...)
          at which a dashed red vertical line is drawn. None (the default) draws no lines.

    The figure is displayed with plt.show(); nothing is returned.
    """
    plt.figure(figsize=(20, 10))
    plt.scatter(df.index.values, df[column_name].values, color='darkcyan', alpha=0.5)
    plt.xlabel('Índice')
    plt.ylabel(column_name)
    plt.title(f'Gráfico de dispersão para a variável "{column_name}"')

    # Compare against None instead of relying on truthiness: `if intervalos:` raises
    # ValueError for a NumPy array or pandas object with more than one element.
    if intervalos is not None:
        for intervalo in intervalos:
            plt.axvline(x=intervalo, linestyle='--', color='red')

    plt.show()

In [20]:
def mapping_labels(df: pd.DataFrame, label_mapping: dict = None):
    """
    Encode the 'class' column of a DataFrame as integer codes.

    Parameters:
        - df: pandas DataFrame with a 'class' column. It is not modified; the
          encoding is applied to a copy.
        - label_mapping: optional dict {original label: integer code}. When omitted
          (the default) the mapping is built from the distinct labels found in df,
          sorted in ascending order and numbered from 0. Pass the mapping returned by
          an earlier call to encode another dataset with the same codes, even when
          that dataset only contains a subset of the classes.

    Returns:
        A tuple (df_data, label_mapping): the encoded copy of df and the mapping used.

    Raises:
        ValueError: if label_mapping is supplied and a label present in df['class']
        is missing from it.
    """
    df_data = df.copy()

    if label_mapping is None:
        # Derive the codes from the data: ascending label order -> 0, 1, 2, ...
        unique_labels = df_data['class'].unique()
        unique_labels.sort()
        label_mapping = {label: i for i, label in enumerate(unique_labels)}
    else:
        # A caller-supplied mapping must cover every label present; otherwise
        # Series.map would silently turn the uncovered ones into NaN.
        present_labels = df_data['class'].dropna().unique()
        missing = [label for label in present_labels if label not in label_mapping]
        if missing:
            raise ValueError(f"Labels missing from label_mapping: {missing}")

    # Replace the original labels with their integer codes.
    df_data['class'] = df_data['class'].map(label_mapping)
    return df_data, label_mapping

def inverse_mapping_labels(df: pd.DataFrame, label_mapping: dict):
    """
    Turn integer class codes back into the original labels.

    Parameters:
        - df: pandas DataFrame whose 'class' column holds integer codes. It is not
          modified; the decoding is applied to a copy.
        - label_mapping: dict {original label: integer code}, as returned by
          mapping_labels.

    Returns:
        A copy of df with 'class' holding the original labels.
    """
    restored = df.copy()

    # Flip the table so that it goes from code to original label.
    code_to_label = dict((code, label) for label, code in label_mapping.items())

    restored['class'] = restored['class'].map(code_to_label)
    return restored

In [21]:
# Project helper whose get_path_* methods supply the data file locations read below.
manipulate_path = ManipulatePath()

# NOTE(review): `preprocessing` and `format_data` are not used in the visible cells —
# confirm they are still needed.
preprocessing = Preprocessing()

format_data = FormatData()

# Raw-data location as reported by the path helper.
path_raw_data = manipulate_path.get_path_raw_data()

# Carregando scaler

In [22]:
# Load the scaler stored in scaler.pkl (path relative to the working directory).
# pickle.load can execute arbitrary code, so only open files from a trusted source.
# The StandardScaler annotation documents the expected type; it is not checked at run time.
with open('scaler.pkl', 'rb') as file:
    loaded_scaler: StandardScaler = pickle.load(file)

# Avaliação com dados desenhados

In [62]:
# Read the pre-processed "draw" dataset from the parquet file given by the path helper.
df_preprocessing = pd.read_parquet(manipulate_path.get_path_preprocessing_draw_data())

In [63]:
# Share of each class, as a percentage of all rows in the dataset
contagem_classes = df_preprocessing['class'].value_counts()
porcentagem_classes_real = 100 * contagem_classes.div(len(df_preprocessing))
porcentagem_classes_real

class
7    76.090745
1    19.606371
0     4.302884
Name: count, dtype: float64

In [64]:
# Hand-written table from raw class label to integer class code.
# It agrees with the encoding that mapping_labels produces for the full training
# set (classes 0, 1, 2, 5, 6, 7) further down in this notebook.
label_mapping = {0.0: 0, 1.0: 1, 2.0: 2, 5.0: 3, 6.0: 4, 7.0: 5}
label_mapping

{0.0: 0, 1.0: 1, 2.0: 2, 5.0: 3, 6.0: 4, 7.0: 5}

In [65]:
# Re-encode raw class 7 as code 5 (label_mapping[7.0]). The other classes present in
# this dataset (0 and 1) already equal their codes, so nothing else needs replacing.
df_preprocessing['class'] = df_preprocessing['class'].replace(7, 5)

In [27]:
# Share of each class, as a percentage of all rows in the dataset
contagem_classes = df_preprocessing['class'].value_counts()
porcentagem_classes_real = 100 * contagem_classes.div(len(df_preprocessing))
porcentagem_classes_real

class
5    76.090745
1    19.606371
0     4.302884
Name: count, dtype: float64

In [28]:
# Split the frame into the target (y) and the feature columns (X)
y = df_preprocessing['class']
X = df_preprocessing.drop(columns='class')

In [29]:
# Display the feature matrix.
X

Unnamed: 0_level_0,P-TPT,T-TPT,P-MON-CKP,T-JUS-CKP
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018-09-05 20:44:36,175.3406,114.8907,96.64524,66.75450
2018-09-05 20:49:17,175.3414,114.8907,96.64524,66.75450
2018-09-05 20:49:18,175.3426,114.8907,96.64524,66.75450
2018-09-05 20:49:19,175.3437,114.8907,96.64524,66.75450
2018-09-05 20:49:20,175.3449,114.8907,96.64524,66.75450
...,...,...,...,...
2018-08-21 11:11:33,211.8868,117.8766,122.91480,65.02185
2018-08-21 11:11:35,211.8868,117.8766,122.91490,65.02185
2018-08-21 11:11:36,211.8868,117.8766,122.91500,65.02185
2018-08-21 11:11:38,211.8869,117.8766,122.91510,65.02185


In [30]:
# Scale the features with the scaler loaded from scaler.pkl (transform only, no refit).
X_norm = loaded_scaler.transform(X)

In [31]:
# Load the saved network from peso_rede_unica.h5 in the current working directory.
model_1_load = load_model(str(Path.cwd() / "peso_rede_unica.h5"))

In [32]:
# Distinct class codes present in the ground truth.
np.unique(y)

array([0, 1, 5], dtype=int64)

In [33]:
# Model output per row, reduced to the index of the highest-scoring class
y_pred = model_1_load.predict(X_norm).argmax(axis=1)

accuracy_score(y, y_pred)



0.014119903144010381

In [34]:
# Distinct class codes the model predicted.
np.unique(y_pred)

array([0, 1, 2, 3, 4], dtype=int64)

In [35]:
# Macro F1: unweighted mean of the per-class F1 scores. scikit-learn averages over every
# label found in y or y_pred, so labels that occur in only one of them count as well.
f1_score(y, y_pred, average="macro")

0.01683162433136606

In [39]:
# Per-class precision / recall / F1.
# With average=None scikit-learn returns one score per label in the sorted union of
# y and y_pred. That union is computed once, passed explicitly, and used to name each
# row, instead of assuming the labels are exactly 0..n-1 (which mislabels the rows
# whenever a class code is missing from both y and y_pred).
class_labels = np.union1d(y, y_pred)
precision = precision_score(y, y_pred, labels=class_labels, average=None)
recall = recall_score(y, y_pred, labels=class_labels, average=None)
f1 = f1_score(y, y_pred, labels=class_labels, average=None)
for class_label, class_precision, class_recall, class_f1 in zip(class_labels, precision, recall, f1):
    print(f'Classe {class_label}:')
    print(f'Precision: {class_precision}')
    print(f'Recall: {class_recall}')
    print(f'F1-score: {class_f1}\n')

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Classe 0:
Precision: 0.05896501596129623
Recall: 0.3255404901686702
F1-score: 0.09984512519493326

Classe 1:
Precision: 1.0
Recall: 0.0005726381233830643
F1-score: 0.0011446207932630892

Classe 2:
Precision: 0.0
Recall: 0.0
F1-score: 0.0

Classe 3:
Precision: 0.0
Recall: 0.0
F1-score: 0.0

Classe 4:
Precision: 0.0
Recall: 0.0
F1-score: 0.0

Classe 5:
Precision: 0.0
Recall: 0.0
F1-score: 0.0



# Avaliação com os dados simulados

In [58]:
# Read the pre-processed simulated dataset from the parquet file given by the path helper.
df_preprocessing = pd.read_parquet(manipulate_path.get_path_preprocessing_simulated_data())

In [59]:
# Share of each class, as a percentage of all rows in the dataset
contagem_classes = df_preprocessing['class'].value_counts()
porcentagem_classes_real = 100 * contagem_classes.div(len(df_preprocessing))
porcentagem_classes_real

class
5    55.980035
1    40.844868
2     2.312167
0     0.862930
Name: count, dtype: float64

In [60]:
# Show the raw label -> class code table again, as a reference for the re-encoding below.
label_mapping

{0.0: 0, 1.0: 1, 2.0: 2, 5.0: 3, 6.0: 4, 7.0: 5}

In [61]:
# Re-encode raw class 5 as code 3 (label_mapping[5.0]). The other classes present in
# this dataset (0, 1 and 2) already equal their codes, so nothing else needs replacing.
df_preprocessing['class'] = df_preprocessing['class'].replace(5, 3)

In [44]:
# Split the frame into the target (y) and the feature columns (X)
y = df_preprocessing['class']
X = df_preprocessing.drop(columns='class')

In [45]:
# Scale the features with the scaler loaded from scaler.pkl (transform only, no refit).
X_norm = loaded_scaler.transform(X)

In [47]:
# Distinct class codes present in the ground truth.
np.unique(y)

array([0, 1, 2, 3], dtype=int64)

In [48]:
# Model output per row, reduced to the index of the highest-scoring class
y_pred = model_1_load.predict(X_norm).argmax(axis=1)

accuracy_score(y, y_pred)



0.3756901597246249

In [49]:
# Distinct class codes the model predicted.
np.unique(y_pred)

array([0, 1, 2, 3, 4, 5], dtype=int64)

In [50]:
# Distinct class codes present in the ground truth, for comparison with the predicted ones.
np.unique(y)

array([0, 1, 2, 3], dtype=int64)

In [51]:
# Per-class precision / recall / F1.
# With average=None scikit-learn returns one score per label in the sorted union of
# y and y_pred. Naming the rows with np.unique(y_pred)[i] is only right when every
# true class was also predicted; otherwise rows are mislabelled or the index runs past
# the end. The union is computed once, passed explicitly, and used to name each row.
class_labels = np.union1d(y, y_pred)
precision = precision_score(y, y_pred, labels=class_labels, average=None)
recall = recall_score(y, y_pred, labels=class_labels, average=None)
f1 = f1_score(y, y_pred, labels=class_labels, average=None)
for class_label, class_precision, class_recall, class_f1 in zip(class_labels, precision, recall, f1):
    print(f'Classe {class_label}:')
    print(f'Precision: {class_precision}')
    print(f'Recall: {class_recall}')
    print(f'F1-score: {class_f1}\n')

  _warn_prf(average, modifier, msg_start, len(result))


Classe 0:
Precision: 0.006198924133317827
Recall: 0.09706060223428788
F1-score: 0.01165357484679

Classe 1:
Precision: 0.5056860010767512
Recall: 0.8032740252696354
F1-score: 0.6206521535134096

Classe 2:
Precision: 0.9808749668490312
Recall: 0.8898093421597637
F1-score: 0.9331255998564246

Classe 3:
Precision: 0.2785829828337048
Recall: 0.046771138422515415
F1-score: 0.08009514803109836

Classe 4:
Precision: 0.0
Recall: 0.0
F1-score: 0.0

Classe 5:
Precision: 0.0
Recall: 0.0
F1-score: 0.0



# Avaliação com todos os dados de treinamento

In [75]:
# Read the pre-processed real dataset with all classes from the parquet file given by the path helper.
df_preprocessing = pd.read_parquet(manipulate_path.get_path_preprocessing_real_data_all_classes())

In [76]:
# Share of each class, as a percentage of all rows in the dataset
contagem_classes = df_preprocessing['class'].value_counts()
porcentagem_classes_real = 100 * contagem_classes.div(len(df_preprocessing))
porcentagem_classes_real

class
0.0    32.679781
5.0    30.983203
7.0    29.009351
1.0     4.797527
6.0     1.810310
2.0     0.719828
Name: count, dtype: float64

In [77]:
# Encode the class labels as consecutive integers; label_mapping_2 is the table derived from this data.
# The call rebinds df_preprocessing, so running this cell again would encode the already-encoded
# labels and overwrite label_mapping_2 with a code -> code table.
df_preprocessing, label_mapping_2 = mapping_labels(df_preprocessing)

In [78]:
# Share of each class, as a percentage of all rows in the dataset
contagem_classes = df_preprocessing['class'].value_counts()
porcentagem_classes_real = 100 * contagem_classes.div(len(df_preprocessing))
porcentagem_classes_real

class
0    32.679781
3    30.983203
5    29.009351
1     4.797527
4     1.810310
2     0.719828
Name: count, dtype: float64

In [79]:
# Split the frame into the target (y) and the feature columns (X)
y = df_preprocessing['class']
X = df_preprocessing.drop(columns='class')

# Scale with the loaded scaler (transform only, no refit)
X_norm = loaded_scaler.transform(X)

# Model output per row, reduced to the index of the highest-scoring class
y_pred = model_1_load.predict(X_norm).argmax(axis=1)

accuracy_score(y, y_pred)



0.9790617683968936