In [None]:
from google.colab import drive
import pandas as pd

# Mount Google Drive so the dataset and model folders below are reachable.
drive.mount("/content/drive")

# Name of the raw CSV (inside data_dir) that load_data() reads.
file_name = "NovoDataset2.csv"
# Value forwarded to split_data() as the test_size of the unshuffled split.
test_size = 0.4

# Folder used by load_data()/save_data() for CSV input and output.
data_dir = "/content/drive/MyDrive/Mestrado/Projeto/LSTMLevel/data"
# Folder where the fitted scaler is stored and reloaded.
model_dir = "/content/drive/MyDrive/Mestrado/Projeto/LSTMLevel/model"

Mounted at /content/drive


In [None]:
import joblib
from pathlib import Path
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from datetime import datetime, timedelta
import numpy as np

def load_data(file_name):
    """Read a CSV file located in the notebook's data directory.

    Args:
        file_name: File name, resolved relative to the global ``data_dir``.

    Returns:
        The parsed file as a pandas DataFrame.
    """
    csv_path = Path(data_dir) / file_name
    return pd.read_csv(csv_path)


def save_data(df, file_name):
    """Write a DataFrame to the data directory as CSV, cast to float, without the index.

    Args:
        df: Frame to persist; every column must be convertible to float.
        file_name: Target file name, resolved relative to the global ``data_dir``.

    Returns:
        None.
    """
    target_path = Path(data_dir) / file_name
    numeric_df = df.astype(float)
    numeric_df.to_csv(target_path, index=False)

def adicionar_coluna_trip(df):
    """Add a binary ``Trip`` column flagging 1 -> 0 transitions of the shut-off valve.

    A row gets ``Trip == 1`` when ``Valvula.VALVE_SHUTOFF.L`` is 0 on that row
    and was 1 on the previous row; every other row (including the first one,
    which has no predecessor) gets 0.

    The comparison is done positionally with ``shift``, so it works for any
    index (not only the default 0..n-1) and for an empty frame.

    Args:
        df: Frame containing the ``Valvula.VALVE_SHUTOFF.L`` column.
            It is modified in place.

    Returns:
        The same ``df`` object, with the ``Trip`` column added/overwritten.
    """
    sinal = df['Valvula.VALVE_SHUTOFF.L']

    # shift(1) yields NaN for the first row, which never equals 1, so the
    # first row can never be flagged.
    queda = sinal.eq(0) & sinal.shift(1).eq(1)
    df['Trip'] = queda.astype(int)

    return df

def adicionar_coluna_label(df):
    """Copy ``Drum.V1.L`` into a ``Label`` column (in place) and return the frame.

    Args:
        df: Frame containing the ``Drum.V1.L`` column.

    Returns:
        The same ``df`` object with ``Label`` added/overwritten.
    """
    coluna_origem = 'Drum.V1.L'
    coluna_destino = 'Label'
    df[coluna_destino] = df[coluna_origem]
    return df

def change_datetime(df):
    """Fill ``Datetime`` with synthetic timestamps spaced one second apart.

    The first row receives the current wall-clock time and each following row
    is one second later than the previous one.

    Args:
        df: Frame to stamp; modified in place.

    Returns:
        The same ``df`` object with ``Datetime`` added/overwritten.
    """
    inicio = datetime.now()

    deslocamentos = []
    for passo in range(len(df)):
        deslocamentos.append(timedelta(seconds=passo))

    df['Datetime'] = inicio + pd.to_timedelta(deslocamentos)
    return df

def arredondar_dataframe(df, casas_decimais=3):
    """Return a copy of ``df`` rounded to ``casas_decimais`` decimal places.

    Args:
        df: Frame to round; it is not modified.
        casas_decimais: Number of decimal places to keep (default 3).

    Returns:
        A new, rounded DataFrame.
    """
    arredondado = df.round(decimals=casas_decimais)
    return arredondado

def create_features(df):
    """Derive the ``Trip`` and ``Label`` columns and drop ``Datetime``.

    ``Datetime`` is first parsed with ``pd.to_datetime`` and removed again once
    the derived columns exist. Two summary lines are printed: the number of
    rows with ``Trip == 1`` and the ``.max()`` of that same scalar count.

    Args:
        df: Frame with a ``Datetime`` column plus the columns required by
            ``adicionar_coluna_trip`` and ``adicionar_coluna_label``.

    Returns:
        The frame with ``Trip`` and ``Label`` added and ``Datetime`` removed.
    """
    df['Datetime'] = pd.to_datetime(df['Datetime'])

    for adicionar in (adicionar_coluna_trip, adicionar_coluna_label):
        df = adicionar(df)

    df = df.drop(columns=['Datetime'])

    contagem = df['Trip'].eq(1).sum()
    print('{} eventos de trip'.format(contagem))
    print('{} Max'.format(contagem.max()))

    return df

def rescale_data(df):
    """Fit a ``MinMaxScaler`` on ``df``, persist it, and return the scaled frame.

    The fitted scaler is dumped with joblib to
    ``model_dir/scaler_teste2_escrita.gz``.

    Args:
        df: Numeric frame the scaler is fitted on.

    Returns:
        A new DataFrame with the scaled values and the same index/columns.
    """
    fitted_scaler = MinMaxScaler().fit(df)

    scaled_values = fitted_scaler.transform(df)
    df_scaled = pd.DataFrame(scaled_values, index=df.index, columns=df.columns)

    scaler_path = Path(model_dir) / 'scaler_teste2_escrita.gz'
    joblib.dump(fitted_scaler, scaler_path)

    return df_scaled

def split_data(df, test_size):
    """Split ``df`` into train and test parts without shuffling.

    Args:
        df: Frame to split.
        test_size: Forwarded to ``train_test_split`` as ``test_size``.

    Returns:
        A ``(train, test)`` tuple, in the original row order.
    """
    parts = train_test_split(df, test_size=test_size, shuffle=False)
    train_part, test_part = parts
    return train_part, test_part

def remove_features(df):
    """Return ``df`` without the noise-flag, transmitter-output and ``Trip`` columns.

    Args:
        df: Frame that contains every column listed below.

    Returns:
        A new DataFrame without those columns.

    Raises:
        KeyError: If any of the listed columns is missing from ``df``.
    """
    descartar = (
        'Variaveis.TR1_RUIDO',
        'Variaveis.TR2_RUIDO',
        'Variaveis.TR3_RUIDO',
        'Transmissor.TR1.OUT',
        'Transmissor.TR2.OUT',
        'Transmissor.TR3.OUT',
        'Trip',
    )
    reduzido = df.drop(columns=list(descartar))
    return reduzido

def aplicar_ruidos(dataset):
    """Return a copy of ``dataset`` with random perturbations applied row by row.

    For each row label ``i`` in ``range(len(dataset))`` one of three things happens:

    * first draw < 0.5: two of the three ``Variaveis.TR*_RUIDO`` columns are
      picked, set to ``True`` and then incremented by a uniform value in
      [0.01, 0.2); ``Variaveis.VOTACAO_TRANSMISSORES`` is overwritten with the
      row's ``Transmissor.TR1.OUT``;
    * otherwise, second draw < 0.5: a random integer is added to ``Tubo.S6.W``
      on a short run of rows starting at ``i``;
    * otherwise the row is left untouched.

    The global NumPy random state is used and is not seeded here, so results
    differ between runs unless the caller seeds it.

    Args:
        dataset: Input frame; it is copied, not modified.
            Rows are addressed by label ``i`` -- assumes a default 0..n-1
            index (TODO confirm against the caller).

    Returns:
        The perturbed copy.
    """
    novo_dataset = dataset.copy()

    for i in range(len(dataset)):
        if np.random.rand() < 0.5:
            # Pick two distinct noise-flag columns for this row.
            colunas_ruido = np.random.choice(['Variaveis.TR1_RUIDO', 'Variaveis.TR2_RUIDO', 'Variaveis.TR3_RUIDO'], size=2, replace=False)
            novo_dataset.loc[i, colunas_ruido] = True
            novo_dataset.at[i, 'Variaveis.VOTACAO_TRANSMISSORES'] = novo_dataset.at[i, 'Transmissor.TR1.OUT']

            # NOTE(review): the flag was just set to True and now gets a float
            # added, so the stored value is no longer a plain boolean -- confirm
            # this is intended.
            for coluna in colunas_ruido:
                novo_dataset.at[i, coluna] += np.random.uniform(0.01, 0.2)

        # A second, independent draw: this branch is only evaluated when the
        # first draw was >= 0.5.
        elif np.random.rand() < 0.5:
                coluna_w_s6 = 'Tubo.S6.W'
                # randint's upper bound is exclusive: 1..10 rows, offset 0..2000.
                quantidade_registros_alterar = np.random.randint(1, 11)
                valor_somar = np.random.randint(0, 2001)
                # NOTE(review): .loc label slices include both endpoints, so up to
                # quantidade_registros_alterar + 1 rows are changed -- confirm.
                novo_dataset.loc[i:i + quantidade_registros_alterar, coluna_w_s6] += valor_somar

    return novo_dataset


def prep_data(df, test_size, plot_df=False):
    """Run the full preparation pipeline and write ``train.csv`` / ``test.csv``.

    Steps: inject noise, stamp synthetic datetimes, derive ``Trip``/``Label``,
    drop unused columns, round, split chronologically, fit the scaler on the
    training split only, and apply that same scaler to the test split.

    Args:
        df: Raw dataset as returned by ``load_data``.
        test_size: Forwarded to ``split_data``.
        plot_df: When truthy, the unscaled training split is also saved as
            ``plot_df.csv``.

    Returns:
        None. Results are written to ``data_dir``; the fitted scaler is
        written to ``model_dir`` by ``rescale_data``.
    """
    print("Starting with data preparation...")

    df = aplicar_ruidos(df)

    df = change_datetime(df)
    df = create_features(df)
    df = remove_features(df)
    df = arredondar_dataframe(df)

    train_df, test_df = split_data(df, test_size)

    if plot_df:
        save_data(train_df, 'plot_df.csv')

    train_df = rescale_data(train_df)
    # Reload the scaler that rescale_data() has just fitted and saved. The
    # file name must match the one written there; loading 'scaler.gz' would
    # pick up a stale scaler from an earlier run (or fail if it is absent).
    scaler = joblib.load(Path(model_dir, 'scaler_teste2_escrita.gz'))

    test_df = pd.DataFrame(
        scaler.transform(test_df),
        index=test_df.index,
        columns=test_df.columns)

    save_data(train_df, 'train.csv')
    save_data(test_df, 'test.csv')

    print("Completed.")

In [None]:
# Load the raw dataset and run the preparation pipeline.
df = load_data(file_name)
print(df.shape)
# prep_data's third parameter is the `plot_df` flag; `validation_size` is not
# defined anywhere in this notebook, so passing it raised NameError on a fresh
# kernel. Pass plot_df=True explicitly if plot_df.csv is needed.
prep_data(df, test_size)

(135707, 56)
Starting with data preparation...
12 eventos de trip
12 Max
Completed.


In [None]:
# LSTM: multivariate, multi-step forecasting
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pandas as pd
import statsmodels.api as sm
from pandas import DataFrame , concat
from sklearn.metrics import mean_absolute_error , mean_squared_error
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
from numpy import mean , concatenate
from math import sqrt
from pandas import read_csv
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,LSTM,Activation
from sklearn.preprocessing import LabelEncoder
#from keras.models import Sequential
#from keras.layers import Dense
#from keras.layers import LSTM

from numpy import array , hstack
from tensorflow import keras
import tensorflow as tf

In [None]:
# Load the scaled splits written by prep_data().
train_df = load_data('train.csv')
test_df = load_data('test.csv')

# Column listing for both splits, then the total NaN count of each.
for frame in (train_df, test_df):
    print(frame.columns)

for frame in (train_df, test_df):
    print(frame.isna().sum().sum())

colunas_label = ['Label']

# Features are every column except the label column(s); targets are the label column(s).
train_feature_mask = ~train_df.columns.isin(colunas_label)
X_train = np.array(train_df.loc[:, train_feature_mask])
y_train = np.array(train_df[colunas_label])

test_feature_mask = ~test_df.columns.isin(colunas_label)
X_test = np.array(test_df.loc[:, test_feature_mask])
y_test = np.array(test_df[colunas_label])

for matrix in (X_train, y_train, X_test, y_test):
    print(matrix.shape)

Index(['Variaveis.H2O_QTD', 'Valvula.VALVE5.OP',
       'Variaveis.VOTACAO_TRANSMISSORES', 'Valvula.VALVE_RESPIRO.OP',
       'Valvula.VALVE_RESPIRO.L', 'StreamSend.FLARE.P', 'Valvula.VALVE3.OP',
       'Valvula.VALVE3.L', 'StreamSend.SAIDA_GAS.P', 'Valvula.VALVE2.OP',
       'Valvula.VALVE2.L', 'Valvula.VALVE1.OP', 'Valvula.VALVE1.L',
       'Valvula.VALVE_SHUTOFF.OP', 'Valvula.VALVE_SHUTOFF.L',
       'StreamSend.SAIDA_LIQUIDO.P', 'Valvula.VALVE4.OP', 'Valvula.VALVE4.L',
       'StreamSend.SAIDA_VAZAMENTO.P', 'Valvula.VALVE5.L', 'Drum.V1.L',
       'Drum.V1.P', 'Drum.V1.T', 'Tubo.S1.P', 'Tubo.S1.W', 'Tubo.S2.P',
       'Tubo.S2.W', 'Tubo.S6.P', 'Tubo.S6.W', 'Tubo.S5.P', 'Tubo.S5.W',
       'Tubo.S10.P', 'Tubo.S10.W', 'Tubo.S3.P', 'Tubo.S3.W', 'Tubo.S4.P',
       'Tubo.S4.W', 'Tubo.S25.P', 'Tubo.S25.W', 'Tubo.S26.P', 'Tubo.S26.W',
       'Tubo.S27.P', 'Tubo.S27.W', 'Tubo.S28.P', 'Tubo.S28.W',
       'Controlador.PC1.OUT', 'Controlador.PID1.OUT', 'Controlador.PID2.OUT',
       'Control

In [None]:
def split_sequences(df_X, df_y, n_steps_in, n_steps_out):
    """Cut aligned feature/target arrays into sliding windows.

    Window ``s`` pairs ``df_X[s : s + n_steps_in]`` with
    ``df_y[s + n_steps_in - 1 : s + n_steps_in + n_steps_out - 1]``, i.e. the
    target window starts at the last input step and is ``n_steps_out`` long.
    Windows are produced while the target window still fits inside the data.

    Args:
        df_X: Sliceable feature sequence (e.g. a 2-D array of rows).
        df_y: Sliceable target sequence aligned with ``df_X``.
        n_steps_in: Number of input steps per window.
        n_steps_out: Number of target steps per window.

    Returns:
        ``(X, y)`` as NumPy arrays stacking the windows; both are empty when
        no complete window fits.
    """
    total = len(df_X)
    span = n_steps_in + n_steps_out - 1

    # Number of start positions whose target window ends at or before `total`,
    # never exceeding the number of rows.
    n_windows = min(total, max(0, total - span + 1))
    starts = range(n_windows)

    janelas_X = [df_X[s:s + n_steps_in] for s in starts]
    janelas_y = [df_y[s + n_steps_in - 1:s + span] for s in starts]

    return array(janelas_X), array(janelas_y)

# Window sizes: input steps per sample and target steps per sample.
n_steps_in = 30
n_steps_out = 15

X_train_sequence, y_train_sequence = split_sequences(X_train, y_train, n_steps_in, n_steps_out)
X_test_sequence, y_test_sequence = split_sequences(X_test, y_test, n_steps_in, n_steps_out)

# Report the shape of every windowed array.
for nome, tensor in (
    ("X_train_sequence.shape", X_train_sequence),
    ("y_train_sequence.shape", y_train_sequence),
    ("X_test_sequence.shape", X_test_sequence),
    ("y_test_sequence.shape", y_test_sequence),
):
    print(nome, tensor.shape)

X_train_sequence.shape (108522, 30, 49)
y_train_sequence.shape (108522, 15, 1)
X_test_sequence.shape (27099, 30, 49)
y_test_sequence.shape (27099, 15, 1)


In [None]:
import tensorflow as tf
from keras.layers import Dropout
from tensorflow.keras.layers import LSTM, GRU, Dense, Bidirectional, Dropout, Flatten
from tensorflow.keras.metrics import RootMeanSquaredError, MeanAbsoluteError, Accuracy
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

def get_model(params, input_shape):
    """Build and compile a single-layer LSTM model with a dense output head.

    Reads the globals ``n_steps_in`` (input window length) and ``n_steps_out``
    (width of the output layer).

    Args:
        params: Mapping providing ``"lstm_units"``, ``"loss"`` and ``"optimizer"``.
        input_shape: Number of features per time step.

    Returns:
        The compiled Keras ``Sequential`` model, tracking RMSE and MAE.
    """
    recurrent = LSTM(
        units=params["lstm_units"],
        return_sequences=False,
        input_shape=(n_steps_in, input_shape),
    )
    # Variant with recurrent dropout, kept for reference:
    # LSTM(units=params["lstm_units"], return_sequences=False, dropout=params["dropout"])
    head = Dense(n_steps_out)

    model = Sequential([recurrent, head])
    model.compile(
        loss=params["loss"],
        optimizer=params["optimizer"],
        metrics=[RootMeanSquaredError(), MeanAbsoluteError()],
    )
    return model

def get_model_2(params, input_shape):
    """Build and compile a stacked bidirectional-LSTM model.

    Architecture: BiLSTM(64) -> Dropout -> BiLSTM(32) -> Dropout -> Flatten ->
    Dense(16) -> Dense(n_steps_out). Layer widths are fixed; the dropout rate
    is taken from ``params["dropout"]`` and falls back to 0.2 when absent.

    Reads the globals ``n_steps_in`` (input window length) and ``n_steps_out``
    (width of the output layer).

    Args:
        params: Mapping providing ``"loss"`` and ``"optimizer"``, and
            optionally ``"dropout"``.
        input_shape: Number of features per time step.

    Returns:
        The compiled Keras ``Sequential`` model, tracking RMSE and MAE.
    """
    dropout_rate = params.get("dropout", 0.2)

    model = Sequential()
    # input_shape goes on the Bidirectional wrapper (the layer actually added
    # to the Sequential model), not on the wrapped LSTM, so the model can be
    # built at construction time instead of on the first call.
    model.add(Bidirectional(
        LSTM(units=64, return_sequences=True),
        input_shape=(n_steps_in, input_shape),
    ))
    model.add(Dropout(dropout_rate))
    model.add(Bidirectional(LSTM(units=32, return_sequences=True)))
    model.add(Dropout(dropout_rate))
    model.add(Flatten())
    model.add(Dense(16))
    model.add(Dense(n_steps_out))

    model.compile(
        loss=params["loss"],
        optimizer=params["optimizer"],
        metrics=[RootMeanSquaredError(), MeanAbsoluteError()],
    )

    return model

def get_model_gru(params, input_shape):
    """Build and compile a single-layer GRU model (64 units) with a dense output head.

    Reads the globals ``n_steps_in`` (input window length) and ``n_steps_out``
    (width of the output layer).

    Args:
        params: Mapping providing ``"loss"`` and ``"optimizer"``.
        input_shape: Number of features per time step.

    Returns:
        The compiled Keras ``Sequential`` model, tracking RMSE and MAE.
    """
    camadas = [
        GRU(64, input_shape=(n_steps_in, input_shape)),
        Dense(n_steps_out),
    ]
    model = Sequential(camadas)
    model.compile(
        loss=params["loss"],
        optimizer=params["optimizer"],
        metrics=[RootMeanSquaredError(), MeanAbsoluteError()],
    )
    return model

In [None]:
# Training hyper-parameters shared by the model builders and the fit call.
params = dict(
    loss="mean_squared_error",
    optimizer="adam",
    dropout=0.2,
    lstm_units=64,
    epochs=300,
    batch_size=128,
    es_patience=10,  # early-stopping patience, in epochs
)

In [None]:
import tensorflow as tf
print("Versão do TensorFlow:", tf.__version__)

# Check whether a GPU is available
gpu_name = tf.test.gpu_device_name()
if not gpu_name:
    print("GPU não encontrada. Certifique-se de que está configurado corretamente.")
else:
    print("GPU disponível:", gpu_name)


Versão do TensorFlow: 2.15.0
GPU disponível: /device:GPU:0


In [None]:
# Build the stacked bidirectional model; the feature count comes from the windowed input.
model = get_model_2(params=params, input_shape=X_train_sequence.shape[2])

from keras.callbacks import ModelCheckpoint
from sklearn.model_selection import TimeSeriesSplit

# Only referenced by the commented-out cross-validation loop that follows this cell's code.
tscv = TimeSeriesSplit(n_splits=6)

# NOTE(review): this is a relative path, so checkpoints land in the current
# working directory rather than in model_dir -- confirm this is intended.
filepath = 'lstmlevel.epoch{epoch:02d}-loss{val_root_mean_squared_error:.5f}.hdf5'
model_checkpoint_callback = ModelCheckpoint(
    filepath=filepath,
    verbose=1,
    monitor='val_root_mean_squared_error',
    mode='min',
    save_best_only=True)
# restore_best_weights=True: without it, after early stopping `model` keeps the
# weights of the last epoch run (es_patience epochs past the best one) instead
# of the best validation weights.
es_callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_root_mean_squared_error',
    mode='min',
    patience=params["es_patience"],
    restore_best_weights=True)

# NOTE(review): the test split doubles as validation data here, so early
# stopping and checkpoint selection are tuned on the test set; any metric
# later reported on it will be optimistic.
history = model.fit(
    X_train_sequence,
    y_train_sequence,
    validation_data=(X_test_sequence, y_test_sequence),
    epochs=params["epochs"],
    batch_size=params["batch_size"],
    verbose=1,
    callbacks=[es_callback, model_checkpoint_callback]
)

# for train_index, test_index in tscv.split(X_train_sequence):
# 	X_train_split, X_val_split = X_train_sequence[train_index], X_train_sequence[test_index]
# 	y_train_split, y_val_split = y_train_sequence[train_index], y_train_sequence[test_index]

# 	model.fit(
# 		X_train_split,
# 		y_train_split,
# 		validation_data=(X_val_split, y_val_split),
# 		epochs=params["epochs"],
# 		batch_size=params["batch_size"],
# 		verbose=1,
# 		callbacks=[model_checkpoint_callback]
# 		#callbacks=[es_callback,model_checkpoint_callback]
# 	)

Epoch 1/300
Epoch 1: val_root_mean_squared_error improved from inf to 0.00597, saving model to lstmlevel-2-escrita.epoch01-loss0.00597.hdf5
Epoch 2/300
 11/848 [..............................] - ETA: 9s - loss: 4.2288e-04 - root_mean_squared_error: 0.0206 - mean_absolute_error: 0.0156

  saving_api.save_model(


Epoch 2: val_root_mean_squared_error improved from 0.00597 to 0.00536, saving model to lstmlevel-2-escrita.epoch02-loss0.00536.hdf5
Epoch 3/300
Epoch 3: val_root_mean_squared_error improved from 0.00536 to 0.00484, saving model to lstmlevel-2-escrita.epoch03-loss0.00484.hdf5
Epoch 4/300
Epoch 4: val_root_mean_squared_error did not improve from 0.00484
Epoch 5/300
Epoch 5: val_root_mean_squared_error did not improve from 0.00484
Epoch 6/300
Epoch 6: val_root_mean_squared_error did not improve from 0.00484
Epoch 7/300
Epoch 7: val_root_mean_squared_error did not improve from 0.00484
Epoch 8/300
Epoch 8: val_root_mean_squared_error did not improve from 0.00484
Epoch 9/300
Epoch 9: val_root_mean_squared_error improved from 0.00484 to 0.00439, saving model to lstmlevel-2-escrita.epoch09-loss0.00439.hdf5
Epoch 10/300
Epoch 10: val_root_mean_squared_error did not improve from 0.00439
Epoch 11/300
Epoch 11: val_root_mean_squared_error did not improve from 0.00439
Epoch 12/300
Epoch 12: val_roo