In [None]:
from api_dados import *

nome_produto = 'dados-hidrologicos-res'
df = coletar_dados('ons', nome_produto)

df_res = df[df['nom_reservatorio'] == 'ITAPEBI']
df_res_val= df_res['val_vazaonatural'].reset_index(drop=True)
df_res_val

Iniciando download de 26 arquivos...


0        350.72
1        361.78
2        476.52
3        752.56
4       1406.71
         ...   
9486     354.59
9487     338.32
9488     309.49
9489     278.35
9490     260.20
Name: val_vazaonatural, Length: 9491, dtype: float64

In [None]:
from funcoes_modelagem import *

X_seq_lstm, y = X_3d(df_res_val, 30)

X_seq_ffn= X_2d(X_seq_lstm)

input_shape_lstm = (X_seq_lstm.shape[1], X_seq_lstm.shape[2])
input_shape_ffn = (X_seq_ffn.shape[1],)


In [3]:

X_train_lstm, y_train, X_val_lstm, y_val, X_test_lstm, y_test = split_treino_valid_teste(X_seq_lstm, y)

X_train_ffn, y_train, X_val_ffn, y_val, X_test_ffn, y_test = split_treino_valid_teste(X_seq_ffn, y)


In [4]:
from sklearn.preprocessing import StandardScaler

# scaler para X (LSTM) - precisa achatar antes
X_train_lstm_flat = X_train_lstm.reshape(X_train_lstm.shape[0], -1)
X_val_lstm_flat   = X_val_lstm.reshape(X_val_lstm.shape[0], -1)
X_test_lstm_flat  = X_test_lstm.reshape(X_test_lstm.shape[0], -1)

scaler_X_lstm = StandardScaler()
X_train_lstm = scaler_X_lstm.fit_transform(X_train_lstm_flat).reshape(X_train_lstm.shape)
X_val_lstm   = scaler_X_lstm.transform(X_val_lstm_flat).reshape(X_val_lstm.shape)
X_test_lstm  = scaler_X_lstm.transform(X_test_lstm_flat).reshape(X_test_lstm.shape)

# scaler para X (FFN)
scaler_X_ffn = StandardScaler()
X_train_ffn = scaler_X_ffn.fit_transform(X_train_ffn)
X_val_ffn   = scaler_X_ffn.transform(X_val_ffn)
X_test_ffn  = scaler_X_ffn.transform(X_test_ffn)

# scaler para y
scaler_y = StandardScaler()
y_train = scaler_y.fit_transform(y_train.reshape(-1, 1)).ravel()
y_val   = scaler_y.transform(y_val.reshape(-1, 1)).ravel()
y_test  = scaler_y.transform(y_test.reshape(-1, 1)).ravel()

In [5]:
import tensorflow as tf
import numpy as np
import random
import os

seed= 42

os.environ['PYTHONHASHSEED'] = str(seed)
random.seed(seed)
np.random.seed(seed)
tf.random.set_seed(seed)


In [None]:
from keras.callbacks import EarlyStopping, ReduceLROnPlateau

# espa√ßo de busca global
busca = {
    "num_layers": (1, 3),                # n√∫mero de camadas minimo e maximo
    "units": (64, 512, 64),              # min, max e step para n√∫mero de neur√¥nios
    "dropout": (0.0, 0.3, 0.1),          # min, max e step para dropout
    "learning_rate": [1e-1, 1e-2, 1e-3],  # op√ß√µes de taxa de aprendizado
    "batch_size": [32, 64, 128, 256]      # op√ß√µes de batch_size 
}

es= EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True)
red_lr= ReduceLROnPlateau(monitor="val_loss", factor=0.1, patience=3, min_lr=1e-6)

In [None]:
import keras_tuner as kt

class CustomTuner(kt.RandomSearch):
    def run_trial(self, trial, *args, **kwargs):
        # usa os valores do dicion√°rio busca
        kwargs['batch_size'] = trial.hyperparameters.Choice('batch_size', values=busca["batch_size"])

        return super(CustomTuner, self).run_trial(trial, *args, **kwargs)

In [None]:
from keras import layers
import keras


def lstm_tuning(hp):
    model = keras.Sequential()
    model.add(layers.Input(shape=input_shape_lstm))

    num_layers = hp.Int("num_layers", busca["num_layers"][0], busca["num_layers"][1])
    for i in range(num_layers):
        return_seq = (i < num_layers - 1)
        model.add(layers.LSTM(
            units=hp.Int(f"units_{i}", 
                         min_value=busca["units"][0], 
                         max_value=busca["units"][1], 
                         step=busca["units"][2]),
            return_sequences=return_seq
        ))
        model.add(layers.Dropout(
            rate=hp.Float(f"dropout_{i}", 
                          min_value=busca["dropout"][0], 
                          max_value=busca["dropout"][1], 
                          step=busca["dropout"][2])
        ))

    model.add(layers.Dense(1))

    model.compile(
        optimizer=keras.optimizers.Adam(
            hp.Choice("learning_rate", values=busca["learning_rate"])
        ),
        loss="mean_squared_error",
        metrics=["mean_absolute_error"]
    )
    return model


def ffn_tuning(hp):
    model = keras.Sequential()
    model.add(layers.Input(shape=input_shape_ffn))
    
    num_layers = hp.Int("num_layers", busca["num_layers"][0], busca["num_layers"][1])
    for i in range(num_layers):
        model.add(layers.Dense(
            units=hp.Int(f"units_{i}", 
                         min_value=busca["units"][0], 
                         max_value=busca["units"][1], 
                         step=busca["units"][2]),
            activation="relu"
        ))
        model.add(layers.Dropout(
            rate=hp.Float(f"dropout_{i}", 
                          min_value=busca["dropout"][0], 
                          max_value=busca["dropout"][1], 
                          step=busca["dropout"][2])
        ))
    
    model.add(layers.Dense(1))
    
    model.compile(
        optimizer=keras.optimizers.Adam(
            hp.Choice("learning_rate", values=busca["learning_rate"])
        ),
        loss="mean_squared_error",
        metrics=["mean_absolute_error"]
    )
    return model

In [None]:
import time

# LSTM
tuner_lstm = CustomTuner(
    lstm_tuning,
    objective="val_loss",
    max_trials=50,
    executions_per_trial=1,
    directory="tuner_results",
    project_name="lstm_tuning"
)

inicio_lstm = time.time()
tuner_lstm.search(
    X_train_lstm, y_train,
    epochs=100,
    validation_data=(X_val_lstm, y_val),
    callbacks=[es, red_lr]
)
fim_lstm = time.time()
tempo_lstm = (fim_lstm - inicio_lstm) / 60

best_lstm = tuner_lstm.get_best_models(num_models=1)[0]


# FFN
tuner_ffn = CustomTuner(
    ffn_tuning,
    objective="val_loss",
    max_trials=50,
    executions_per_trial=1,
    directory="tuner_results",
    project_name="ffn_tuning"
)

inicio_ffn = time.time()
tuner_ffn.search(
    X_train_ffn, y_train,
    epochs=100,
    validation_data=(X_val_ffn, y_val),
    callbacks=[es, red_lr]
)
fim_ffn = time.time()
tempo_ffn = (fim_ffn - inicio_ffn) / 60

best_ffn = tuner_ffn.get_best_models(num_models=1)[0]



Trial 50 Complete [00h 00m 12s]
val_loss: 0.03725679591298103

Best val_loss So Far: 0.022920265793800354
Total elapsed time: 00h 16m 30s


  saveable.load_own_variables(weights_store.get(inner_path))


Tempo de busca no LSTM: 765.3 min
Tempo de busca no FFN: 16.5 min
[1m60/60[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m6s[0m 87ms/step - loss: 0.0665 - mean_absolute_error: 0.0807
LSTM Teste: [0.06646670401096344, 0.0806591808795929]
[1m60/60[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0757 - mean_absolute_error: 0.0890  
FFN Teste: [0.07567929476499557, 0.08897057175636292]


In [41]:
print(f"Tempo de busca na LSTM: {tempo_lstm:.1f} min")
print(f"Tempo de busca na FFN: {tempo_ffn:.1f} min")


params_lstm = best_lstm.count_params()
params_ffn = best_ffn.count_params()

print(f"O melhor modelo LSTM possui {params_lstm:,} par√¢metros")
print(f"O melhor modelo FFN possui {params_ffn:,} par√¢metros")

Tempo de busca na LSTM: 765.3 min
Tempo de busca na FFN: 16.5 min
O melhor modelo LSTM possui 2,939,713 par√¢metros
O melhor modelo FFN possui 12,289 par√¢metros


<br>
A LSTM contem muito mais par√¢metros que a FFN, pois:

possui 4 blocos/port√µes que simulam individualmente uma FFN 

 - esquecimento, entrada, atualiza√ß√£o e sa√≠da (os 4 em cada estado de tempo)

<br>
e conex√µes com estados recorrentes que tornam o crescimento de par√¢metros uma fun√ß√£o quadr√°tico ao inv√©s de linear como na FFN

 - cada neur√¥nio em h(t) se conecta com todos os neur√¥nios em h(t-1)
<br>

In [32]:

df_top_lstm = gerar_tabela_melhores(tuner_lstm)
df_top_ffn = gerar_tabela_melhores(tuner_ffn)

print("LSTM:")
display(df_top_lstm) # display() no Jupyter ou print() no terminal

print("FFN:")
display(df_top_ffn)


LSTM:


Unnamed: 0,ranking,score_val_loss,num_layers,units_0,dropout_0,learning_rate,batch_size,units_1,dropout_1,units_2,dropout_2
0,1,0.021776,3,512,0.1,0.001,32,320.0,0.2,320.0,0.2
1,2,0.021848,3,384,0.1,0.001,32,448.0,0.1,256.0,0.0
2,3,0.021929,2,512,0.1,0.001,32,192.0,0.2,,
3,4,0.022001,2,320,0.0,0.001,32,320.0,0.2,,
4,5,0.02206,1,192,0.2,0.01,32,,,,


FFN:


Unnamed: 0,ranking,score_val_loss,num_layers,units_0,dropout_0,learning_rate,batch_size,units_1,dropout_1,units_2,dropout_2
0,1,0.02292,1,384,0.2,0.01,256,,,,
1,2,0.023287,1,192,0.0,0.01,256,,,,
2,3,0.023338,1,448,0.0,0.001,128,,,,
3,4,0.02357,1,320,0.1,0.001,32,,,,
4,5,0.023689,1,448,0.1,0.01,256,,,,


In [38]:
print("Resultado do LSTM no conjunto de teste:", best_lstm.evaluate(X_test_lstm, y_test))
print("Resultado do FFN no conjunto de teste:", best_ffn.evaluate(X_test_ffn, y_test))

[1m60/60[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m6s[0m 93ms/step - loss: 0.0665 - mean_absolute_error: 0.0807
Resultado do LSTM no conjunto de teste: [0.06646670401096344, 0.0806591808795929]
[1m60/60[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0757 - mean_absolute_error: 0.0890
Resultado do FFN no conjunto de teste: [0.07567929476499557, 0.08897057175636292]


<br>
Para ter uma compara√ß√£o justa em termos de par√¢metros dentro dos modelos e tempo gasto, vou tentar indicar um espa√ßo de busca que leve a mais par√¢metros e permitir mais trials na busca da FFN üôÇ

<br>
os par√¢metros em uma FFN s√£o os pesos e os bias dos neur√¥nios, ent√£o a quantidade total de par√¢metros ser√° o resultado do n√∫mero de camadas e do n√∫mero de neur√¥nios nessas camadas

cada neur√¥nio possui 1 bias e x pesos, onde x √© o n√∫mero de sa√≠das da camada anterior (ou a quantidade de features, caso seja a primeira camada oculta)

as camadas s√£o dividas em entrada, camadas ocultas e sa√≠da. assim, uma FFN com 30 atributos na entrada (como as 30 vaz√µes usadas nesse experimento), 2 camadas ocultas com 512 neur√¥nios cada e 1 sa√≠da tem o n√∫mero de par√¢metros calculado da seguinte forma:

- a camada de entrada n√£o possui par√¢metros a serem calibrados, pois √© composta apenas pelos atributos de entrada  
- a primeira camada oculta ter√° (30+1)*512 par√¢metros  
- a segunda ter√° (512+1)*512 par√¢metros  
- e a camada de sa√≠da ter√° (512+1)*1 par√¢metros (ela possui apenas um neur√¥nio, pois o modelo construido possui apenas uma sa√≠da)
<br>

In [40]:
# simulando n√∫mero de par√¢metros

c1= 1024
c2= 1024
c3= 0
c4= 0
print(f'N√∫mero de par√¢metros: {(30+1)*c1+(c1+1)*c2+(c2+1)*1}')

c1= 1664
c2= 1664
c3= 1664
c4= 1664
print(f'N√∫mero de par√¢metros: {(30+1)*c1+(c1+1)*c2+(c2+1)*c3+(c3+1)*c4+(c4+1)*1}')

N√∫mero de par√¢metros: 1082369
N√∫mero de par√¢metros: 8364929


In [None]:
busca_ffn_novo = {
    "num_layers": (2, 4),
    "units": (1024, 1664, 128),
    "dropout": (0.0, 0.3, 0.1),
    "learning_rate": [1e-1, 1e-2, 1e-3],
    "batch_size": [32, 64, 128, 256]
}


class CustomTuner(kt.RandomSearch):
    def run_trial(self, trial, *args, **kwargs):
        # usa os valores do dicion√°rio busca
        kwargs['batch_size'] = trial.hyperparameters.Choice('batch_size', values=busca_ffn_novo["batch_size"])

        return super(CustomTuner, self).run_trial(trial, *args, **kwargs)
    
def ffn_tuning_novo(hp):
    model = keras.Sequential()
    model.add(layers.Input(shape=input_shape_ffn))
    
    num_layers = hp.Int("num_layers", busca_ffn_novo["num_layers"][0], busca_ffn_novo["num_layers"][1])
    for i in range(num_layers):
        model.add(layers.Dense(
            units=hp.Int(f"units_{i}", 
                         min_value=busca_ffn_novo["units"][0], 
                         max_value=busca_ffn_novo["units"][1], 
                         step=busca_ffn_novo["units"][2]),
            activation="relu"
        ))
        model.add(layers.Dropout(
            rate=hp.Float(f"dropout_{i}", 
                          min_value=busca_ffn_novo["dropout"][0], 
                          max_value=busca_ffn_novo["dropout"][1], 
                          step=busca_ffn_novo["dropout"][2])
        ))
    
    model.add(layers.Dense(1))
    
    model.compile(
        optimizer=keras.optimizers.Adam(
            hp.Choice("learning_rate", values=busca_ffn_novo["learning_rate"])
        ),
        loss="mean_squared_error",
        metrics=["mean_absolute_error"]
    )
    return model

In [43]:
tuner_ffn_novo = CustomTuner(
    ffn_tuning_novo,
    objective="val_loss",
    max_trials=150,
    executions_per_trial=1,
    directory="tuner_results",
    project_name="ffn_tuning_novo"
)

inicio_ffn_novo = time.time()
tuner_ffn_novo.search(
    X_train_ffn, y_train,
    epochs=100,
    validation_data=(X_val_ffn, y_val),
    callbacks=[es, red_lr]
)
fim_ffn_novo = time.time()
tempo_ffn_novo = (fim_ffn_novo - inicio_ffn_novo) / 60

best_ffn_novo = tuner_ffn_novo.get_best_models(num_models=1)[0]

Trial 150 Complete [00h 02m 31s]
val_loss: 5.302131175994873

Best val_loss So Far: 0.02299734763801098
Total elapsed time: 06h 49m 28s


  saveable.load_own_variables(weights_store.get(inner_path))


In [None]:
print(f"Tempo de busca na nova FFN: {tempo_ffn_novo:.1f} min")

params_ffn_novo = best_ffn_novo.count_params()
print(f"O melhor modelo FFN novo possui {params_ffn_novo:,} par√¢metros")

Tempo de busca na nova FFN: 409.5 min
O melhor modelo FFN novo possui 2,209,409 par√¢metros


In [45]:
df_top_ffn_novo = gerar_tabela_melhores(tuner_ffn_novo)

print("FFN novo:")
display(df_top_ffn_novo)

FFN novo:


Unnamed: 0,ranking,score_val_loss,num_layers,units_0,dropout_0,units_1,dropout_1,learning_rate,batch_size,units_2,dropout_2,units_3,dropout_3
0,1,0.022997,2,1408,0.0,1536,0.1,0.001,256,,,,
1,2,0.023055,2,1152,0.0,1024,0.1,0.001,256,,,,
2,3,0.023451,2,1280,0.0,1408,0.0,0.001,128,,,,
3,4,0.023536,2,1408,0.0,1280,0.0,0.001,128,,,,
4,5,0.023879,2,1664,0.2,1280,0.1,0.001,256,,,,


<br>
Mesmo ap√≥s essa busca por um modelo com mais par√¢metros internos para serem calibrados, o novo melhor modelo FFN n√£o obteve resultados melhores no conjunto de valida√ß√£o. Al√©m disso, os melhores modelos continuaram a escolher o menor n√∫mero de camadas permitidas na busca: duas.
<br>

Para o conjunto de treinamento, os resultados foram os seguintes:
<br>

In [4]:
y_pred_lstm = best_lstm.predict(X_test_lstm, verbose=0).ravel()
y_pred_ffn = best_ffn.predict(X_test_ffn, verbose=0).ravel()
y_pred_ffn_novo = best_ffn_novo.predict(X_test_ffn, verbose=0).ravel()

residuos_lstm = y_test - y_pred_lstm
residuos_ffn = y_test - y_pred_ffn
residuos_ffn_novo = y_test - y_pred_ffn_novo

print('mean squared error no conjunto de teste:')
print(f'lstm: {np.mean(residuos_lstm**2):.6f}')
print(f'ffn: {np.mean(residuos_ffn**2):.6f}')
print(f'ffn novo: {np.mean(residuos_ffn_novo**2):.6f}')


from scipy.stats import wilcoxon

# teste 1: LSTM vs FFN
statistic_lstm_ffn, p_value_lstm_ffn = wilcoxon(
    np.abs(residuos_lstm), 
    np.abs(residuos_ffn),
    alternative='less'
)

# teste 2: LSTM vs FFN novo
statistic_lstm_ffn_novo, p_value_lstm_ffn_novo = wilcoxon(
    np.abs(residuos_lstm), 
    np.abs(residuos_ffn_novo),
    alternative='less'
)

print()
print('Teste de wilcoxon pareado para os residuos dos modelos')
print(f"p-valor de lstm x ffn: {p_value_lstm_ffn:.6f}")
print(f"p-valor de lstm x ffn novo: {p_value_lstm_ffn_novo:.6f}")

mean squared error no conjunto de teste:
lstm: 0.066462
ffn: 0.075683
ffn novo: 0.074079

Teste de wilcoxon pareado para os residuos dos modelos
p-valor de lstm x ffn: 0.000000
p-valor de lstm x ffn novo: 0.066849


<br>
Conclus√£o: mesmo ap√≥s a nova tentativa, os valores da melhor LSTM foram mais precisos que os da melhor FFN tanto nos dados de valida√ß√£o quanto nos dados de teste üôÇ