In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from itertools import product

# --- funções de métricas hidrológicas ---
def rmse(observed, simulated):
    return np.sqrt(mean_squared_error(observed, simulated))

def nse(observed, simulated):
    """Nash–Sutcliffe Efficiency"""
    obs = np.array(observed)
    sim = np.array(simulated)
    return 1 - np.sum((obs - sim)**2) / np.sum((obs - np.mean(obs))**2)

def kge(observed, simulated):
    """Kling-Gupta Efficiency"""
    obs = np.array(observed)
    sim = np.array(simulated)
    r = np.corrcoef(obs, sim)[0,1]
    alpha = np.std(sim) / np.std(obs)
    beta  = np.mean(sim) / np.mean(obs)
    return 1 - np.sqrt((r-1)**2 + (alpha-1)**2 + (beta-1)**2)

def pbias(observed, simulated):
    """Percent Bias"""
    return 100.0 * np.sum(simulated - observed) / np.sum(observed)

# --- prepara features e target ---
def prepare_features(df, n_lags, n_ahead, use_monthly_mean):
    df = df.copy().sort_values('Data').reset_index(drop=True)
    df['month'] = df['Data'].dt.month

    # 1) dessazonalizar:
    if use_monthly_mean:
        split_idx = int(len(df) * 0.7)
        train_base = df.iloc[:split_idx]
        mens = train_base.groupby('month')['Vazao'].mean()
        df['Vazao'] = df['Vazao'] - df['month'].map(mens)

    # 2) gera lags e target a partir da série já dessazonalizada
    for lag in range(1, n_lags+1):
        df[f'lag_{lag}'] = df['Vazao'].shift(lag-1)
    df[f'y_{n_ahead}d'] = df['Vazao'].shift(-n_ahead)

    # 3) descarta NA, divide em treino/teste e extrai X/y
    cols_X = [f'lag_{lag}' for lag in range(1, n_lags+1)]
    df = df.dropna(subset=cols_X + [f'y_{n_ahead}d']).reset_index(drop=True)
    split_idx = int(len(df) * 0.7)
    df_train, df_test = df.iloc[:split_idx], df.iloc[split_idx:]

    X_train = df_train[cols_X].values
    y_train = df_train[f'y_{n_ahead}d'].values
    X_test  = df_test[cols_X].values
    y_test  = df_test[f'y_{n_ahead}d'].values

    # se precisar do vetor de média para reverter só no teste:
    vaz_med_test = None
    if use_monthly_mean:
        vaz_med_test = df_test['month'].map(mens).values

    return X_train, X_test, y_train, y_test, vaz_med_test


# --- loop de experimentos ---
def run_experiments(df):
    results = []
    opts_vaz = ['Sim', 'Não'] # = ['Sim', 'Não']
    opts_lags = [1, 7, 30] # = [1, 7, 30]
    opts_ahead = [1, 7, 30] # = [1, 2, 7, 30]

    #callback do early stopping antes do loop (ou logo antes de treinar cada modelo)
    early_stop = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=200,
        min_delta=1e-4,
        restore_best_weights=True)

    for vaz_opt, n_lags, n_ahead in product(opts_vaz, opts_lags, opts_ahead):
        use_mm = (vaz_opt == 'Sim')

        X_train, X_test, y_train, y_test, vaz_med = prepare_features(df, n_lags, n_ahead, use_mm)
        print(f"\n>>> n_ahead = {n_ahead}")
        print("y_test head:", y_test[:5])
        print("Primeiras 5 linhas de X_test:\n", X_test[:5, :])
        # escalonamento
        scaler = MinMaxScaler()
        X_train = scaler.fit_transform(X_train)
        X_test  = scaler.transform(X_test)

        # monta o modelo keras
        model = tf.keras.Sequential([
            tf.keras.layers.InputLayer(input_shape=(X_train.shape[1],)),
            tf.keras.layers.Dense(128, activation='relu'),
            tf.keras.layers.Dense(64, activation='relu'),
            tf.keras.layers.Dense(1)
        ])
        model.compile(optimizer='adam', loss='mse')

        # treina
        history = model.fit(
            X_train, y_train,
            epochs=625,
            batch_size=32,
            validation_split=0.2,
            callbacks=[early_stop],
            verbose=0
        )

        # extrai quantas épocas rodaram
        epochs_by_history = len(history.history['loss'])

        # previsao
        y_pred = model.predict(X_test).flatten()

        if use_mm== True:
            y_pred= y_pred+vaz_med
            y_test= y_test+vaz_med

        #métricas
        rmse_  = rmse(y_test, y_pred)
        nse_   = nse(y_test, y_pred)
        kge_   = kge(y_test, y_pred)
        pbias_ = pbias(y_test, y_pred)

        results.append({
            'vazao_menos_vaz_mensal_med': vaz_opt,
            'n_vazoes_anteriores':      n_lags,
            'n_dias_a_frente':          n_ahead,
            'RMSE':  rmse_,
            'NSE':   nse_,
            'KGE':   kge_,
            'Pbias': pbias_,
            'epochs':epochs_by_history
        })

        print(f'Concluído: mensal_med={vaz_opt}, lags={n_lags}, ahead={n_ahead}')

    return pd.DataFrame(results)

if __name__ == '__main__':
    #ler a série histórica
    #df = pd.read_csv('uniaoh.csv', parse_dates=['Data'])
    df = pd.read_csv('obidos.csv')
    df['Data'] = pd.to_datetime(df['Data'], format='%d/%m/%Y')

    resultados = run_experiments(df)
    resultados.to_csv('resultados_metricas_2.csv', index=False)
    print('Todos os cenários executados.')



>>> n_ahead = 1
y_test head: [12951.00210526 13733.60210526 14321.70210526 14910.90210526
 15304.20210526]
Primeiras 5 linhas de X_test:
 [[12170.10210526]
 [12951.00210526]
 [13733.60210526]
 [14321.70210526]
 [14910.90210526]]




[1m181/181[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 755us/step
Concluído: mensal_med=Sim, lags=1, ahead=1

>>> n_ahead = 7
y_test head: [14321.70210526 14910.90210526 15304.20210526 15501.00210526
 -5802.95832607]
Primeiras 5 linhas de X_test:
 [[ 7138.40210526]
 [ 8678.50210526]
 [10419.80210526]
 [11196.60210526]
 [12170.10210526]]




[1m181/181[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 732us/step
Concluído: mensal_med=Sim, lags=1, ahead=7

>>> n_ahead = 30
y_test head: [-4021.45832607 -3027.95832607 -1632.35832607 -1232.55832607
   -30.75832607]
Primeiras 5 linhas de X_test:
 [[-7678.39789474]
 [-6394.59789474]
 [-5105.09789474]
 [-3995.19789474]
 [-3067.19789474]]




[1m181/181[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 595us/step
Concluído: mensal_med=Sim, lags=1, ahead=30

>>> n_ahead = 1
y_test head: [14321.70210526 14910.90210526 15304.20210526 15501.00210526
 -5802.95832607]
Primeiras 5 linhas de X_test:
 [[13733.60210526 12951.00210526 12170.10210526 11196.60210526
  10419.80210526  8678.50210526  7138.40210526]
 [14321.70210526 13733.60210526 12951.00210526 12170.10210526
  11196.60210526 10419.80210526  8678.50210526]
 [14910.90210526 14321.70210526 13733.60210526 12951.00210526
  12170.10210526 11196.60210526 10419.80210526]
 [15304.20210526 14910.90210526 14321.70210526 13733.60210526
  12951.00210526 12170.10210526 11196.60210526]
 [15501.00210526 15304.20210526 14910.90210526 14321.70210526
  13733.60210526 12951.00210526 12170.10210526]]




[1m181/181[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 796us/step
Concluído: mensal_med=Sim, lags=7, ahead=1

>>> n_ahead = 7
y_test head: [14910.90210526 15304.20210526 15501.00210526 -5802.95832607
 -5012.25832607]
Primeiras 5 linhas de X_test:
 [[ 8678.50210526  7138.40210526  5605.50210526  4079.80210526
   2750.90210526  1616.20210526   862.00210526]
 [10419.80210526  8678.50210526  7138.40210526  5605.50210526
   4079.80210526  2750.90210526  1616.20210526]
 [11196.60210526 10419.80210526  8678.50210526  7138.40210526
   5605.50210526  4079.80210526  2750.90210526]
 [12170.10210526 11196.60210526 10419.80210526  8678.50210526
   7138.40210526  5605.50210526  4079.80210526]
 [12951.00210526 12170.10210526 11196.60210526 10419.80210526
   8678.50210526  7138.40210526  5605.50210526]]




[1m181/181[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 586us/step
Concluído: mensal_med=Sim, lags=7, ahead=7

>>> n_ahead = 30
y_test head: [-1632.35832607 -1232.55832607   -30.75832607   973.84167393
  1175.04167393]
Primeiras 5 linhas de X_test:
 [[ -5105.09789474  -6394.59789474  -7678.39789474  -8774.29789474
   -9684.29789474 -11134.19789474   -443.5045045 ]
 [ -3995.19789474  -5105.09789474  -6394.59789474  -7678.39789474
   -8774.29789474  -9684.29789474 -11134.19789474]
 [ -3067.19789474  -3995.19789474  -5105.09789474  -6394.59789474
   -7678.39789474  -8774.29789474  -9684.29789474]
 [ -2695.19789474  -3067.19789474  -3995.19789474  -5105.09789474
   -6394.59789474  -7678.39789474  -8774.29789474]
 [ -2508.99789474  -2695.19789474  -3067.19789474  -3995.19789474
   -5105.09789474  -6394.59789474  -7678.39789474]]




[1m181/181[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 527us/step
Concluído: mensal_med=Sim, lags=7, ahead=30

>>> n_ahead = 1
y_test head: [-4021.45832607 -3027.95832607 -1632.35832607 -1232.55832607
   -30.75832607]
Primeiras 5 linhas de X_test:
 [[-5012.25832607 -5802.95832607 15501.00210526 15304.20210526
  14910.90210526 14321.70210526 13733.60210526 12951.00210526
  12170.10210526 11196.60210526 10419.80210526  8678.50210526
   7138.40210526  5605.50210526  4079.80210526  2750.90210526
   1616.20210526   862.00210526   -78.09789474 -1202.49789474
  -1202.49789474 -1763.09789474 -2508.99789474 -2508.99789474
  -2695.19789474 -3067.19789474 -3995.19789474 -5105.09789474
  -6394.59789474 -7678.39789474]
 [-4021.45832607 -5012.25832607 -5802.95832607 15501.00210526
  15304.20210526 14910.90210526 14321.70210526 13733.60210526
  12951.00210526 12170.10210526 11196.60210526 10419.80210526
   8678.50210526  7138.40210526  5605.50210526  4079.80210526
   2750.90210526  1616.20



[1m181/181[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 578us/step
Concluído: mensal_med=Sim, lags=30, ahead=1

>>> n_ahead = 7
y_test head: [-1632.35832607 -1232.55832607   -30.75832607   973.84167393
  1175.04167393]
Primeiras 5 linhas de X_test:
 [[ 14910.90210526  14321.70210526  13733.60210526  12951.00210526
   12170.10210526  11196.60210526  10419.80210526   8678.50210526
    7138.40210526   5605.50210526   4079.80210526   2750.90210526
    1616.20210526    862.00210526    -78.09789474  -1202.49789474
   -1202.49789474  -1763.09789474  -2508.99789474  -2508.99789474
   -2695.19789474  -3067.19789474  -3995.19789474  -5105.09789474
   -6394.59789474  -7678.39789474  -8774.29789474  -9684.29789474
  -11134.19789474   -443.5045045 ]
 [ 15304.20210526  14910.90210526  14321.70210526  13733.60210526
   12951.00210526  12170.10210526  11196.60210526  10419.80210526
    8678.50210526   7138.40210526   5605.50210526   4079.80210526
    2750.90210526   1616.20210526    862.0021



[1m181/181[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 826us/step
Concluído: mensal_med=Sim, lags=30, ahead=7

>>> n_ahead = 30
y_test head: [2991.14167393 4003.84167393 4612.84167393 5426.34167393 6037.54167393]
Primeiras 5 linhas de X_test:
 [[ -1202.49789474  -1763.09789474  -2508.99789474  -2508.99789474
   -2695.19789474  -3067.19789474  -3995.19789474  -5105.09789474
   -6394.59789474  -7678.39789474  -8774.29789474  -9684.29789474
  -11134.19789474   -443.5045045   -1162.4045045   -1700.2045045
   -1700.2045045   -2772.8045045   -3841.1045045   -4018.8045045
   -4728.1045045   -6141.1045045   -7371.1045045   -8769.7045045
   -9987.1045045  -11025.9045045  -12060.4045045  -12575.9045045
  -13432.8045045  -13261.7045045 ]
 [ -1202.49789474  -1202.49789474  -1763.09789474  -2508.99789474
   -2508.99789474  -2695.19789474  -3067.19789474  -3995.19789474
   -5105.09789474  -6394.59789474  -7678.39789474  -8774.29789474
   -9684.29789474 -11134.19789474   -443.5045045   -11



[1m181/181[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 750us/step
Concluído: mensal_med=Sim, lags=30, ahead=30

>>> n_ahead = 1
y_test head: [128253.8 129036.4 129624.5 130213.7 130607. ]
Primeiras 5 linhas de X_test:
 [[127472.9]
 [128253.8]
 [129036.4]
 [129624.5]
 [130213.7]]




[1m181/181[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 546us/step
Concluído: mensal_med=Não, lags=1, ahead=1

>>> n_ahead = 7
y_test head: [129624.5 130213.7 130607.  130803.8 131789.6]
Primeiras 5 linhas de X_test:
 [[122441.2]
 [123981.3]
 [125722.6]
 [126499.4]
 [127472.9]]




[1m181/181[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 590us/step
Concluído: mensal_med=Não, lags=1, ahead=7

>>> n_ahead = 30
y_test head: [133571.1 134564.6 135960.2 136360.  137561.8]
Primeiras 5 linhas de X_test:
 [[107624.4]
 [108908.2]
 [110197.7]
 [111307.6]
 [112235.6]]




[1m181/181[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 703us/step
Concluído: mensal_med=Não, lags=1, ahead=30

>>> n_ahead = 1
y_test head: [129624.5 130213.7 130607.  130803.8 131789.6]
Primeiras 5 linhas de X_test:
 [[129036.4 128253.8 127472.9 126499.4 125722.6 123981.3 122441.2]
 [129624.5 129036.4 128253.8 127472.9 126499.4 125722.6 123981.3]
 [130213.7 129624.5 129036.4 128253.8 127472.9 126499.4 125722.6]
 [130607.  130213.7 129624.5 129036.4 128253.8 127472.9 126499.4]
 [130803.8 130607.  130213.7 129624.5 129036.4 128253.8 127472.9]]




[1m181/181[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 529us/step
Concluído: mensal_med=Não, lags=7, ahead=1

>>> n_ahead = 7
y_test head: [130213.7 130607.  130803.8 131789.6 132580.3]
Primeiras 5 linhas de X_test:
 [[123981.3 122441.2 120908.3 119382.6 118053.7 116919.  116164.8]
 [125722.6 123981.3 122441.2 120908.3 119382.6 118053.7 116919. ]
 [126499.4 125722.6 123981.3 122441.2 120908.3 119382.6 118053.7]
 [127472.9 126499.4 125722.6 123981.3 122441.2 120908.3 119382.6]
 [128253.8 127472.9 126499.4 125722.6 123981.3 122441.2 120908.3]]




[1m181/181[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 569us/step
Concluído: mensal_med=Não, lags=7, ahead=7

>>> n_ahead = 30
y_test head: [135960.2 136360.  137561.8 138566.4 138767.6]
Primeiras 5 linhas de X_test:
 [[110197.7 108908.2 107624.4 106528.5 105618.5 104168.6 102906.1]
 [111307.6 110197.7 108908.2 107624.4 106528.5 105618.5 104168.6]
 [112235.6 111307.6 110197.7 108908.2 107624.4 106528.5 105618.5]
 [112607.6 112235.6 111307.6 110197.7 108908.2 107624.4 106528.5]
 [112793.8 112607.6 112235.6 111307.6 110197.7 108908.2 107624.4]]




[1m181/181[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 626us/step
Concluído: mensal_med=Não, lags=7, ahead=30

>>> n_ahead = 1
y_test head: [133571.1 134564.6 135960.2 136360.  137561.8]
Primeiras 5 linhas de X_test:
 [[132580.3 131789.6 130803.8 130607.  130213.7 129624.5 129036.4 128253.8
  127472.9 126499.4 125722.6 123981.3 122441.2 120908.3 119382.6 118053.7
  116919.  116164.8 115224.7 114100.3 114100.3 113539.7 112793.8 112793.8
  112607.6 112235.6 111307.6 110197.7 108908.2 107624.4]
 [133571.1 132580.3 131789.6 130803.8 130607.  130213.7 129624.5 129036.4
  128253.8 127472.9 126499.4 125722.6 123981.3 122441.2 120908.3 119382.6
  118053.7 116919.  116164.8 115224.7 114100.3 114100.3 113539.7 112793.8
  112793.8 112607.6 112235.6 111307.6 110197.7 108908.2]
 [134564.6 133571.1 132580.3 131789.6 130803.8 130607.  130213.7 129624.5
  129036.4 128253.8 127472.9 126499.4 125722.6 123981.3 122441.2 120908.3
  119382.6 118053.7 116919.  116164.8 115224.7 114100.3 114100.3 



[1m181/181[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 592us/step
Concluído: mensal_med=Não, lags=30, ahead=1

>>> n_ahead = 7
y_test head: [135960.2 136360.  137561.8 138566.4 138767.6]
Primeiras 5 linhas de X_test:
 [[130213.7 129624.5 129036.4 128253.8 127472.9 126499.4 125722.6 123981.3
  122441.2 120908.3 119382.6 118053.7 116919.  116164.8 115224.7 114100.3
  114100.3 113539.7 112793.8 112793.8 112607.6 112235.6 111307.6 110197.7
  108908.2 107624.4 106528.5 105618.5 104168.6 102906.1]
 [130607.  130213.7 129624.5 129036.4 128253.8 127472.9 126499.4 125722.6
  123981.3 122441.2 120908.3 119382.6 118053.7 116919.  116164.8 115224.7
  114100.3 114100.3 113539.7 112793.8 112793.8 112607.6 112235.6 111307.6
  110197.7 108908.2 107624.4 106528.5 105618.5 104168.6]
 [130803.8 130607.  130213.7 129624.5 129036.4 128253.8 127472.9 126499.4
  125722.6 123981.3 122441.2 120908.3 119382.6 118053.7 116919.  116164.8
  115224.7 114100.3 114100.3 113539.7 112793.8 112793.8 112607.6 



[1m181/181[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 784us/step
Concluído: mensal_med=Não, lags=30, ahead=7

>>> n_ahead = 30
y_test head: [140583.7 141596.4 142205.4 143018.9 143630.1]
Primeiras 5 linhas de X_test:
 [[114100.3 113539.7 112793.8 112793.8 112607.6 112235.6 111307.6 110197.7
  108908.2 107624.4 106528.5 105618.5 104168.6 102906.1 102187.2 101649.4
  101649.4 100576.8  99508.5  99330.8  98621.5  97208.5  95978.5  94579.9
   93362.5  92323.7  91289.2  90773.7  89916.8  90087.9]
 [114100.3 114100.3 113539.7 112793.8 112793.8 112607.6 112235.6 111307.6
  110197.7 108908.2 107624.4 106528.5 105618.5 104168.6 102906.1 102187.2
  101649.4 101649.4 100576.8  99508.5  99330.8  98621.5  97208.5  95978.5
   94579.9  93362.5  92323.7  91289.2  90773.7  89916.8]
 [115224.7 114100.3 114100.3 113539.7 112793.8 112793.8 112607.6 112235.6
  111307.6 110197.7 108908.2 107624.4 106528.5 105618.5 104168.6 102906.1
  102187.2 101649.4 101649.4 100576.8  99508.5  99330.8  98621.5



[1m181/181[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 750us/step
Concluído: mensal_med=Não, lags=30, ahead=30
Todos os cenários executados.


In [6]:
resultados

Unnamed: 0,vazao_menos_vaz_mensal_med,n_vazoes_anteriores,n_dias_a_frente,RMSE,NSE,KGE,Pbias,epochs
0,Sim,1,1,3926.516823,0.994987,0.989808,-0.149088,288
1,Sim,1,7,10222.362443,0.966578,0.929508,-1.522393,336
2,Sim,1,30,12653.705049,0.944663,0.951126,-1.700513,625
3,Sim,7,1,3911.855505,0.995024,0.990841,-0.152238,582
4,Sim,7,7,9593.713335,0.970563,0.951429,-0.845453,625
5,Sim,7,30,12393.372634,0.946906,0.955212,-1.572042,442
6,Sim,30,1,3869.209075,0.995133,0.991941,-0.069753,625
7,Sim,30,7,7604.265204,0.981502,0.96706,-0.795904,625
8,Sim,30,30,11254.901532,0.956191,0.972025,-1.193274,625
9,Não,1,1,1116.30468,0.999593,0.999413,-0.01785,598
