In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
import datetime
import os
import plotly.graph_objects as go
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

### Leitura do dataset para treino da LSTM (RNN) em séries temporais

In [None]:
# --- Experiment configuration ------------------------------------------------
RANDOM_SEED = 21

# NOTE(review): `model` holds the model *name* here and is only used to build
# the figures path below; a later cell rebinds the same identifier to the
# Keras model object.
model = "lstm"
src_type = "regular"

dir_results = f"../../data/results/{src_type}"
dir_figures = f"{dir_results}/figures/{model}"

# exist_ok=True replaces the check-then-create pattern: the call is idempotent
# and cannot fail if the directory appears between the check and the creation.
os.makedirs(dir_figures, exist_ok=True)

path_datasets = "../../data/datasets"
dataset = "Itaipu_POC_VAZAO_V3.csv"

# Number of past operative weeks used as the input window when training. min(n)=1
n = 6

# Index of the future operative week whose flow the models predict. min(f)=1
f = 1

In [None]:
# Load the raw dataset; the 'time' column becomes the DataFrame index.
csv_path = f'{path_datasets}/{dataset}'
df = pd.read_csv(csv_path, index_col='time')
df

In [None]:
# Move the 'time' index into a regular column and add the precipitation series
# shifted f steps into the future (row t receives the value from row t + f).
shift_col = f'bacia_prec_sum_shift_f={f}'

# Guard: once 'time' is a column the index is a plain RangeIndex, so re-running
# this cell without the guard would overwrite the timestamps with integers.
if 'time' not in df.columns:
    df['time'] = df.index
    df = df.reset_index(drop=True)

# Guard: shifting an already-trimmed frame would drop another f rows per re-run.
if shift_col not in df.columns:
    df[shift_col] = df['bacia_prec_sum'].shift(-f)
    # The shift leaves NaN in the last f rows; dropna() removes them (and any
    # other row containing NaN). Resetting the index afterwards keeps it
    # contiguous so that frames built with a fresh RangeIndex align
    # row-by-row with `df`.
    df = df.dropna().reset_index(drop=True)

df

In [None]:
# Independent scalers so the target can be inverse-transformed on its own later.
# NOTE(review): both scalers are fitted on the full dataset, i.e. before the
# train/test split, so test-set min/max values influence the scaling.
scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()

# Columns to scale for X and y
columns_to_scale_X = [f'bacia_prec_sum_shift_f={f}']  # 'bacia_prec_sum',
columns_to_scale_y = ['vazao_itaipu']

# Fit scalers on the selected columns and transform
scaled_data_X = scaler_X.fit_transform(df[columns_to_scale_X])
scaled_data_y = scaler_y.fit_transform(df[columns_to_scale_y])

# Re-wrap the scaled arrays re-using df's index. Without `index=df.index` the
# new frames get a fresh RangeIndex and pd.concat(axis=1) would align on labels,
# producing NaN-padded, misaligned rows whenever df's index has gaps
# (e.g. after dropna() removed rows in the middle of the series).
scaled_X = pd.DataFrame(scaled_data_X, columns=columns_to_scale_X, index=df.index)
scaled_y = pd.DataFrame(scaled_data_y, columns=columns_to_scale_y, index=df.index)

# Resulting column order: 0 = time, 1 = scaled shifted precipitation, 2 = scaled flow.
new_df = pd.concat([df.time, scaled_X, scaled_y], axis=1)
new_df

In [None]:
# Convert the frame to a plain NumPy array for positional slicing.
# The isinstance guard makes the cell safe to re-run: on a second run `new_df`
# is already an ndarray, which has no `.values` attribute.
if isinstance(new_df, pd.DataFrame):
    new_df = new_df.to_numpy()
new_df

### Divisão do dataset em janelas de séries temporais e em conjuntos de treino e teste

In [None]:
def split_sequences(sequences, n_steps, f_pred, target_cols=(0, 2)):
    """Split a multivariate sequence into sliding-window samples.

    Each sample pairs a window of ``n_steps`` consecutive rows (all columns)
    with selected columns of the row located ``f_pred`` steps after the end
    of that window.

    Args:
        sequences: 2-D NumPy array, one row per time step.
        n_steps: number of consecutive rows in each input window (>= 1).
        f_pred: how many steps after the window the target row lies (>= 1);
            ``f_pred=1`` takes the row immediately following the window.
        target_cols: column indices copied from the target row. Defaults to
            ``(0, 2)``, the columns that were previously hard-coded.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays: ``X`` stacks the input windows and
        ``y`` stacks the matching target rows.

    Raises:
        ValueError: if ``n_steps`` or ``f_pred`` is smaller than 1.
    """
    if n_steps < 1 or f_pred < 1:
        raise ValueError("n_steps and f_pred must both be >= 1")

    # A list index triggers NumPy fancy indexing regardless of the container
    # type the caller passed (tuple, list, array).
    target_cols = list(target_cols)
    horizon_offset = f_pred - 1

    # Number of windows whose target row still lies inside `sequences`.
    n_windows = len(sequences) - n_steps - horizon_offset

    X, y = [], []
    for start in range(max(n_windows, 0)):
        end_ix = start + n_steps  # exclusive end of the input window
        X.append(sequences[start:end_ix, :])
        y.append(sequences[end_ix + horizon_offset, target_cols])

    return np.array(X), np.array(y)

In [None]:
# Row count of new_df; the following cells split this multivariate sequence into samples.
len(new_df)

In [None]:
# Debug aid: reproduce by hand the first sample that split_sequences builds.
sequences = new_df
i = 0
end_ix = i + n
target_ix = end_ix + (f - 1)

print(end_ix, target_ix)

sequences[i:end_ix, :], sequences[target_ix, [0, 2]]

# Position of the target (y) row relative to the window end:
#
#   end_ix     -> 1 operative week ahead
#
#   end_ix + 1 -> 2 operative weeks ahead
#
#   end_ix + 2 -> 3 operative weeks ahead

In [None]:
# Build the samples: n-step input windows and the target rows f steps ahead.
X, y = split_sequences(sequences=new_df, n_steps=n, f_pred=f)
(X.shape, y.shape)

In [None]:
# Inspect one sample: its input window and its target row.
seq = 0
X[seq], y[seq]

In [None]:
# Chronological hold-out: the last 20% of the windows form the test set.
# The windows come from a sliding window, so consecutive samples share most of
# their rows, and the flow column is both an input feature and the target.
# A shuffled split therefore places near-duplicates of the test samples in the
# training set and inflates the test metrics. shuffle=False keeps time order.
X_train_, X_test_, y_train_, y_test_ = train_test_split(X, y, test_size=0.2, shuffle=False)
X_train_.shape, X_test_.shape, y_train_.shape, y_test_.shape

In [None]:
# Inspect one training sample as the network sees it:
# inputs without column 0 (time) and the scalar target from column 1 of y.
seq = 0
X_train_[seq, :, 1:], y_train_[seq, 1]

### LSTM

In [None]:
# Date stamp used in the output folder name (append '_%Hh%M' to the format to
# also stamp hour and minute).
now = datetime.datetime.now().strftime('%Y%m%d')
modelo_numerico = 'so_prev'  # forecast for the following operative week

dir_rna = f'{dir_results}/rna/{modelo_numerico}_{now}'
# exist_ok=True: idempotent and free of the check-then-create race.
os.makedirs(dir_rna, exist_ok=True)

file_ann = f'{dir_rna}/ann_{modelo_numerico}.h5'
best_file_ann = f'{dir_rna}/best_ann_{modelo_numerico}.h5'

In [None]:
# Training hyperparameters.
max_epochs = 500                              # upper bound on training epochs
patience = 15                                 # epochs without improvement before early stopping
monitor_metric = 'val_mean_absolute_error'    # metric watched by the checkpoint callback

# NOTE(review): the two values below are not referenced by any other visible
# cell (the model is built from an explicit list of layer sizes) - confirm
# whether they are still needed.
n_neurons = 256
n_hidden_layers = 3

In [None]:
# Per-sample shape of the inputs once column 0 (the time column) is sliced off.
X_train_[:,:,1:].shape[1:]

In [None]:
def build_model(X_train_, n_neurons_hl, activation):
    """Build an uncompiled stack of LSTM layers followed by a single-unit Dense layer.

    Args:
        X_train_: array whose ``shape[1:]`` is passed as ``input_shape`` to the
            first LSTM layer.
        n_neurons_hl: sequence with the number of units of each LSTM layer, in
            order; one LSTM layer is created per entry.
        activation: activation passed to every LSTM layer.

    Returns:
        A ``tf.keras.Sequential`` model.
    """
    first_units = n_neurons_hl[0]
    deeper_units = n_neurons_hl[1:]

    # Every LSTM except the last must emit the full sequence so that the next
    # LSTM layer receives one vector per time step.
    layers = [
        tf.keras.layers.LSTM(
            units=first_units,
            activation=activation,
            input_shape=list(X_train_.shape[1:]),
            return_sequences=len(deeper_units) > 0,
        )
    ]

    for position, units in enumerate(deeper_units, start=1):
        is_last_lstm = position == len(deeper_units)
        layers.append(
            tf.keras.layers.LSTM(
                units=units,
                activation=activation,
                return_sequences=not is_last_lstm,
            )
        )

    layers.append(tf.keras.layers.Dense(1))

    return tf.keras.Sequential(layers)

In [None]:
# Seed NumPy and TensorFlow right before the model is created so that weight
# initialisation (and TensorFlow's subsequent random ops) start from the same
# state on every run; RANDOM_SEED was otherwise only used for the data split.
np.random.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)

# NOTE(review): the layer sizes are hard-coded as [50, 60]; the n_neurons /
# n_hidden_layers hyperparameters defined earlier are not used here - confirm
# which configuration is intended.
model = build_model(X_train_[:,:,1:], [50,60], 'relu')

In [None]:
# Train on MSE and additionally track MAE (reported by Keras as
# 'mean_absolute_error' / 'val_mean_absolute_error').
model.compile(
    loss=tf.losses.MeanSquaredError(),
    optimizer=tf.optimizers.Adam(),
    metrics=[tf.metrics.MeanAbsoluteError()],
)

# Both callbacks watch the same metric (`monitor_metric`). Previously
# EarlyStopping hard-coded 'val_loss' while the checkpoint used
# `monitor_metric`, so the weights restored at the end of training and the
# checkpoint on disk could come from different epochs.
# mode='min' is explicit because the monitored quantity is an error.
callbacks = [
    tf.keras.callbacks.EarlyStopping(
        monitor=monitor_metric,
        patience=patience,
        restore_best_weights=True,
        mode='min',
    ),
    tf.keras.callbacks.ModelCheckpoint(
        filepath=best_file_ann,
        monitor=monitor_metric,
        mode='min',
        verbose=True,
        save_best_only=True,
    ),
]

In [None]:
# Print the layer-by-layer summary of the model.
model.summary()

In [None]:
# Drop column 0 (time) from the inputs and keep only column 1 of y (the flow
# target); both are cast to float32 before being handed to Keras.
train_inputs = X_train_[:, :, 1:].astype('float32')
train_targets = y_train_[:, 1].astype('float32')

history = model.fit(
    train_inputs,
    train_targets,
    epochs=max_epochs,
    validation_split=0.2,
    callbacks=callbacks,
    verbose=True,
)

In [None]:
model.save(file_ann) # save the current model

### Importação do modelo com melhor desempenho em `monitor_metric` durante o treinamento, para análise

In [None]:
model = tf.keras.models.load_model(best_file_ann) # load the checkpoint that performed best on 'monitor_metric' during training, for analysis
# model = tf.keras.models.load_model(file_ann)

In [None]:
# Training vs. validation loss per epoch (epochs numbered from 1).
fig = go.Figure()

for history_key, trace_name in (('loss', 'Train Loss'), ('val_loss', 'Validation Loss')):
    curve = history.history[history_key]
    epochs_axis = list(range(1, len(curve) + 1))
    fig.add_trace(go.Scatter(x=epochs_axis, y=curve, mode='lines', name=trace_name))

fig.update_layout(
    title='Training and Validation Loss',
    xaxis_title='Epoch',
    yaxis_title='Loss',
    legend=dict(x=0, y=1, traceorder='normal'),
    width=900,
    height=600,
)

fig.write_image(f"{dir_figures}/rna_training_validation_loss_plot.png")

fig.show()

### Reverte a normalização (transformação inversa) das previsões e dos valores de teste

In [None]:
# Predict on the test windows (column 0, time, removed) and map the scaled
# predictions back to the original units of the target.
X_test_features = X_test_[:, :, 1:].astype('float32')
y_pred_ = model.predict(X_test_features)
y_pred = scaler_y.inverse_transform(y_pred_)

In [None]:
# Column 1 of y_test_ holds the scaled target; keep it 2-D (n_samples, 1)
# as required by the scaler and undo the scaling.
y_test = scaler_y.inverse_transform(y_test_[:, [1]])

In [None]:
# Error metrics on the test set, computed in the original (un-scaled) units.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)
# y_test and y_pred are column vectors; transposing gives one row per variable,
# so element [0, 1] of the 2x2 matrix is the measured/predicted correlation.
corr = np.corrcoef(y_test.T, y_pred.T)[0, 1]

# The row label was 'Decision Tree', a copy-paste leftover: these metrics
# belong to the LSTM trained in this notebook.
metrics_df = pd.DataFrame(
    {'MAE': mae, 'MSE': mse, 'RMSE': rmse, 'R2': r2, 'Corr': corr},
    index=['LSTM'],
)
metrics_df

In [None]:
# Largest predicted value, in the original (un-scaled) units.
y_pred.max()

In [None]:
# Measured vs. predicted scatter with the y = x reference line.
fig = go.Figure()

fig.add_trace(
    go.Scatter(
        x=y_test.ravel(),
        y=y_pred.ravel(),
        mode='markers',
        marker=dict(color='blue', opacity=0.5, line=dict(color='black', width=1)),
        name='Measured vs Predicted'
    )
)

# The identity line must cover the range of BOTH axes; using only the predicted
# range left measured values outside that range without a reference.
axis_min = float(min(y_test.min(), y_pred.min()))
axis_max = float(max(y_test.max(), y_pred.max()))

fig.add_trace(
    go.Scatter(
        x=[axis_min, axis_max],
        y=[axis_min, axis_max],
        mode='lines',
        line=dict(color='red', dash='dash'),
        name='Identity Line'
    )
)

fig.update_layout(
    title='Measured vs Predicted',
    xaxis=dict(title='y_true'),
    yaxis=dict(title='y_pred'),
    autosize=False,
    width=800,
    height=500,
    margin=dict(l=0, r=0, b=0, t=40),
    showlegend=True
)

fig.write_image(f"{dir_figures}/scattered_measured_vs_predicted_plot.png")

fig.show()


In [None]:
# Observed flow over the whole period (line) with the test-set forecasts
# (markers) placed at the timestamps stored in column 0 of y_test_.
fig = go.Figure()

observed_trace = go.Scatter(
    x=df.time.values,
    y=df.vazao_itaipu.values,  # observed flow
    mode='lines',
    name='Vazão observada',
)
forecast_trace = go.Scatter(
    x=y_test_[:, 0],
    y=y_pred.ravel(),  # predicted flow
    mode='markers',
    name='Forecast',
)

for trace in (observed_trace, forecast_trace):
    fig.add_trace(trace)

fig.update_layout(title='Predição - Itaipu')

fig.write_image(f"{dir_figures}/history_measured_vs_predicted_plot.png", width=1400, scale=1)

fig.show()

### Outros

In [None]:
# def gradient_importance(seq, model):
#     seq = tf.Variable(seq[np.newaxis,:,:], dtype=tf.float32)
#     with tf.GradientTape() as tape:
#         predictions = model(seq)
#     grads = tape.gradient(predictions, seq)
#     grads = tf.reduce_mean(grads, axis=1).numpy()[0]
    
#     return grads

In [None]:
# X_train[0,:,1:]

In [None]:
# gradient_importance(X_train[0,:,1:], model)

In [None]:
# importances = []
# for i in range(0, X_train.shape[0]):
#     importances.append(gradient_importance(X_train[i,:,1:], model))

In [None]:
# importance = np.mean(np.array(importances), axis=0)
# importance

In [None]:
# plt.figure(figsize=(6,4))
# plt.title('Importância das Variáveis')

# plt.bar(df_ts.columns.values[:importance.shape[0]],importance.tolist())

# plt.savefig(f"{dir_figures}/feature_importance.png")
# plt.show()

In [None]:
### Normalização dos dados de treino/teste

# # Reshape the data to 2D
# X_train_reshaped = X_train.reshape(-1, X_train.shape[-1])
# X_test_reshaped = X_test.reshape(-1, X_test.shape[-1])
# y_train_reshaped = y_train.reshape(-1, y_train.shape[-1])
# y_test_reshaped = y_test.reshape(-1, y_test.shape[-1])

# # Initialize MinMaxScaler
# scaler_X = MinMaxScaler()
# scaler_y = MinMaxScaler()

# # Fit and transform on the training data (ignoring dates) # _ stands for normalized data
# X_train_ = scaler_X.fit_transform(X_train_reshaped[:,1:])
# y_train_ = scaler_y.fit_transform(y_train_reshaped[:,1:])

# # Transform the test data (ignoring dates)
# X_test_ = scaler_X.transform(X_test_reshaped[:,1:])
# y_test_ = scaler_y.transform(y_test_reshaped[:,1:])

# # Reshape back to the original shape (ignoring dates)
# X_train_ = X_train_.reshape(X_train[:,:,1:].shape).astype('float32')
# X_test_ = X_test_.reshape(X_test[:,:,1:].shape).astype('float32')
# y_train_ = y_train_.reshape(y_train[:,1:].shape).astype('float32')
# y_test_ = y_test_.reshape(y_test[:,1:].shape).astype('float32')