In [None]:

import logging
import os
import sys

import torch.utils.data as data
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
import tensorflow as tf
from scikeras.wrappers import KerasRegressor 
from sklearn.model_selection import GridSearchCV, TimeSeriesSplit
import seaborn as sns

sys.path.append(os.path.abspath("../.."))
import utils.lstmModel as lstm
from utils.logger import Logger

In [None]:
import sys, os
sys.path.append(os.path.abspath(".."))
import access_merge as access_merge

# No visible cell ever assigns `logger`, so a fresh "Restart & Run All" would
# fail with NameError on the first log call. The project `Logger` class is
# imported, but its constructor is not visible from this notebook, so fall
# back to a standard-library logger unless one already exists in the kernel.
if "logger" not in globals():
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger("bilstm_chuva")

# Called for its side effect only; the return value is not used here.
access_merge.acessar_dados_merge_lat_long()
timeseries = access_merge.acessar_dados_merge()

# Keep the untransformed series so the overview plot shows real units rather
# than the log1p + min-max scaled values used for modelling.
chuva_original = timeseries['chuva'].copy()
timeseries['chuva'] = np.log1p(timeseries['chuva'])

# Number of model input features expected by the training cell.
# NOTE(review): 18 = 'chuva' + the 17 engineered columns below; this assumes
# the loaded frame contains only 'chuva' - verify against access_merge.
num_features = 18

# 1. Calendar features (cyclical encodings plus a linear year trend).
# assumes timeseries has a DatetimeIndex - the .dayofyear/.month/.year
# accessors below require it.
timeseries['dia_seno'] = np.sin(2 * np.pi * timeseries.index.dayofyear / 365)
timeseries['dia_cosseno'] = np.cos(2 * np.pi * timeseries.index.dayofyear / 365)
timeseries['mes_seno'] = np.sin(2 * np.pi * timeseries.index.month / 12)
timeseries['mes_cosseno'] = np.cos(2 * np.pi * timeseries.index.month / 12)
timeseries['ano'] = timeseries.index.year - timeseries.index.year.min()

# 2. Moving averages. shift(1) makes every window end on the previous row,
#    so the value being predicted never leaks into its own features.
timeseries['chuva_ma3']  = timeseries['chuva'].shift(1).rolling(window=3, min_periods=1).mean().fillna(0)
timeseries['chuva_ma7']  = timeseries['chuva'].shift(1).rolling(window=7, min_periods=1).mean().fillna(0)
timeseries['chuva_ma14'] = timeseries['chuva'].shift(1).rolling(window=14, min_periods=1).mean().fillna(0)
timeseries['chuva_ma30'] = timeseries['chuva'].shift(1).rolling(window=30, min_periods=1).mean().fillna(0)

# 3. Rolling statistics over the previous 7 rows.
timeseries['chuva_std7'] = timeseries['chuva'].shift(1).rolling(window=7, min_periods=1).std().fillna(0)
timeseries['chuva_max7'] = timeseries['chuva'].shift(1).rolling(window=7, min_periods=1).max().fillna(0)
timeseries['chuva_min7'] = timeseries['chuva'].shift(1).rolling(window=7, min_periods=1).min().fillna(0)

# 4. Lags.
timeseries['chuva_lag1'] = timeseries['chuva'].shift(1).fillna(0)
timeseries['chuva_lag3'] = timeseries['chuva'].shift(3).fillna(0)
timeseries['chuva_lag7'] = timeseries['chuva'].shift(7).fillna(0)

# 5. Binary flags. Computed BEFORE scaling so that "> 0" still means
#    "some rain was recorded".
timeseries['choveu_ontem'] = (timeseries['chuva_lag1'] > 0).astype(int)
timeseries['choveu_semana'] = (timeseries['chuva_ma7'] > 0).astype(int)

if timeseries.shape[1] != num_features:
    logger.info(
        f"Atenção: num_features={num_features}, mas o DataFrame tem "
        f"{timeseries.shape[1]} colunas."
    )

# Only columns whose name contains 'chuva' are scaled; the flags
# ('choveu_*') and the calendar features are left as they are.
features_dinamicas = [col for col in timeseries.columns if 'chuva' in col]
features_sazonais = ['dia_seno', 'dia_cosseno', 'mes_seno', 'mes_cosseno', 'ano']

# Fit the scaler on the leading (training) part of the series only, then apply
# it to every row. Fitting on the full series would leak the test period's
# min/max into training. The fraction must match the chronological split used
# when the train/test sequences are built (0.70 there).
TRAIN_FRACTION = 0.70
n_linhas_ajuste = int(len(timeseries) * TRAIN_FRACTION)
scaler_chuva = MinMaxScaler()
scaler_chuva.fit(timeseries.iloc[:n_linhas_ajuste][features_dinamicas])
timeseries[features_dinamicas] = scaler_chuva.transform(timeseries[features_dinamicas])

logger.info(f"Dados carregados com sucesso. Total de {len(timeseries)} registros.")
datas = timeseries.index 

# Overview plot in the original units, so the 'mm' axis label is accurate.
plt.plot(chuva_original)
plt.title('Daily Rain in station')
plt.xlabel('Date')
plt.ylabel('mm')
plt.show()

# Correlation heatmap across all (scaled) model features.
plt.figure(figsize=(12,8))
corr = timeseries.corr(numeric_only=True)
sns.heatmap(corr, annot=True, fmt=".2f", cmap="coolwarm")
plt.title("Correlação entre as variáveis")
plt.show()

Pontos no bbox do Rio: 21
   latitude  longitude
0    -23.05     -43.75
1    -23.05     -43.65
2    -23.05     -43.55
3    -23.05     -43.45
4    -23.05     -43.35


FileNotFoundError: [Errno 2] No such file or directory: '/home/pbose/dataset/merge/'

In [None]:
# Number of past days fed to the model for each sample.
lookback = 14
logger.info(f"Preparando sequências com um lookback de {lookback} dias.")

# Build the (X, y) windows from the raw feature matrix.
# NOTE(review): the date alignment below presumes create_sequence yields one
# sample per row starting at index `lookback` - confirm in utils.lstmModel.
X, y = lstm.create_sequence(timeseries.values, lookback)
dates_aligned = datas[lookback:]

# Chronological 70/30 split: the first 70% of the windows are for training.
train_size = int(len(X) * 0.70)
parte_treino = slice(None, train_size)
parte_teste = slice(train_size, None)

X_train, y_train = X[parte_treino], y[parte_treino]
X_test, y_test = X[parte_teste], y[parte_teste]
train_date = dates_aligned[parte_treino]
test_date = dates_aligned[parte_teste]

logger.info(f"Sequências criadas. Treino: {len(X_train)} amostras, Teste: {len(X_test)} amostras.")
logger.info(f"Shape {X_train.shape}")



In [None]:
# Hyperparameter search over the model builder, using time-ordered CV folds.
# NOTE(review): the wrapper is created without `epochs`/`batch_size`, so each
# candidate trains with the wrapper's defaults - confirm that is intended.
model_wrapper = KerasRegressor(model=lstm.criar_modelo_avancado, verbose=0)
n_splits = 3  # number of cross-validation splits
tscv = TimeSeriesSplit(n_splits=n_splits)

# `param_grid` was never defined in the notebook, so this cell raised
# NameError on a fresh kernel. The keys use scikeras' `model__` routing prefix
# and the argument names with which the training cell calls
# `criar_modelo_avancado`. The candidate values are a starting point built
# around the configuration used there (128 / 64 / 0.2) - adjust as needed.
param_grid = {
    "model__lookback": [lookback],
    "model__n_features": [num_features],
    "model__units_camada1": [64, 128],
    "model__units_camada2": [32, 64],
    "model__dropout_rate": [0.2, 0.3],
}

# Build the GridSearchCV instance.
grid = GridSearchCV(
    estimator=model_wrapper,
    param_grid=param_grid,
    # Explicit scoring: without it the estimator's default `score` is used,
    # which would not be the negative MSE that the log message below reports.
    scoring="neg_mean_squared_error",
    cv=tscv,          # time-series aware cross-validation
    n_jobs=-1,        # use every available processor
    verbose=2         # show progress
)

# Run the search (this can take a VERY long time).
grid_result = grid.fit(X_train, y_train) 

logger.info("GridSearchCV finalizado.")
logger.info(f"Melhores parâmetros encontrados: {grid_result.best_params_}")
logger.info(f"Melhor score de validação cruzada (negativo da MSE): {grid_result.best_score_:.4f}")

# The best estimator, refitted on the whole training set, is available for
# predictions.
best_model = grid_result.best_estimator_

In [None]:
# Training configuration.
epochs = 1000          # upper bound; early stopping normally ends training sooner
batch_size = 32
validation_fraction = 0.2  # trailing share of the training windows used for validation

# Build the model.
model = lstm.criar_modelo_avancado(
    lookback=lookback,
    n_features=num_features,
    units_camada1=128,
    units_camada2=64,
    dropout_rate=0.2
)

# summary() prints by itself and returns None; wrapping it in print() only
# added a stray "None" line to the output.
model.summary()

# Stop after 30 epochs without val_loss improvement and roll back to the best
# weights seen so far.
early_stopping = EarlyStopping(monitor='val_loss', patience=30, restore_best_weights=True)

# After 25 epochs without improvement, multiply the learning rate by 0.25
# (i.e. reduce it to 25% of its current value), never going below 1e-6.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.25, patience=25, min_lr=1e-6, verbose=1)

logger.info(f"Iniciando treinamento por {epochs} épocas. Com batch_size: {batch_size}")

# The callbacks were previously built but never passed to fit(), so training
# always ran the full 1000 epochs. They are now enabled, and validation uses
# the last `validation_fraction` of the training data (Keras takes the split
# from the end, before shuffling, which preserves chronological order). The
# test set is deliberately NOT used here: early stopping / LR scheduling driven
# by test loss would bias the test metrics reported afterwards.
history = model.fit(
    X_train, y_train,
    validation_split=validation_fraction,
    epochs=epochs,
    batch_size=batch_size,
    callbacks=[early_stopping, reduce_lr],
)
logger.info("Treinamento concluído.")


In [None]:
# --- Prediction ---
logger.info("Realizando previsões no conjunto de teste.")


def _inverter_chuva(valores_escalados, scaler, coluna='chuva'):
    """Map scaled rain values back to the original scale.

    Undoes the MinMaxScaler step for a single column and then the log1p
    transform applied when the data was loaded.

    Args:
        valores_escalados: array-like of scaled values; it is flattened to 1-D.
        scaler: fitted MinMaxScaler that was applied to the rain columns.
        coluna: name of the scaler column the values belong to. If the scaler
            does not expose feature names, or the name is absent, column 0 is
            used (the assumption the original code made).

    Returns:
        1-D numpy array of values in the original rain units.
    """
    nomes = getattr(scaler, "feature_names_in_", None)
    idx = 0
    if nomes is not None and coluna in list(nomes):
        idx = list(nomes).index(coluna)
    valores = np.asarray(valores_escalados, dtype=float).reshape(-1)
    # Same arithmetic as MinMaxScaler.inverse_transform, restricted to one
    # column, which avoids building a zero-padded dummy matrix.
    valores_log = (valores - scaler.min_[idx]) / scaler.scale_[idx]
    return np.expm1(valores_log)


pred = _inverter_chuva(model.predict(X_test), scaler_chuva)

# Derive the targets from `y` instead of overwriting `y_test` from itself:
# re-running this cell used to apply the inverse transform twice.
# NOTE: `y_test` still ends up in original units (the plotting cell relies on
# that), so re-run the split cell before using it as scaled data again.
y_test = _inverter_chuva(y[train_size:], scaler_chuva)

mse = mean_squared_error(y_test, pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, pred)
logger.info("--- Métricas de Avaliação no Conjunto de Teste ---")
logger.info(f"RMSE (Raiz do Erro Quadrático Médio): {rmse:.4f}")
logger.info(f"MSE (Erro Quadrático Médio): {mse:.4f}")
logger.info(f"MAE (Erro Absoluto Médio): {mae:.4f}")
logger.info("-------------------------------------------------")

In [None]:
# Final figure: observed vs. predicted rain over the test period.
logger.info("Gerando gráfico de previsão final.")
fig, ax = plt.subplots(figsize=(12, 6))
for serie, rotulo in ((y_test, "Real"), (pred, "Previsto")):
    ax.plot(test_date, serie, label=rotulo)
ax.legend()
ax.set_title("Previsão de Chuva com BiLSTM")
ax.set_xlabel("Data")
ax.set_ylabel("Chuva")
# Tilt the date labels so they do not overlap.
plt.xticks(rotation=45)
fig.tight_layout()
plt.show()
logger.info("Script BiLSTM BrDwgd(TensorFlow/Keras) finalizado.")