In [None]:
# Configuración del entorno (compatible con Colab y local)
import os
import sys
from pathlib import Path
import shutil
import time
import psutil

# Detectar si estamos en Google Colab
IN_COLAB = 'google.colab' in sys.modules

if IN_COLAB:
    from google.colab import drive
    drive.mount('/content/drive')
    # Si estamos en Colab, clonar el repositorio
    !git clone https://github.com/ninja-marduk/ml_precipitation_prediction.git
    %cd ml_precipitation_prediction
    # Instalar dependencias necesarias
    !pip install -r requirements.txt
    !pip install xarray netCDF4 optuna matplotlib seaborn lightgbm xgboost scikit-learn ace_tools
    BASE_PATH = '/content/drive/MyDrive/ml_precipitation_prediction'
else:
    # Si estamos en local, usar la ruta actual
    if '/models' in os.getcwd():
        BASE_PATH = Path('..')
    else:
        BASE_PATH = Path('.')

print(f"Entorno configurado. Usando ruta base: {BASE_PATH}")

# Si BASE_PATH viene como string, lo convertimos
BASE_PATH = Path(BASE_PATH)

# Ahora puedes concatenar correctamente
data_output_dir = BASE_PATH / 'data' / 'output'
model_output_dir = BASE_PATH / 'models' / 'output'


In [None]:
# Reimportar todo después del reset para ejecución del experimento multi-ventana
import os
import numpy as np
import pandas as pd
import xarray as xr
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (
    Dense, Conv2D, MaxPooling2D, Flatten,
    LSTM, GRU, Bidirectional, Reshape, Input
)
from tensorflow.keras.callbacks import EarlyStopping

import matplotlib.pyplot as plt

# Funciones
def create_sequences(X, y, window=12):
    """Slice aligned feature/target series into sliding windows.

    For every start index ``i`` in ``range(len(X) - window)`` the sample is
    ``X[i:i + window]`` and its label is ``y[i + window]``, i.e. the target
    located right after the window.

    Args:
        X: Sliceable sequence of feature rows (e.g. a NumPy array).
        y: Indexable sequence of targets aligned with ``X`` on the first axis.
        window: Number of consecutive rows per sample.

    Returns:
        Tuple ``(X_seq, y_seq)`` of NumPy arrays built from the collected
        windows and labels. Both are empty when ``len(X) <= window``.
    """
    starts = range(len(X) - window)
    windows_out = np.array([X[start:start + window] for start in starts])
    labels_out = np.array([y[start + window] for start in starts])
    return windows_out, labels_out

def build_model(model_type, input_shape, output_neurons):
    """Build and compile a small Keras ``Sequential`` model.

    Args:
        model_type: One of ``'LSTM'``, ``'GRU'``, ``'BLSTM'`` or ``'CNN'``.
        input_shape: Shape of one sample without the batch axis. The CNN
            branch appends a trailing axis of size 1 before the convolution.
        output_neurons: Number of units of the final ``Dense`` layer.

    Returns:
        The model, compiled with the ``'adam'`` optimizer and ``'mse'`` loss.

    Raises:
        ValueError: If ``model_type`` is not one of the supported names.
    """
    supported_types = ('LSTM', 'GRU', 'BLSTM', 'CNN')
    # Fail fast, before any layer is created: an unknown name used to fall
    # through every branch and silently return an Input -> Dense model.
    if model_type not in supported_types:
        raise ValueError(
            f"Tipo de modelo no soportado: {model_type!r}. "
            f"Opciones válidas: {supported_types}"
        )

    model = Sequential()
    model.add(Input(shape=input_shape))
    if model_type == 'LSTM':
        model.add(LSTM(64))
    elif model_type == 'GRU':
        model.add(GRU(64))
    elif model_type == 'BLSTM':
        model.add(Bidirectional(LSTM(64)))
    else:  # 'CNN'
        # Conv2D needs a channel axis, so add one of size 1 to each sample
        model.add(Reshape((*input_shape, 1)))
        model.add(Conv2D(32, (3, 3), activation='relu', padding='same'))
        model.add(MaxPooling2D((2, 2)))
        model.add(Flatten())
    model.add(Dense(output_neurons))
    model.compile(optimizer='adam', loss='mse')
    return model

def evaluate(y_true, y_pred):
    """Compute regression metrics for a set of predictions.

    Args:
        y_true: Observed values (array-like).
        y_pred: Predicted values (array-like, same shape as ``y_true``).

    Returns:
        Tuple ``(rmse, mae, mape, r2)`` where ``mape`` is expressed in percent.
    """
    # Accept plain Python sequences as well as NumPy arrays
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    mae = mean_absolute_error(y_true, y_pred)
    # Use |y_true| in the denominator: with the raw value, a target of -1e-5
    # cancels the epsilon and divides by zero. For non-negative targets the
    # result is identical to the previous formula.
    mape = np.mean(np.abs(y_true - y_pred) / (np.abs(y_true) + 1e-5)) * 100
    r2 = r2_score(y_true, y_pred)
    return rmse, mae, mape, r2

# Multi-window experiment setup: output folder, dataset, feature sets and
# the container that collects one row of metrics per trained model
model_output_dir_STHWS = model_output_dir.joinpath('ST_HybridWaveStack')
model_output_dir_STHWS.mkdir(parents=True, exist_ok=True)

file_path = data_output_dir.joinpath("complete_dataset_with_features_with_clusters_elevation.nc")
ds = xr.open_dataset(file_path)

# Each experiment extends the feature list of the previous one
experiment_settings = {}
experiment_settings["precip+time"] = ['month_sin', 'month_cos']
experiment_settings["precip+time+elev"] = (
    experiment_settings["precip+time"] + ['elevation', 'slope', 'aspect']
)
experiment_settings["all_features"] = (
    experiment_settings["precip+time+elev"] + ['cluster_elevation']
)

# Window lengths, i.e. number of consecutive rows fed to the models per sample
windows = [3, 6, 12]

# Column-oriented results: every key holds an independent, initially empty list
multi_window_results = {
    column: []
    for column in ('experiment', 'window', 'model', 'RMSE', 'MAE', 'MAPE', 'R2')
}

# Train and evaluate every model type for each (window, feature set) pair.
# A failure inside one pair is reported and the loop moves on to the next one.
for window_size in windows:
    for exp_name, variables in experiment_settings.items():
        print(f"\n🚀 Ventana {window_size} - Experimento: {exp_name}")

        try:
            # Stack the selected variables on a trailing 'variable' axis
            cluster_elevation_index = variables.index('cluster_elevation') if 'cluster_elevation' in variables else None
            subset_array = ds[variables].to_array().transpose('time', 'latitude', 'longitude', 'variable')
            subset_np = subset_array.values

            if cluster_elevation_index is not None:
                # Replace the cluster values with integer codes
                cluster_data = subset_np[..., cluster_elevation_index]
                encoded = LabelEncoder().fit_transform(cluster_data.ravel()).reshape(cluster_data.shape)
                subset_np[..., cluster_elevation_index] = encoded

            subset_np = subset_np.astype(np.float32)
            target = ds['total_precipitation'].values

            # Collapse the two spatial axes into one
            samples, lat, lon, feats = subset_np.shape
            X = subset_np.reshape(samples, lat * lon, feats)
            y = target.reshape(samples, lat * lon)

            # Keep only positions with a valid target. The 2-D boolean mask
            # flattens the first two axes, so X becomes (n_valid, feats) and
            # y becomes (n_valid,).
            # NOTE(review): consecutive rows are then grid cells in row-major
            # (time, cell) order, so each window below mostly spans
            # neighbouring cells of one time step rather than consecutive
            # time steps of one cell -- confirm this is intended.
            mask = ~np.isnan(y)
            X = X[mask]
            y = y[mask]

            X_seq, y_seq = create_sequences(X, y, window=window_size)
            # NOTE(review): train_test_split shuffles by default and the
            # windows overlap, so train and test share rows -- confirm that a
            # chronological split is not required here.
            X_train, X_test, y_train, y_test = train_test_split(X_seq, y_seq, test_size=0.2, random_state=42)

            X_train_feed = X_train.reshape((X_train.shape[0], X_train.shape[1], -1))
            X_test_feed = X_test.reshape((X_test.shape[0], X_test.shape[1], -1))
            input_shape = (X_train_feed.shape[1], X_train_feed.shape[2])

            for model_name in ['LSTM', 'GRU', 'BLSTM', 'CNN']:
                print(f"\t🏗️ Entrenando {model_name} con ventana {window_size}...")
                # Many models are created in this loop; reset Keras' global
                # state so memory held by earlier models does not pile up.
                tf.keras.backend.clear_session()
                model = build_model(model_name, input_shape, 1)
                es = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
                model.fit(X_train_feed, y_train, epochs=50, batch_size=32, validation_split=0.2, verbose=0, callbacks=[es])

                y_pred = model.predict(X_test_feed).flatten()
                y_true = y_test.flatten()

                rmse, mae, mape, r2 = evaluate(y_true, y_pred)
                multi_window_results['experiment'].append(exp_name)
                multi_window_results['window'].append(window_size)
                multi_window_results['model'].append(model_name)
                multi_window_results['RMSE'].append(rmse)
                multi_window_results['MAE'].append(mae)
                multi_window_results['MAPE'].append(mape)
                multi_window_results['R2'].append(r2)

                # Save the model under a name built from experiment, model type and window
                file_name = f"{exp_name.replace('+', '_')}_{model_name}_win{window_size}.h5"
                model_path = model_output_dir_STHWS / file_name
                model.save(model_path)
                print(f"\t💾 Modelo guardado en: {model_path}")

        except Exception as err:
            # Best effort: report the failure and continue with the next pair
            print(f"❌ Error en experimento '{exp_name}' (ventana {window_size}): {err}")

# Collect the metrics in a table, order it and persist it as CSV in the
# current working directory
results_df = pd.DataFrame(multi_window_results)
results_df = results_df.sort_values(by=['window', 'experiment', 'RMSE'])
results_df.to_csv('resultados_comparativos_modelos_multiwindow.csv', index=False)

# Show the table. `ace_tools` may be missing, or an installed package of that
# name may not provide `display_dataframe_to_user`; in both cases fall back to
# the notebook's rich display instead of aborting the cell.
# NOTE(review): ace_tools looks like a sandbox-specific helper -- confirm it
# is really needed as a dependency.
try:
    import ace_tools as tools
    show_dataframe = tools.display_dataframe_to_user
except (ImportError, AttributeError):
    show_dataframe = None

if show_dataframe is not None:
    show_dataframe(name="Resultados Comparativos Multi-Ventana", dataframe=results_df)
else:
    # `display` is made available as a builtin by the IPython kernel
    display(results_df)
