In [None]:
# Configuración del entorno (compatible con Colab y local)
import os
import sys
from pathlib import Path
import shutil
import time
import psutil

# Detect whether we are running inside Google Colab
IN_COLAB = 'google.colab' in sys.modules

if IN_COLAB:
    from google.colab import drive
    drive.mount('/content/drive')
    # On Colab, clone the repository and move into it.
    # The "!" and "%" lines are IPython shell/magic escapes (notebook-only syntax).
    !git clone https://github.com/ninja-marduk/ml_precipitation_prediction.git
    %cd ml_precipitation_prediction
    # Install the required dependencies
    !pip install -r requirements.txt
    !pip install xarray netCDF4 optuna matplotlib seaborn lightgbm xgboost scikit-learn ace_tools
    # NOTE(review): the base path points at a Drive folder, not at the clone
    # created above — confirm the data is expected to live on Drive.
    BASE_PATH = '/content/drive/MyDrive/ml_precipitation_prediction'
else:
    # Running locally: derive the base path from the current working directory.
    # This is a substring test, so any path containing '/models' selects '..'.
    if '/models' in os.getcwd():
        BASE_PATH = Path('..')
    else:
        BASE_PATH = Path('.')

print(f"Entorno configurado. Usando ruta base: {BASE_PATH}")

# BASE_PATH is a plain string in the Colab branch, so normalise it to a Path
BASE_PATH = Path(BASE_PATH)

# With a Path, the "/" operator can be used to build the sub-directories
data_output_dir = BASE_PATH / 'data' / 'output'
model_output_dir = BASE_PATH / 'models' / 'output'


In [None]:
# Versión final optimizada con validación cruzada, curvas por fold, y trazabilidad completa
import os
import numpy as np
import pandas as pd
import xarray as xr
from pathlib import Path
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.python.client import device_lib

# ==== Runtime environment ====
# Report whether TensorFlow sees a GPU; when it does, try to switch Keras to
# the 'mixed_float16' policy and report (without aborting) if that fails.
print("🔍 Detectando dispositivo disponible...")
gpu_devices = tf.config.list_physical_devices('GPU')
USE_GPU = len(gpu_devices) > 0

if not USE_GPU:
    print("⚠️ No se detectó GPU. Usando CPU.")
    print("ℹ️ En Colab puedes activar GPU en Entorno de ejecución > Cambiar tipo de entorno de ejecución.")
else:
    print("✅ GPU detectada:", gpu_devices[0].name)
    try:
        from tensorflow.keras import mixed_precision
        mixed_precision.set_global_policy('mixed_float16')
        print("⚡ Política 'mixed_float16' activada.")
    except Exception as err:
        print(f"⚠️ No se pudo activar mixed precision: {err}")

# ==== Funciones auxiliares ====
def build_model(model_type, input_shape, output_neurons):
    """Build and compile a small Keras regression model.

    Args:
        model_type: One of 'LSTM', 'GRU', 'BLSTM' or 'CNN'.
        input_shape: Shape of one sample without the batch axis. The caller
            in this file passes (timesteps, features).
        output_neurons: Number of units in the final Dense layer.

    Returns:
        A compiled Sequential model (Adam optimizer, MSE loss).

    Raises:
        ValueError: If model_type is not one of the supported names.
    """
    supported_types = ('LSTM', 'GRU', 'BLSTM', 'CNN')
    # Validate first: an unknown name used to fall through every branch and
    # silently produce an Input -> Dense model with no sequence/conv layer.
    if model_type not in supported_types:
        raise ValueError(
            f"Unsupported model_type {model_type!r}; expected one of {supported_types}"
        )

    from tensorflow.keras.models import Sequential
    from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, LSTM, GRU, Bidirectional, Reshape, Input

    model = Sequential()
    model.add(Input(shape=input_shape))
    if model_type == 'LSTM':
        model.add(LSTM(64))
    elif model_type == 'GRU':
        model.add(GRU(64))
    elif model_type == 'BLSTM':
        model.add(Bidirectional(LSTM(64)))
    else:  # 'CNN'
        # Append a trailing channel axis so Conv2D can treat the sample as an image.
        model.add(Reshape((*input_shape, 1)))
        model.add(Conv2D(32, (3, 3), activation='relu', padding='same'))
        model.add(MaxPooling2D((2, 2)))
        model.add(Flatten())
    # Keep the regression output in float32 even when the global policy is
    # 'mixed_float16' (set in the GPU setup above), so predictions and the
    # MSE loss are not computed in half precision. No-op under float32 policy.
    model.add(Dense(output_neurons, dtype='float32'))
    model.compile(optimizer='adam', loss='mse')
    return model

def evaluate(y_true, y_pred):
    """Compute regression metrics for a set of predictions.

    Args:
        y_true: Array of observed values.
        y_pred: Array of predicted values, same shape as y_true.

    Returns:
        Tuple (rmse, mae, mape, r2). MAPE is expressed in percent and uses
        ``y_true + 1e-5`` as denominator to avoid dividing by exactly zero.
    """
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_true, y_pred)

    relative_error = (y_true - y_pred) / (y_true + 1e-5)
    mape = np.mean(np.abs(relative_error)) * 100

    r2 = r2_score(y_true, y_pred)
    return rmse, mae, mape, r2

def to_dataset(x, y, batch_size=16):
    """Wrap feature/target arrays in a batched, prefetching tf.data pipeline.

    Args:
        x: Feature array; it is sliced along its first axis.
        y: Target array, sliced in lockstep with x.
        batch_size: Number of samples per batch. Defaults to 16, the value
            that was previously hard-coded.

    Returns:
        A tf.data.Dataset yielding (x_batch, y_batch) pairs.
    """
    dataset = tf.data.Dataset.from_tensor_slices((x, y))
    return dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)

# ==== Directories and dataset ====
# Creates the output folders for models and learning curves, then opens the
# NetCDF dataset. Any failure is re-raised as RuntimeError with the original
# exception attached as its cause.
print("📂 Configurando directorios y cargando dataset...")
try:
    # NOTE(review): this relative path replaces the model_output_dir built from
    # BASE_PATH in the setup cell, so outputs land under the current working
    # directory — confirm that is intended.
    model_output_dir = Path("output/ST_HybridWaveStack")
    curves_dir = model_output_dir / "learning_curves"

    for label, folder in (("Carpeta", model_output_dir), ("Subcarpeta", curves_dir)):
        if folder.exists():
            print(f"📁 {label} ya existe: {folder}")
        else:
            # exist_ok guards against the folder appearing between the check
            # above and this call.
            folder.mkdir(parents=True, exist_ok=True)
            print(f"📁 {label} creada: {folder}")

    file_path = data_output_dir / "complete_dataset_with_features_with_clusters_elevation_with_windows.nc"
    ds = xr.open_dataset(file_path)
    print(f"✔️ Dataset cargado desde: {file_path}")

except Exception as e:
    # Chain explicitly so the traceback shows the original failure as the cause.
    raise RuntimeError(f"❌ Error cargando dataset o creando carpetas: {e}") from e


# Feature groups, declared once and combined cumulatively below so that each
# experiment extends the previous one with one more group.
_time_cycle_features = ['year', 'month', 'month_sin', 'month_cos', 'doy_sin', 'doy_cos']
_lag_features = [
    'total_precipitation_lag1', 'total_precipitation_lag2', 'total_precipitation_lag3',
    'total_precipitation_lag4', 'total_precipitation_lag12', 'total_precipitation_lag24',
    'total_precipitation_lag36',
]
_terrain_features = ['elevation', 'slope', 'aspect']
_cluster_features = ['cluster_elevation']

# Maps experiment name -> ordered list of dataset variables used as inputs.
experiment_settings = {
    "time+cycles": list(_time_cycle_features),
    "time+cycles+lag": _time_cycle_features + _lag_features,
    "time+cycles+lag+elev": _time_cycle_features + _lag_features + _terrain_features,
    "all_features": _time_cycle_features + _lag_features + _terrain_features + _cluster_features,
}

# Fail fast: every variable named by any experiment must exist in the dataset.
ds_vars = set(ds.data_vars)
for name, vars_list in experiment_settings.items():
    missing = [v for v in vars_list if not (v in ds_vars)]
    if not missing:
        continue
    raise ValueError(f"❌ Faltan variables necesarias para el experimento '{name}': {missing}")
print("✅ Todas las variables requeridas están presentes.")

# Grid of architectures and forecast horizons evaluated for each experiment.
model_types = ['LSTM', 'GRU', 'BLSTM', 'CNN']
prediction_horizons = [3, 6, 12]
input_window = 96  # 8 years (monthly)

# One dict per (experiment, model, horizon, fold) is appended here.
results = []

# ==== Experiment loop ====
# For every feature set: build per-grid-cell sliding windows, then run 5-fold
# cross-validation for each model type and horizon, recording metrics, the
# learning curve and the trained model of every fold.
import traceback  # used to report the full traceback of a failed experiment

max_horizon = max(prediction_horizons)

for exp_name, variables in experiment_settings.items():
    print(f"\n🚀 Iniciando experimento: {exp_name}")
    try:
        cluster_elevation_index = variables.index('cluster_elevation') if 'cluster_elevation' in variables else None
        subset_array = ds[variables].to_array().transpose('time', 'latitude', 'longitude', 'variable')
        subset_np = subset_array.values

        if cluster_elevation_index is not None:
            print("🔄 Codificando 'cluster_elevation'...")
            cluster_data = subset_np[..., cluster_elevation_index]
            encoded = LabelEncoder().fit_transform(cluster_data.ravel()).reshape(cluster_data.shape)
            subset_np[..., cluster_elevation_index] = encoded

        subset_np = subset_np.astype(np.float32)
        # Force the same (time, latitude, longitude) order used for the features;
        # otherwise the reshape below could pair targets with the wrong cells.
        target = ds['total_precipitation'].transpose('time', 'latitude', 'longitude', ...).values

        samples, lat, lon, feats = subset_np.shape
        X = subset_np.reshape(samples, lat * lon, feats)
        y = target.reshape(samples, lat * lon)

        print("🧱 Generando secuencias multihorizonte...")
        # Windows are built independently for each grid cell. Masking the
        # flattened (time, cell) array first and sliding over the result made
        # each "96-step" window run across neighbouring cells instead of
        # through time.
        X_seq = []
        Y_targets = {h: [] for h in prediction_horizons}
        for cell in range(lat * lon):
            # Drop timesteps whose target is NaN, as the original mask did.
            valid = ~np.isnan(y[:, cell])
            X_cell = X[valid, cell]
            y_cell = y[valid, cell]
            # "+ 1": the last window may end so that its farthest target is
            # the final element (the previous bound skipped that sample).
            n_windows = len(y_cell) - input_window - max_horizon + 1
            if n_windows <= 0:
                continue
            X_seq.extend(X_cell[i:i + input_window] for i in range(n_windows))
            for h in prediction_horizons:
                first_target = input_window + h - 1
                Y_targets[h].append(y_cell[first_target:first_target + n_windows])

        if not X_seq:
            raise ValueError(
                f"No hay suficientes pasos de tiempo válidos para ventanas de "
                f"{input_window} + horizonte {max_horizon}"
            )
        X_seq = np.array(X_seq)
        Y_targets = {h: np.concatenate(Y_targets[h]) for h in prediction_horizons}
        input_shape = (X_seq.shape[1], X_seq.shape[2])

        for model_name in model_types:
            print(f"\n🏗️ Modelo: {model_name}")
            for h in prediction_horizons:
                print(f"🕒 Validación cruzada - Horizonte {h} meses")
                # NOTE(review): shuffled K-fold on overlapping windows lets
                # validation windows share timesteps with training windows;
                # consider a time-ordered split if that leakage matters.
                kf = KFold(n_splits=5, shuffle=True, random_state=42)

                for fold, (train_idx, val_idx) in enumerate(kf.split(X_seq), start=1):
                    print(f"🔁 Fold {fold}...")
                    # Release graph/state of models from earlier folds so memory
                    # does not grow across the many models built in this loop.
                    # The dtype policy is re-applied afterwards in case the
                    # installed Keras resets it with the session — TODO confirm.
                    active_policy = tf.keras.mixed_precision.global_policy()
                    tf.keras.backend.clear_session()
                    tf.keras.mixed_precision.set_global_policy(active_policy)

                    model = build_model(model_name, input_shape, 1)
                    es = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

                    X_train_fold, X_val_fold = X_seq[train_idx], X_seq[val_idx]
                    y_train_fold, y_val_fold = Y_targets[h][train_idx], Y_targets[h][val_idx]

                    train_ds = to_dataset(X_train_fold, y_train_fold)
                    val_ds = to_dataset(X_val_fold, y_val_fold)

                    history = model.fit(train_ds,
                                        validation_data=val_ds,
                                        epochs=20,
                                        verbose=1,
                                        callbacks=[es])

                    y_pred = model.predict(X_val_fold).flatten()
                    y_true = y_val_fold.flatten()
                    rmse, mae, mape, r2 = evaluate(y_true, y_pred)

                    results.append({
                        'experiment': exp_name,
                        'model': model_name,
                        'horizon': h,
                        'fold': fold,
                        'RMSE': rmse,
                        'MAE': mae,
                        'MAPE': mape,
                        'R2': r2,
                        'epochs': len(history.history['loss'])
                    })

                    # Save this fold's learning curve
                    plt.figure()
                    plt.plot(history.history['loss'], label='Train')
                    plt.plot(history.history['val_loss'], label='Validation')
                    plt.title(f'{exp_name} - {model_name} - H{h} - Fold {fold}')
                    plt.xlabel('Epochs')
                    plt.ylabel('Loss')
                    plt.legend()
                    curve_path = curves_dir / f"{exp_name.replace('+','_')}_{model_name}_H{h}_F{fold}.png"
                    plt.savefig(curve_path)
                    plt.close()

                    # Save this fold's trained model
                    model_path = model_output_dir / f"{exp_name.replace('+','_')}_{model_name}_H{h}_F{fold}.h5"
                    model.save(model_path)

    except Exception as e:
        # Best effort: a failing experiment must not stop the remaining ones,
        # but keep the traceback so the failure can actually be diagnosed.
        print(f"❌ Error en experimento '{exp_name}': {e}")
        traceback.print_exc()

# ==== Save results ====
# Persist the per-fold metrics to CSV, then show them to the user.
results_df = pd.DataFrame(results)
results_df.to_csv("resultados_modelos_cv_8anios.csv", index=False)

# ace_tools is only a convenience viewer and is not available in every
# environment; fall back to printing the table instead of crashing after the
# whole training run has finished.
try:
    import ace_tools as tools
    display_dataframe = tools.display_dataframe_to_user
except (ImportError, AttributeError):
    display_dataframe = None

if display_dataframe is not None:
    display_dataframe(name="Resultados Modelos CV 8 años", dataframe=results_df)
else:
    print(results_df)
