In [1]:
import os
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score
import json
import optuna
from functools import partial
from joblib import Memory

In [2]:
def load_clustering_info():
    """Load clustering information from CSV file"""
    cluster_file = "/kaggle/input/ai-data4clustering-dict/fuzzy_clusters.csv"
    try:
        clusters_df = pd.read_csv(cluster_file)
        print(f" Loaded clustering info: {clusters_df.shape[0]} rows")
        # Check if required columns exist
        required_cols = ['ROW', 'COL', 'CLUSTER']
        if not all(col in clusters_df.columns for col in required_cols):
            print(f"[ERRORS] Clustering file missing required columns: {required_cols}")
            return None
        return clusters_df
    except Exception as e:
        print(f"[ERRORS] Error loading clustering file: {str(e)}")
        return None

In [3]:


# Setup memory cache
cache_dir = './joblib_cache'
memory = Memory(cache_dir, verbose=0)

class Config:
    TARGET = 'AWS'
    USE_LAG_FEATURES = True
    USE_ROLLING_STATISTICS = True

# Paths
base_path = "/kaggle/input/ai-dataimputedataset-k-fold"
months = ["2019-04", "2019-10", "2020-04", "2020-10"]
folds = [f"fold_{i}" for i in range(1, 6)]

# Define selected features
selected_features = [
    'TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV'
]

# Device configuration
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Model parameters - now configurable via Optuna
MODEL_CONFIG = {
    "04": {"BATCH_SIZE": 64, "EPOCHS": 10},
    "10": {"BATCH_SIZE": 128, "EPOCHS": 15},
}

# Create lag features
@memory.cache
def create_lag_features(train_df, test_df, target_column, lag_steps, groupby_cols):
    """Create lag features for the target column"""
    result_df = test_df.copy()
    
    # Combine train and test for continuous time series
    combined_df = pd.concat([train_df, test_df]).sort_values('DATETIME').reset_index(drop=True)
    
    # Create lag features
    for lag in lag_steps:
        combined_df[f'{target_column}_lag{lag}'] = combined_df.groupby(groupby_cols)[target_column].shift(lag)
    
    # Extract only the test portion with lag features
    result_df = combined_df.iloc[len(train_df):].reset_index(drop=True)
    return result_df

# Create rolling statistics
@memory.cache
def create_rolling_statistics(train_df, test_df, target_column, window_sizes, groupby_cols):
    """Create rolling statistics features for the target column"""
    result_df = test_df.copy()
    
    # Combine train and test for continuous rolling stats
    combined_df = pd.concat([train_df, test_df]).sort_values('DATETIME').reset_index(drop=True)
    
    # Create rolling features
    for window in window_sizes:
        # Rolling mean
        combined_df[f'{target_column}_rollmean_{window}'] = combined_df.groupby(groupby_cols)[target_column].transform(
            lambda x: x.rolling(window, min_periods=1).mean())
        # Rolling std
        combined_df[f'{target_column}_rollstd_{window}'] = combined_df.groupby(groupby_cols)[target_column].transform(
            lambda x: x.rolling(window, min_periods=1).std())
    
    # Extract only the test portion with rolling features
    result_df = combined_df.iloc[len(train_df):].reset_index(drop=True)
    return result_df

# Handle missing values
@memory.cache
def handle_missing_values(df, lag_steps, window_sizes):
    """Handle missing values in the dataframe"""
    result_df = df.copy()
    
    # Fill NaN values in lag features with 0
    for lag in lag_steps:
        lag_col = f'{Config.TARGET}_lag{lag}'
        if lag_col in result_df.columns:
            result_df[lag_col] = result_df[lag_col].fillna(0)
    
    # Fill NaN values in rolling features with 0
    for window in window_sizes:
        mean_col = f'{Config.TARGET}_rollmean_{window}'
        std_col = f'{Config.TARGET}_rollstd_{window}'
        
        if mean_col in result_df.columns:
            result_df[mean_col] = result_df[mean_col].fillna(0)
        
        if std_col in result_df.columns:
            result_df[std_col] = result_df[std_col].fillna(0)
    
    # Fill remaining NaNs with 0
    result_df = result_df.fillna(0)
    
    return result_df



# Modified load_and_process_data function to filter features
@memory.cache
@memory.cache
def load_and_process_data(file_path, train_file_path=None, lag_steps=None, window_sizes=None, cluster_df=None, cluster_id=None):
    """
    Load and process data from file_path with optional lag features and rolling statistics
    If train_file_path is provided, use it for creating lag and rolling features
    If cluster_df and cluster_id are provided, filter data by cluster
    """
    
    try:
        raw_df = pd.read_csv(file_path)
        
        # Filter by cluster if specified
        if cluster_df is not None and cluster_id is not None:
            # Get ROW, COL pairs in this cluster
            cluster_points = cluster_df[cluster_df['CLUSTER'] == cluster_id][['ROW', 'COL']].values.tolist()
            
            # Filter raw_df to keep only points in this cluster
            raw_df = raw_df[raw_df.apply(lambda row: [row['ROW'], row['COL']] in cluster_points, axis=1)]
            
            if raw_df.empty:
                print(f" No data points found for cluster {cluster_id}")
                return pd.DataFrame()
        
        # Filter to keep only selected features plus essential columns
        essential_cols = ['DATETIME', 'ROW', 'COL', Config.TARGET]
        feature_cols = [col for col in selected_features if col in raw_df.columns]
        filtered_cols = essential_cols + feature_cols
        
        # Keep only needed columns
        raw_df = raw_df[filtered_cols]
        
        # Convert target to numeric and handle NaN/inf values
        raw_df[Config.TARGET] = pd.to_numeric(raw_df[Config.TARGET], errors='coerce')
        raw_df = raw_df.replace([np.inf, -np.inf], np.nan)
        df = raw_df.dropna(subset=[Config.TARGET]).copy()
        
        # Sort by datetime for proper sequence handling
        if 'DATETIME' in df.columns:
            df = df.sort_values("DATETIME").reset_index(drop=True)
        
        # If we're in train mode or not creating lag/rolling features
        if train_file_path is None or (lag_steps is None and window_sizes is None):
            return df
        
        # Otherwise, we're in eval mode and need to carefully create features
        train_df = pd.read_csv(train_file_path)
        
        # Filter training data by cluster if specified
        if cluster_df is not None and cluster_id is not None:
            cluster_points = cluster_df[cluster_df['CLUSTER'] == cluster_id][['ROW', 'COL']].values.tolist()
            train_df = train_df[train_df.apply(lambda row: [row['ROW'], row['COL']] in cluster_points, axis=1)]
            
            if train_df.empty:
                print(f" No training data points found for cluster {cluster_id}")
                return pd.DataFrame()
        
        # Filter training data to keep only selected features
        train_df = train_df[filtered_cols]
        
        train_df[Config.TARGET] = pd.to_numeric(train_df[Config.TARGET], errors='coerce')
        train_df = train_df.replace([np.inf, -np.inf], np.nan)
        train_df = train_df.dropna(subset=[Config.TARGET]).copy()
        
        if 'DATETIME' in train_df.columns:
            train_df = train_df.sort_values("DATETIME").reset_index(drop=True)
        
        # Create lag features if needed
        if Config.USE_LAG_FEATURES and lag_steps:
            df = create_lag_features(train_df, df, Config.TARGET, lag_steps, ['ROW', 'COL'])
        
        # Create rolling statistics if needed
        if Config.USE_ROLLING_STATISTICS and window_sizes:
            df = create_rolling_statistics(train_df, df, Config.TARGET, window_sizes, ['ROW', 'COL'])
        
        # Handle missing values
        if lag_steps or window_sizes:
            df = handle_missing_values(df, lag_steps or [], window_sizes or [])
        return df
        
    except Exception as e:
        print(f"[ERRORS] Error loading or processing data: {str(e)}")
        return pd.DataFrame()

# Enhanced LSTM Model
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size=64, num_layers=2, dropout=0.0, time_step_out=1):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout)
        self.fc = nn.Linear(hidden_size, time_step_out)
        self.time_step_out = time_step_out

    def forward(self, x):
        out, _ = self.lstm(x)
        return self.fc(out[:, -1, :])

# Create sequences with configurable input and output time steps
def create_sequences(df, input_cols, target_col, time_step_in, time_step_out=1, stride=1):
    """
    Create sequences from dataframe with configurable input and output time steps
    - time_step_in: number of time steps for input
    - time_step_out: number of future steps to predict
    - stride: step size for sliding window
    """
    sequences, targets = [], []
    grouped = df.groupby(['ROW', 'COL'])
    
    for _, group in grouped:
        # Make sure group is sorted by time
        if 'DATETIME' in group.columns:
            group = group.sort_values("DATETIME")
            
        data = group[input_cols].values
        target_data = group[target_col].values
        
        if len(data) < time_step_in + time_step_out:
            continue
        
        for i in range(0, len(data) - time_step_in - time_step_out + 1, stride):
            seq = data[i:i+time_step_in]
            if time_step_out == 1:
                target = target_data[i+time_step_in]
                targets.append(target)
            else:
                target = target_data[i+time_step_in:i+time_step_in+time_step_out]
                targets.append(target)
            sequences.append(seq)
            
    if not sequences:
        return torch.tensor([]), torch.tensor([])
    
    if time_step_out == 1:
        return torch.tensor(sequences, dtype=torch.float32), torch.tensor(targets, dtype=torch.float32).unsqueeze(1)
    else:
        return torch.tensor(sequences, dtype=torch.float32), torch.tensor(targets, dtype=torch.float32)

# Prepare data loaders
# Modified prepare_data_loaders function
def prepare_data_loaders(month, fold, lag_steps, window_sizes, time_step_in, time_step_out, batch_size, stride=1, cluster_df=None, cluster_id=None):
    """Prepare data loaders with specific time steps and features for a specific cluster"""
    folder = os.path.join(base_path, month, fold)
    
    # Load train data
    train_df = load_and_process_data(
        os.path.join(folder, "processed_train.csv"), 
        cluster_df=cluster_df, 
        cluster_id=cluster_id
    )
    
    # Load validation data
    val_df = load_and_process_data(
        os.path.join(folder, "processed_val.csv"), 
        cluster_df=cluster_df, 
        cluster_id=cluster_id
    )
    
    # Load test data
    test_df = load_and_process_data(
        os.path.join(folder, "merged_test.csv"), 
        cluster_df=cluster_df, 
        cluster_id=cluster_id
    )
    
    if train_df.empty or val_df.empty or test_df.empty:
        print(f" One or more datasets are empty for cluster {cluster_id}")
        return None, None, None, 0
    
    # Rest of the function remains the same as your original code...
    # Sort data by time
    if 'DATETIME' in train_df.columns:
        train_df = train_df.sort_values("DATETIME").reset_index(drop=True)
    if 'DATETIME' in val_df.columns:
        val_df = val_df.sort_values("DATETIME").reset_index(drop=True)
    if 'DATETIME' in test_df.columns:
        test_df = test_df.sort_values("DATETIME").reset_index(drop=True)
    
    # Create features before further processing
    # 1. Create lag features separately for each dataset
    if Config.USE_LAG_FEATURES and lag_steps:
        # Create lag features for train set using itself
        for lag in lag_steps:
            train_df[f'{Config.TARGET}_lag{lag}'] = train_df.groupby(['ROW', 'COL'])[Config.TARGET].shift(lag)
        
        # Create lag features for validation set using train + val
        train_val_df = pd.concat([train_df, val_df]).sort_values('DATETIME').reset_index(drop=True)
        for lag in lag_steps:
            train_val_df[f'{Config.TARGET}_lag{lag}'] = train_val_df.groupby(['ROW', 'COL'])[Config.TARGET].shift(lag)
        val_df = train_val_df.iloc[len(train_df):].reset_index(drop=True)
        
        # Create lag features for test set using train + val + test
        full_df = pd.concat([train_df, val_df, test_df]).sort_values('DATETIME').reset_index(drop=True)
        for lag in lag_steps:
            full_df[f'{Config.TARGET}_lag{lag}'] = full_df.groupby(['ROW', 'COL'])[Config.TARGET].shift(lag)
        test_df = full_df.iloc[len(train_df) + len(val_df):].reset_index(drop=True)
    
    # 2. Create rolling statistics separately for each dataset
    if Config.USE_ROLLING_STATISTICS and window_sizes:
        # Create rolling stats for train set
        for window in window_sizes:
            train_df[f'{Config.TARGET}_rollmean_{window}'] = train_df.groupby(['ROW', 'COL'])[Config.TARGET].transform(
                lambda x: x.rolling(window, min_periods=1).mean())
            train_df[f'{Config.TARGET}_rollstd_{window}'] = train_df.groupby(['ROW', 'COL'])[Config.TARGET].transform(
                lambda x: x.rolling(window, min_periods=1).std())
        
        # Create rolling stats for validation set
        train_val_df = pd.concat([train_df, val_df]).sort_values('DATETIME').reset_index(drop=True)
        for window in window_sizes:
            train_val_df[f'{Config.TARGET}_rollmean_{window}'] = train_val_df.groupby(['ROW', 'COL'])[Config.TARGET].transform(
                lambda x: x.rolling(window, min_periods=1).mean())
            train_val_df[f'{Config.TARGET}_rollstd_{window}'] = train_val_df.groupby(['ROW', 'COL'])[Config.TARGET].transform(
                lambda x: x.rolling(window, min_periods=1).std())
        val_df = train_val_df.iloc[len(train_df):].reset_index(drop=True)
        
        # Create rolling stats for test set
        full_df = pd.concat([train_df, val_df, test_df]).sort_values('DATETIME').reset_index(drop=True)
        for window in window_sizes:
            full_df[f'{Config.TARGET}_rollmean_{window}'] = full_df.groupby(['ROW', 'COL'])[Config.TARGET].transform(
                lambda x: x.rolling(window, min_periods=1).mean())
            full_df[f'{Config.TARGET}_rollstd_{window}'] = full_df.groupby(['ROW', 'COL'])[Config.TARGET].transform(
                lambda x: x.rolling(window, min_periods=1).std())
        test_df = full_df.iloc[len(train_df) + len(val_df):].reset_index(drop=True)
    
    # Handle missing values
    train_df = train_df.fillna(0)
    val_df = val_df.fillna(0)
    test_df = test_df.fillna(0)
    
    # Prepare feature columns - exclude DATETIME, ROW, COL and target column
    # Include selected_features that are in the DataFrame
    basic_cols = [col for col in selected_features if col in train_df.columns]
    lag_cols = [f'{Config.TARGET}_lag{lag}' for lag in lag_steps if f'{Config.TARGET}_lag{lag}' in train_df.columns]
    roll_cols = []
    
    for window in window_sizes:
        mean_col = f'{Config.TARGET}_rollmean_{window}'
        std_col = f'{Config.TARGET}_rollstd_{window}'
        if mean_col in train_df.columns:
            roll_cols.append(mean_col)
        if std_col in train_df.columns:
            roll_cols.append(std_col)
    
    feature_cols = basic_cols + lag_cols + roll_cols
    
    # Check if we have any features
    if not feature_cols:
        print(" No features detected! Creating default lag feature.")
        # Create at least one default lag feature
        default_lag = 1
        for df in [train_df, val_df, test_df]:
            df[f'{Config.TARGET}_lag{default_lag}'] = df.groupby(['ROW', 'COL'])[Config.TARGET].shift(default_lag)
            df = df.fillna(0)
        
        feature_cols = [f'{Config.TARGET}_lag{default_lag}']
    
    # Create sequences
    train_x, train_y = create_sequences(train_df, feature_cols, Config.TARGET, time_step_in, time_step_out, stride)
    val_x, val_y = create_sequences(val_df, feature_cols, Config.TARGET, time_step_in, time_step_out, stride)
    test_x, test_y = create_sequences(test_df, feature_cols, Config.TARGET, time_step_in, time_step_out, stride)
    
    if train_x.numel() == 0 or val_x.numel() == 0 or test_x.numel() == 0:
        print(f" One or more sequence sets are empty for cluster {cluster_id}")
        return None, None, None, 0
    
    # Create data loaders
    train_loader = DataLoader(TensorDataset(train_x, train_y), batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(TensorDataset(val_x, val_y), batch_size=batch_size)
    test_loader = DataLoader(TensorDataset(test_x, test_y), batch_size=batch_size)
    
    return train_loader, val_loader, test_loader, train_x.shape[2]

# Evaluate model
def evaluate_model(model, loader):
    """Evaluate model and return metrics"""
    model.eval()
    preds, targets = [], []
    
    with torch.no_grad():
        for xb, yb in loader:
            xb = xb.to(DEVICE)
            pred = model(xb).cpu()
            preds.append(pred)
            targets.append(yb)
    
    preds = torch.cat(preds).squeeze().numpy()
    targets = torch.cat(targets).squeeze().numpy()
    
    # Handle multi-step output
    if len(preds.shape) > 1 and preds.shape[1] > 1:
        # For multi-step evaluation, we'll calculate metrics on the last predicted step
        preds_last = preds[:, -1]
        targets_last = targets[:, -1] if len(targets.shape) > 1 else targets
        
        rmse = mean_squared_error(targets_last, preds_last, squared=False)
        bias = np.mean(preds_last - targets_last)
        r = r2_score(targets_last, preds_last)
        csi = np.sum((preds_last > 0.1) & (targets_last > 0.1)) / (np.sum((preds_last > 0.1) | (targets_last > 0.1)) + 1e-9)
    else:
        rmse = mean_squared_error(targets, preds, squared=False)
        bias = np.mean(preds - targets)
        r = r2_score(targets, preds)
        csi = np.sum((preds > 0.1) & (targets > 0.1)) / (np.sum((preds > 0.1) | (targets > 0.1)) + 1e-9)
    
    return rmse, bias, r, csi

In [4]:
def run_cluster_evaluation_from_checkpoint():
    """Run evaluation on clusters using pre-trained models from checkpoints"""
    # Load clustering information
    cluster_df = load_clustering_info()
    if cluster_df is None:
        print("[ERRORS] Failed to load clustering information")
        return
    
    # Get unique cluster IDs
    cluster_ids = cluster_df['CLUSTER'].unique()
    
    # Results storage
    all_results = []
    best_month_results = []  # Store best fold results for each month
    
    for month in months:
        print(f"\n###### Processing month: {month}")
        
        # Load best parameters for this month
        best_params_path = f"/kaggle/input/lstm-checkpoint/best_params_{month}.json"
        try:
            with open(best_params_path, "r") as f:
                best_params = json.load(f)
            print(f" Loaded best parameters from {best_params_path}")
        except Exception as e:
            print(f"[ERRORS] Error loading best parameters: {str(e)}")
            continue
        
        # Store results for all folds of this month
        month_fold_results = []
        
        # Process each fold for this month
        for fold in folds:
            print(f"\n##### Processing {month}, {fold}")
            
            # Process each cluster
            cluster_results = []
            for cluster_id in cluster_ids:
                print(f"\n##### Processing cluster {cluster_id}")
                # Use our evaluation function
                result = evaluate_cluster_with_checkpoint(month, fold, best_params, cluster_df, cluster_id)
                if result:
                    cluster_results.append(result)
                    all_results.append(result)
            
            # Calculate mean performance across clusters for this fold
            if cluster_results:
                mean_rmse = sum(r["test_rmse"] for r in cluster_results) / len(cluster_results)
                mean_bias = sum(r["test_bias"] for r in cluster_results) / len(cluster_results)
                mean_r = sum(r["test_r"] for r in cluster_results) / len(cluster_results)
                mean_csi = sum(r["test_csi"] for r in cluster_results) / len(cluster_results)
                
                # Create fold summary
                fold_summary = {
                    "month": month,
                    "fold": fold,
                    "mean_test_rmse": round(mean_rmse, 4),
                    "mean_test_bias": round(mean_bias, 4),
                    "mean_test_r": round(mean_r, 4),
                    "mean_test_csi": round(mean_csi, 4),
                    "num_clusters": len(cluster_results)
                }
                
                # Add to month's fold results
                month_fold_results.append(fold_summary)
                
                print(f"\n Mean test performance across clusters for {month}, {fold}:")
                print(f"  RMSE: {mean_rmse:.4f}, Bias: {mean_bias:.4f}, R²: {mean_r:.4f}, CSI: {mean_csi:.4f}")
                
                # Save summary for this fold
                with open(f"cluster_summary_{month}_{fold}.json", "w") as f:
                    json.dump(fold_summary, f, indent=2)
        
        # Find best fold for this month (lowest RMSE)
        if month_fold_results:
            best_fold_result = min(month_fold_results, key=lambda x: x["mean_test_rmse"])
            best_month_results.append(best_fold_result)
            
            print(f"\n Best fold for {month}: {best_fold_result['fold']}")
            print(f"  RMSE: {best_fold_result['mean_test_rmse']:.4f}, Bias: {best_fold_result['mean_test_bias']:.4f}, R²: {best_fold_result['mean_test_r']:.4f}, CSI: {best_fold_result['mean_test_csi']:.4f}")
    
    # Create a dataframe with all results
    if all_results:
        all_results_df = pd.DataFrame([
            {
                "month": r["month"],
                "fold": r["fold"],
                "cluster": r["cluster"],
                "test_rmse": r["test_rmse"],
                "test_bias": r["test_bias"],
                "test_r": r["test_r"],
                "test_csi": r["test_csi"],
                "val_rmse": r["val_rmse"],
                "val_bias": r["val_bias"],
                "val_r": r["val_r"],
                "val_csi": r["val_csi"]
            } for r in all_results
        ])
        
        # Save all results
        all_results_df.to_csv("lstm_cluster_results.csv", index=False)
        print("\n All cluster results saved to lstm_cluster_results.csv")
        
        # Create a dataframe with best fold results per month
        best_month_df = pd.DataFrame(best_month_results)
        best_month_df.to_csv("lstm_best_fold_per_month.csv", index=False)
        print(" Best fold results per month saved to lstm_best_fold_per_month.csv")
        
        # Calculate overall mean performance across best folds of all months
        if best_month_results:
            overall_mean_rmse = sum(r["mean_test_rmse"] for r in best_month_results) / len(best_month_results)
            overall_mean_bias = sum(r["mean_test_bias"] for r in best_month_results) / len(best_month_results)
            overall_mean_r = sum(r["mean_test_r"] for r in best_month_results) / len(best_month_results)
            overall_mean_csi = sum(r["mean_test_csi"] for r in best_month_results) / len(best_month_results)
            
            overall_results = {
                "overall_mean_rmse": round(overall_mean_rmse, 4),
                "overall_mean_bias": round(overall_mean_bias, 4),
                "overall_mean_r": round(overall_mean_r, 4),
                "overall_mean_csi": round(overall_mean_csi, 4),
                "num_months": len(best_month_results)
            }
            
            # Save overall results
            with open("lstm_overall_results.json", "w") as f:
                json.dump(overall_results, f, indent=2)
            
            print("\n Overall mean performance across best folds of all months:")
            print(f"  RMSE: {overall_mean_rmse:.4f}, Bias: {overall_mean_bias:.4f}, R²: {overall_mean_r:.4f}, CSI: {overall_mean_csi:.4f}")
            
            # Display best fold results for each month
            print("\n Best Fold Results by Month:")
            print(best_month_df)

In [5]:
def evaluate_cluster_with_checkpoint(month, fold, best_params, cluster_df=None, cluster_id=None):
    """Evaluate a pre-trained model on a specific cluster without retraining"""
    print(f"\n Evaluating cluster {cluster_id} for {month}, {fold} using checkpoint")
    
    # Extract parameters for data preparation
    time_step_in = best_params["time_step_in"]
    time_step_out = best_params["time_step_out"]
    stride = best_params["stride"]
    
    # Extract lag steps and window sizes
    num_lags = best_params["num_lags"]
    lag_steps = [best_params[f"lag_{i}"] for i in range(num_lags)]
    
    num_windows = best_params["num_windows"]
    window_sizes = [best_params[f"window_{i}"] for i in range(num_windows)]
    
    # Get config
    config_key = month.split("-")[1]
    config = MODEL_CONFIG[config_key]
    batch_size = config["BATCH_SIZE"]
    
    # Prepare data for this cluster only
    _, val_loader, test_loader, input_size = prepare_data_loaders(
        month, fold, lag_steps, window_sizes, time_step_in, time_step_out, batch_size, stride,
        cluster_df=cluster_df, cluster_id=cluster_id
    )
    
    if val_loader is None or test_loader is None:
        print(f"[ERRORS] Failed to prepare data for cluster {cluster_id}")
        return None
    
    # Load the pre-trained model for this month
    checkpoint_path = f"/kaggle/input/lstm-checkpoint/best_model_{month}.pt"
    
    # Create model with the same architecture as the pre-trained one
    model = LSTMModel(
        input_size=input_size,
        hidden_size=best_params["hidden_size"],
        num_layers=best_params["num_layers"],
        dropout=best_params["dropout"],
        time_step_out=time_step_out
    ).to(DEVICE)
    
    try:
        # Load checkpoint state
        model.load_state_dict(torch.load(checkpoint_path))
        print(f" Successfully loaded checkpoint from {checkpoint_path}")
    except Exception as e:
        print(f"[ERRORS] Error loading checkpoint: {str(e)}")
        return None
    
    # Set model to evaluation mode
    model.eval()
    
    # Evaluate on validation set
    print(f" Evaluating on validation set for cluster {cluster_id}...")
    val_rmse, val_bias, val_r, val_csi = evaluate_model(model, val_loader)
    print(f"  Val RMSE: {val_rmse:.4f}, Bias: {val_bias:.4f}, R²: {val_r:.4f}, CSI: {val_csi:.4f}")
    
    # Evaluate on test set
    print(f" Evaluating on test set for cluster {cluster_id}...")
    test_rmse, test_bias, test_r, test_csi = evaluate_model(model, test_loader)
    print(f"  Test RMSE: {test_rmse:.4f}, Bias: {test_bias:.4f}, R²: {test_r:.4f}, CSI: {test_csi:.4f}")
    
    return {
        "month": month,
        "fold": fold,
        "cluster": cluster_id,
        "test_rmse": round(test_rmse, 4),
        "test_bias": round(test_bias, 4),
        "test_r": round(test_r, 4),
        "test_csi": round(test_csi, 4),
        "val_rmse": round(val_rmse, 4),
        "val_bias": round(val_bias, 4),
        "val_r": round(val_r, 4),
        "val_csi": round(val_csi, 4)
    }

In [6]:
run_cluster_evaluation_from_checkpoint()

 Loaded clustering info: 334 rows

###### Processing month: 2019-04
 Loaded best parameters from /kaggle/input/lstm-checkpoint/best_params_2019-04.json

##### Processing 2019-04, fold_1

##### Processing cluster 0

 Evaluating cluster 0 for 2019-04, fold_1 using checkpoint


  return torch.tensor(sequences, dtype=torch.float32), torch.tensor(targets, dtype=torch.float32)
  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2019-04.pt
 Evaluating on validation set for cluster 0...
  Val RMSE: 0.0648, Bias: 0.0335, R²: -0.3288, CSI: 0.0000
 Evaluating on test set for cluster 0...
  Test RMSE: 0.9646, Bias: -0.1126, R²: -0.0111, CSI: 0.0991

##### Processing cluster 2

 Evaluating cluster 2 for 2019-04, fold_1 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2019-04.pt
 Evaluating on validation set for cluster 2...
  Val RMSE: 0.0569, Bias: 0.0381, R²: -0.8003, CSI: 0.0000
 Evaluating on test set for cluster 2...
  Test RMSE: 1.0960, Bias: -0.1579, R²: -0.0099, CSI: 0.0901

##### Processing cluster 1

 Evaluating cluster 1 for 2019-04, fold_1 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2019-04.pt
 Evaluating on validation set for cluster 1...
  Val RMSE: 0.0637, Bias: 0.0415, R²: -0.8127, CSI: 0.0000
 Evaluating on test set for cluster 1...
  Test RMSE: 1.1323, Bias: -0.1296, R²: -0.0112, CSI: 0.0417

##### Processing cluster 3

 Evaluating cluster 3 for 2019-04, fold_1 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2019-04.pt
 Evaluating on validation set for cluster 3...
  Val RMSE: 0.2836, Bias: 0.0382, R²: -0.0477, CSI: 0.0526
 Evaluating on test set for cluster 3...
  Test RMSE: 1.0428, Bias: -0.0868, R²: 0.0506, CSI: 0.2222

 Mean test performance across clusters for 2019-04, fold_1:
  RMSE: 1.0589, Bias: -0.1217, R²: 0.0046, CSI: 0.1133

##### Processing 2019-04, fold_2

##### Processing cluster 0

 Evaluating cluster 0 for 2019-04, fold_2 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2019-04.pt
 Evaluating on validation set for cluster 0...
  Val RMSE: 0.2620, Bias: 0.0026, R²: 0.0002, CSI: 0.0238
 Evaluating on test set for cluster 0...
  Test RMSE: 0.9637, Bias: -0.1021, R²: -0.0093, CSI: 0.1667

##### Processing cluster 2

 Evaluating cluster 2 for 2019-04, fold_2 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2019-04.pt
 Evaluating on validation set for cluster 2...
  Val RMSE: 0.4131, Bias: 0.0008, R²: -0.0035, CSI: 0.0000
 Evaluating on test set for cluster 2...
  Test RMSE: 1.0945, Bias: -0.1454, R²: -0.0071, CSI: 0.1008

##### Processing cluster 1

 Evaluating cluster 1 for 2019-04, fold_2 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2019-04.pt
 Evaluating on validation set for cluster 1...
  Val RMSE: 0.0507, Bias: 0.0285, R²: -0.4744, CSI: 0.0000
 Evaluating on test set for cluster 1...
  Test RMSE: 1.1304, Bias: -0.1204, R²: -0.0080, CSI: 0.0789

##### Processing cluster 3

 Evaluating cluster 3 for 2019-04, fold_2 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2019-04.pt
 Evaluating on validation set for cluster 3...
  Val RMSE: 0.3798, Bias: 0.0184, R²: 0.2688, CSI: 0.0377
 Evaluating on test set for cluster 3...
  Test RMSE: 1.0421, Bias: -0.0752, R²: 0.0519, CSI: 0.2059

 Mean test performance across clusters for 2019-04, fold_2:
  RMSE: 1.0577, Bias: -0.1108, R²: 0.0069, CSI: 0.1381

##### Processing 2019-04, fold_3

##### Processing cluster 0

 Evaluating cluster 0 for 2019-04, fold_3 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2019-04.pt
 Evaluating on validation set for cluster 0...
  Val RMSE: 0.9178, Bias: -0.1154, R²: -0.0095, CSI: 0.0867
 Evaluating on test set for cluster 0...
  Test RMSE: 0.9637, Bias: -0.1021, R²: -0.0092, CSI: 0.1667

##### Processing cluster 2

 Evaluating cluster 2 for 2019-04, fold_3 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2019-04.pt
 Evaluating on validation set for cluster 2...
  Val RMSE: 0.9449, Bias: -0.1115, R²: -0.0007, CSI: 0.0906
 Evaluating on test set for cluster 2...
  Test RMSE: 1.0947, Bias: -0.1455, R²: -0.0074, CSI: 0.0952

##### Processing cluster 1

 Evaluating cluster 1 for 2019-04, fold_3 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2019-04.pt
 Evaluating on validation set for cluster 1...
  Val RMSE: 0.5297, Bias: -0.0207, R²: -0.0004, CSI: 0.0234
 Evaluating on test set for cluster 1...
  Test RMSE: 1.1305, Bias: -0.1204, R²: -0.0081, CSI: 0.0789

##### Processing cluster 3

 Evaluating cluster 3 for 2019-04, fold_3 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2019-04.pt
 Evaluating on validation set for cluster 3...
  Val RMSE: 0.8012, Bias: -0.0695, R²: 0.0091, CSI: 0.0417
 Evaluating on test set for cluster 3...
  Test RMSE: 1.0406, Bias: -0.0770, R²: 0.0547, CSI: 0.2121

 Mean test performance across clusters for 2019-04, fold_3:
  RMSE: 1.0574, Bias: -0.1113, R²: 0.0075, CSI: 0.1382

##### Processing 2019-04, fold_4

##### Processing cluster 0

 Evaluating cluster 0 for 2019-04, fold_4 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2019-04.pt
 Evaluating on validation set for cluster 0...
  Val RMSE: 0.0599, Bias: 0.0374, R²: -0.6631, CSI: 0.0000
 Evaluating on test set for cluster 0...
  Test RMSE: 0.9629, Bias: -0.0883, R²: -0.0076, CSI: 0.1818

##### Processing cluster 2

 Evaluating cluster 2 for 2019-04, fold_4 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2019-04.pt
 Evaluating on validation set for cluster 2...
  Val RMSE: 0.0394, Bias: 0.0325, R²: -2.2075, CSI: 0.0000
 Evaluating on test set for cluster 2...
  Test RMSE: 1.0918, Bias: -0.1275, R²: -0.0021, CSI: 0.1114

##### Processing cluster 1

 Evaluating cluster 1 for 2019-04, fold_4 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2019-04.pt
 Evaluating on validation set for cluster 1...
  Val RMSE: 0.0681, Bias: 0.0428, R²: -0.6591, CSI: 0.0000
 Evaluating on test set for cluster 1...
  Test RMSE: 1.1195, Bias: -0.1034, R²: 0.0114, CSI: 0.0789

##### Processing cluster 3

 Evaluating cluster 3 for 2019-04, fold_4 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2019-04.pt
 Evaluating on validation set for cluster 3...
  Val RMSE: 0.0980, Bias: 0.0472, R²: -59.8929, CSI: 0.0667
 Evaluating on test set for cluster 3...
  Test RMSE: 1.0373, Bias: -0.0646, R²: 0.0606, CSI: 0.1772

 Mean test performance across clusters for 2019-04, fold_4:
  RMSE: 1.0529, Bias: -0.0959, R²: 0.0156, CSI: 0.1373

##### Processing 2019-04, fold_5

##### Processing cluster 0

 Evaluating cluster 0 for 2019-04, fold_5 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2019-04.pt
 Evaluating on validation set for cluster 0...
  Val RMSE: 0.4219, Bias: -0.0194, R²: -0.0084, CSI: 0.0064
 Evaluating on test set for cluster 0...
  Test RMSE: 0.9626, Bias: -0.0874, R²: -0.0070, CSI: 0.1899

##### Processing cluster 2

 Evaluating cluster 2 for 2019-04, fold_5 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2019-04.pt
 Evaluating on validation set for cluster 2...
  Val RMSE: 0.0645, Bias: 0.0264, R²: -0.0331, CSI: 0.0000
 Evaluating on test set for cluster 2...
  Test RMSE: 1.0917, Bias: -0.1270, R²: -0.0020, CSI: 0.1161

##### Processing cluster 1

 Evaluating cluster 1 for 2019-04, fold_5 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2019-04.pt
 Evaluating on validation set for cluster 1...
  Val RMSE: 0.1767, Bias: -0.0128, R²: 0.0230, CSI: 0.1449
 Evaluating on test set for cluster 1...
  Test RMSE: 1.1209, Bias: -0.1034, R²: 0.0090, CSI: 0.0779

##### Processing cluster 3

 Evaluating cluster 3 for 2019-04, fold_5 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2019-04.pt
 Evaluating on validation set for cluster 3...
  Val RMSE: 0.1464, Bias: -0.0103, R²: 0.0363, CSI: 0.2333
 Evaluating on test set for cluster 3...
  Test RMSE: 1.0373, Bias: -0.0639, R²: 0.0606, CSI: 0.1750

 Mean test performance across clusters for 2019-04, fold_5:
  RMSE: 1.0531, Bias: -0.0954, R²: 0.0152, CSI: 0.1397

 Best fold for 2019-04: fold_4
  RMSE: 1.0529, Bias: -0.0959, R²: 0.0156, CSI: 0.1373

###### Processing month: 2019-10
 Loaded best parameters from /kaggle/input/lstm-checkpoint/best_params_2019-10.json

##### Processing 2019-10, fold_1

##### Processing cluster 0

 Evaluating cluster 0 for 2019-10, fold_1 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2019-10.pt
 Evaluating on validation set for cluster 0...
  Val RMSE: 0.5648, Bias: 0.0379, R²: 0.4534, CSI: 0.6154
 Evaluating on test set for cluster 0...
  Test RMSE: 0.3370, Bias: 0.0323, R²: 0.7544, CSI: 0.5723

##### Processing cluster 2

 Evaluating cluster 2 for 2019-10, fold_1 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2019-10.pt
 Evaluating on validation set for cluster 2...
  Val RMSE: 0.4810, Bias: 0.0477, R²: 0.3449, CSI: 0.4326
 Evaluating on test set for cluster 2...
  Test RMSE: 0.2288, Bias: 0.0115, R²: 0.4340, CSI: 0.7052

##### Processing cluster 1

 Evaluating cluster 1 for 2019-10, fold_1 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2019-10.pt
 Evaluating on validation set for cluster 1...
  Val RMSE: 0.4553, Bias: 0.0653, R²: 0.1831, CSI: 0.5129
 Evaluating on test set for cluster 1...
  Test RMSE: 0.1212, Bias: 0.0330, R²: 0.4389, CSI: 0.7052

##### Processing cluster 3

 Evaluating cluster 3 for 2019-10, fold_1 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2019-10.pt
 Evaluating on validation set for cluster 3...
  Val RMSE: 0.7594, Bias: 0.0189, R²: 0.4677, CSI: 0.7861
 Evaluating on test set for cluster 3...
  Test RMSE: 0.9093, Bias: -0.0028, R²: 0.7315, CSI: 0.6418

 Mean test performance across clusters for 2019-10, fold_1:
  RMSE: 0.3991, Bias: 0.0185, R²: 0.5897, CSI: 0.6561

##### Processing 2019-10, fold_2

##### Processing cluster 0

 Evaluating cluster 0 for 2019-10, fold_2 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2019-10.pt
 Evaluating on validation set for cluster 0...
  Val RMSE: 0.5327, Bias: 0.0467, R²: 0.5146, CSI: 0.5356
 Evaluating on test set for cluster 0...
  Test RMSE: 0.2283, Bias: 0.0382, R²: 0.5765, CSI: 0.5931

##### Processing cluster 2

 Evaluating cluster 2 for 2019-10, fold_2 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2019-10.pt
 Evaluating on validation set for cluster 2...
  Val RMSE: 0.2623, Bias: 0.0424, R²: 0.5826, CSI: 0.4559
 Evaluating on test set for cluster 2...
  Test RMSE: 0.2602, Bias: 0.0224, R²: 0.6164, CSI: 0.6214

##### Processing cluster 1

 Evaluating cluster 1 for 2019-10, fold_2 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2019-10.pt
 Evaluating on validation set for cluster 1...
  Val RMSE: 0.2392, Bias: 0.0432, R²: 0.5107, CSI: 0.5503
 Evaluating on test set for cluster 1...
  Test RMSE: 0.1895, Bias: 0.0276, R²: 0.7291, CSI: 0.6126

##### Processing cluster 3

 Evaluating cluster 3 for 2019-10, fold_2 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2019-10.pt
 Evaluating on validation set for cluster 3...
  Val RMSE: 0.9549, Bias: 0.0192, R²: 0.4675, CSI: 0.7234
 Evaluating on test set for cluster 3...
  Test RMSE: 1.1623, Bias: -0.0671, R²: 0.6928, CSI: 0.6679

 Mean test performance across clusters for 2019-10, fold_2:
  RMSE: 0.4601, Bias: 0.0053, R²: 0.6537, CSI: 0.6238

##### Processing 2019-10, fold_3

##### Processing cluster 0

 Evaluating cluster 0 for 2019-10, fold_3 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2019-10.pt
 Evaluating on validation set for cluster 0...
  Val RMSE: 0.2699, Bias: 0.0596, R²: 0.6251, CSI: 0.5132
 Evaluating on test set for cluster 0...
  Test RMSE: 0.2071, Bias: 0.0536, R²: 0.5181, CSI: 0.5930

##### Processing cluster 2

 Evaluating cluster 2 for 2019-10, fold_3 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2019-10.pt
 Evaluating on validation set for cluster 2...
  Val RMSE: 0.2995, Bias: 0.0376, R²: 0.4710, CSI: 0.3702
 Evaluating on test set for cluster 2...
  Test RMSE: 0.2773, Bias: 0.0355, R²: 0.4364, CSI: 0.5748

##### Processing cluster 1

 Evaluating cluster 1 for 2019-10, fold_3 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2019-10.pt
 Evaluating on validation set for cluster 1...
  Val RMSE: 0.2418, Bias: 0.0419, R²: 0.3656, CSI: 0.5327
 Evaluating on test set for cluster 1...
  Test RMSE: 0.1500, Bias: 0.0449, R²: 0.6542, CSI: 0.5728

##### Processing cluster 3

 Evaluating cluster 3 for 2019-10, fold_3 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2019-10.pt
 Evaluating on validation set for cluster 3...
  Val RMSE: 0.8140, Bias: 0.0334, R²: 0.6160, CSI: 0.6995
 Evaluating on test set for cluster 3...
  Test RMSE: 1.3272, Bias: -0.1441, R²: 0.6956, CSI: 0.6966

 Mean test performance across clusters for 2019-10, fold_3:
  RMSE: 0.4904, Bias: -0.0025, R²: 0.5761, CSI: 0.6093

##### Processing 2019-10, fold_4

##### Processing cluster 0

 Evaluating cluster 0 for 2019-10, fold_4 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2019-10.pt
 Evaluating on validation set for cluster 0...
  Val RMSE: 0.1740, Bias: 0.0163, R²: 0.3683, CSI: 0.6025
 Evaluating on test set for cluster 0...
  Test RMSE: 0.2701, Bias: 0.0577, R²: 0.2350, CSI: 0.5964

##### Processing cluster 2

 Evaluating cluster 2 for 2019-10, fold_4 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2019-10.pt
 Evaluating on validation set for cluster 2...
  Val RMSE: 0.1567, Bias: 0.0057, R²: 0.5564, CSI: 0.5358
 Evaluating on test set for cluster 2...
  Test RMSE: 0.3134, Bias: 0.0298, R²: 0.4856, CSI: 0.6135

##### Processing cluster 1

 Evaluating cluster 1 for 2019-10, fold_4 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2019-10.pt
 Evaluating on validation set for cluster 1...
  Val RMSE: 0.1927, Bias: 0.0026, R²: 0.3056, CSI: 0.5968
 Evaluating on test set for cluster 1...
  Test RMSE: 0.1338, Bias: 0.0462, R²: 0.5886, CSI: 0.5909

##### Processing cluster 3

 Evaluating cluster 3 for 2019-10, fold_4 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2019-10.pt
 Evaluating on validation set for cluster 3...
  Val RMSE: 0.6930, Bias: 0.0345, R²: 0.6562, CSI: 0.7089
 Evaluating on test set for cluster 3...
  Test RMSE: 1.3316, Bias: -0.1410, R²: 0.6709, CSI: 0.7680

 Mean test performance across clusters for 2019-10, fold_4:
  RMSE: 0.5122, Bias: -0.0018, R²: 0.4950, CSI: 0.6422

##### Processing 2019-10, fold_5

##### Processing cluster 0

 Evaluating cluster 0 for 2019-10, fold_5 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2019-10.pt
 Evaluating on validation set for cluster 0...
  Val RMSE: 0.3042, Bias: 0.0049, R²: 0.5870, CSI: 0.5992
 Evaluating on test set for cluster 0...
  Test RMSE: 0.4006, Bias: 0.0287, R²: 0.6511, CSI: 0.5882

##### Processing cluster 2

 Evaluating cluster 2 for 2019-10, fold_5 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2019-10.pt
 Evaluating on validation set for cluster 2...
  Val RMSE: 0.2818, Bias: -0.0021, R²: 0.4641, CSI: 0.5621
 Evaluating on test set for cluster 2...
  Test RMSE: 0.3579, Bias: 0.0338, R²: 0.5813, CSI: 0.5919

##### Processing cluster 1

 Evaluating cluster 1 for 2019-10, fold_5 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2019-10.pt
 Evaluating on validation set for cluster 1...
  Val RMSE: 0.3234, Bias: 0.0049, R²: 0.5443, CSI: 0.5848
 Evaluating on test set for cluster 1...
  Test RMSE: 0.3892, Bias: 0.0242, R²: 0.6292, CSI: 0.5754

##### Processing cluster 3

 Evaluating cluster 3 for 2019-10, fold_5 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2019-10.pt
 Evaluating on validation set for cluster 3...
  Val RMSE: 0.7328, Bias: -0.0065, R²: 0.6495, CSI: 0.7293
 Evaluating on test set for cluster 3...
  Test RMSE: 1.1629, Bias: -0.2433, R²: 0.6585, CSI: 0.7986

 Mean test performance across clusters for 2019-10, fold_5:
  RMSE: 0.5776, Bias: -0.0392, R²: 0.6300, CSI: 0.6385

 Best fold for 2019-10: fold_1
  RMSE: 0.3991, Bias: 0.0185, R²: 0.5897, CSI: 0.6561

###### Processing month: 2020-04
 Loaded best parameters from /kaggle/input/lstm-checkpoint/best_params_2020-04.json

##### Processing 2020-04, fold_1

##### Processing cluster 0

 Evaluating cluster 0 for 2020-04, fold_1 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2020-04.pt
 Evaluating on validation set for cluster 0...
  Val RMSE: 0.3387, Bias: -0.0670, R²: -0.1370, CSI: 0.5153
 Evaluating on test set for cluster 0...
  Test RMSE: 0.1952, Bias: 0.1242, R²: 0.0000, CSI: 0.0000

##### Processing cluster 2

 Evaluating cluster 2 for 2020-04, fold_1 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2020-04.pt
 Evaluating on validation set for cluster 2...
  Val RMSE: 0.3046, Bias: 0.0293, R²: -0.2644, CSI: 0.4181
 Evaluating on test set for cluster 2...
  Test RMSE: 0.3317, Bias: 0.1353, R²: -0.8809, CSI: 0.0000

##### Processing cluster 1

 Evaluating cluster 1 for 2020-04, fold_1 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2020-04.pt
 Evaluating on validation set for cluster 1...
  Val RMSE: 0.6031, Bias: -0.0714, R²: -0.0383, CSI: 0.5474
 Evaluating on test set for cluster 1...
  Test RMSE: 0.2008, Bias: 0.1312, R²: -130.5210, CSI: 0.0096

##### Processing cluster 3

 Evaluating cluster 3 for 2020-04, fold_1 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2020-04.pt
 Evaluating on validation set for cluster 3...
  Val RMSE: 0.8708, Bias: -0.1536, R²: -0.0464, CSI: 0.6356
 Evaluating on test set for cluster 3...
  Test RMSE: 0.3788, Bias: 0.1409, R²: 0.1332, CSI: 0.0285

 Mean test performance across clusters for 2020-04, fold_1:
  RMSE: 0.2766, Bias: 0.1329, R²: -32.8172, CSI: 0.0095

##### Processing 2020-04, fold_2

##### Processing cluster 0

 Evaluating cluster 0 for 2020-04, fold_2 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2020-04.pt
 Evaluating on validation set for cluster 0...
  Val RMSE: 0.7572, Bias: -0.2024, R²: 0.0820, CSI: 0.5318
 Evaluating on test set for cluster 0...
  Test RMSE: 0.0568, Bias: 0.0328, R²: 0.0000, CSI: 0.0000

##### Processing cluster 2

 Evaluating cluster 2 for 2020-04, fold_2 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2020-04.pt
 Evaluating on validation set for cluster 2...
  Val RMSE: 0.5709, Bias: 0.0148, R²: -0.1996, CSI: 0.4092
 Evaluating on test set for cluster 2...
  Test RMSE: 0.2496, Bias: 0.0298, R²: -0.0650, CSI: 0.0000

##### Processing cluster 1

 Evaluating cluster 1 for 2020-04, fold_2 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2020-04.pt
 Evaluating on validation set for cluster 1...
  Val RMSE: 0.8952, Bias: -0.2494, R²: 0.1583, CSI: 0.5569
 Evaluating on test set for cluster 1...
  Test RMSE: 0.0647, Bias: 0.0381, R²: -12.6569, CSI: 0.0556

##### Processing cluster 3

 Evaluating cluster 3 for 2020-04, fold_2 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2020-04.pt
 Evaluating on validation set for cluster 3...
  Val RMSE: 0.8320, Bias: -0.2702, R²: 0.0992, CSI: 0.5567
 Evaluating on test set for cluster 3...
  Test RMSE: 0.3256, Bias: 0.0186, R²: 0.3595, CSI: 0.1389

 Mean test performance across clusters for 2020-04, fold_2:
  RMSE: 0.1742, Bias: 0.0298, R²: -3.0906, CSI: 0.0486

##### Processing 2020-04, fold_3

##### Processing cluster 0

 Evaluating cluster 0 for 2020-04, fold_3 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2020-04.pt
 Evaluating on validation set for cluster 0...
  Val RMSE: 0.0457, Bias: -0.0222, R²: -0.3205, CSI: 0.0000
 Evaluating on test set for cluster 0...
  Test RMSE: 0.0791, Bias: 0.0545, R²: 0.0000, CSI: 0.0000

##### Processing cluster 2

 Evaluating cluster 2 for 2020-04, fold_3 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2020-04.pt
 Evaluating on validation set for cluster 2...
  Val RMSE: 0.0453, Bias: -0.0107, R²: -0.0380, CSI: 0.0000
 Evaluating on test set for cluster 2...
  Test RMSE: 0.2573, Bias: 0.0536, R²: -0.1320, CSI: 0.0000

##### Processing cluster 1

 Evaluating cluster 1 for 2020-04, fold_3 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2020-04.pt
 Evaluating on validation set for cluster 1...
  Val RMSE: 0.0383, Bias: -0.0141, R²: -0.2080, CSI: 0.0000
 Evaluating on test set for cluster 1...
  Test RMSE: 0.0824, Bias: 0.0590, R²: -21.1496, CSI: 0.0260

##### Processing cluster 3

 Evaluating cluster 3 for 2020-04, fold_3 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2020-04.pt
 Evaluating on validation set for cluster 3...
  Val RMSE: 0.1427, Bias: -0.0224, R²: -0.0014, CSI: 0.2222
 Evaluating on test set for cluster 3...
  Test RMSE: 0.3278, Bias: 0.0404, R²: 0.3508, CSI: 0.0748

 Mean test performance across clusters for 2020-04, fold_3:
  RMSE: 0.1866, Bias: 0.0519, R²: -5.2327, CSI: 0.0252

##### Processing 2020-04, fold_4

##### Processing cluster 0

 Evaluating cluster 0 for 2020-04, fold_4 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2020-04.pt
 Evaluating on validation set for cluster 0...
  Val RMSE: 0.3240, Bias: 0.0921, R²: -17.4141, CSI: 0.0538
 Evaluating on test set for cluster 0...
  Test RMSE: 0.0915, Bias: 0.0666, R²: 0.0000, CSI: 0.0000

##### Processing cluster 2

 Evaluating cluster 2 for 2020-04, fold_4 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2020-04.pt
 Evaluating on validation set for cluster 2...
  Val RMSE: 0.1517, Bias: 0.0475, R²: -9.5100, CSI: 0.0000
 Evaluating on test set for cluster 2...
  Test RMSE: 0.2627, Bias: 0.0668, R²: -0.1805, CSI: 0.0000

##### Processing cluster 1

 Evaluating cluster 1 for 2020-04, fold_4 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2020-04.pt
 Evaluating on validation set for cluster 1...
  Val RMSE: 0.5855, Bias: 0.0496, R²: -0.4401, CSI: 0.0472
 Evaluating on test set for cluster 1...
  Test RMSE: 0.0925, Bias: 0.0701, R²: -26.8751, CSI: 0.0214

##### Processing cluster 3

 Evaluating cluster 3 for 2020-04, fold_4 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2020-04.pt
 Evaluating on validation set for cluster 3...
  Val RMSE: 0.3395, Bias: 0.0488, R²: -0.7229, CSI: 0.0538
 Evaluating on test set for cluster 3...
  Test RMSE: 0.3296, Bias: 0.0524, R²: 0.3439, CSI: 0.0601

 Mean test performance across clusters for 2020-04, fold_4:
  RMSE: 0.1941, Bias: 0.0640, R²: -6.6779, CSI: 0.0204

##### Processing 2020-04, fold_5

##### Processing cluster 0

 Evaluating cluster 0 for 2020-04, fold_5 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2020-04.pt
 Evaluating on validation set for cluster 0...
  Val RMSE: 0.8968, Bias: -0.2741, R²: -0.0979, CSI: 0.6339
 Evaluating on test set for cluster 0...
  Test RMSE: 0.0912, Bias: 0.0664, R²: 0.0000, CSI: 0.0000

##### Processing cluster 2

 Evaluating cluster 2 for 2020-04, fold_5 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2020-04.pt
 Evaluating on validation set for cluster 2...
  Val RMSE: 1.1554, Bias: -0.2639, R²: 0.0003, CSI: 0.4609
 Evaluating on test set for cluster 2...
  Test RMSE: 0.2627, Bias: 0.0666, R²: -0.1799, CSI: 0.0000

##### Processing cluster 1

 Evaluating cluster 1 for 2020-04, fold_5 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2020-04.pt
 Evaluating on validation set for cluster 1...
  Val RMSE: 1.1182, Bias: -0.2996, R²: 0.0801, CSI: 0.6317
 Evaluating on test set for cluster 1...
  Test RMSE: 0.0923, Bias: 0.0699, R²: -26.7671, CSI: 0.0219

##### Processing cluster 3

 Evaluating cluster 3 for 2020-04, fold_5 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2020-04.pt
 Evaluating on validation set for cluster 3...
  Val RMSE: 0.8308, Bias: -0.2924, R²: -0.1482, CSI: 0.7056
 Evaluating on test set for cluster 3...
  Test RMSE: 0.3295, Bias: 0.0522, R²: 0.3442, CSI: 0.0608

 Mean test performance across clusters for 2020-04, fold_5:
  RMSE: 0.1939, Bias: 0.0638, R²: -6.6507, CSI: 0.0207

 Best fold for 2020-04: fold_2
  RMSE: 0.1742, Bias: 0.0298, R²: -3.0906, CSI: 0.0486

###### Processing month: 2020-10
 Loaded best parameters from /kaggle/input/lstm-checkpoint/best_params_2020-10.json

##### Processing 2020-10, fold_1

##### Processing cluster 0

 Evaluating cluster 0 for 2020-10, fold_1 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2020-10.pt
 Evaluating on validation set for cluster 0...
  Val RMSE: 0.5074, Bias: 0.0689, R²: 0.8635, CSI: 0.6844
 Evaluating on test set for cluster 0...
  Test RMSE: 0.8089, Bias: 0.1018, R²: 0.8522, CSI: 0.7280

##### Processing cluster 2

 Evaluating cluster 2 for 2020-10, fold_1 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2020-10.pt
 Evaluating on validation set for cluster 2...
  Val RMSE: 0.4379, Bias: 0.0637, R²: 0.6651, CSI: 0.4786
 Evaluating on test set for cluster 2...
  Test RMSE: 0.4735, Bias: 0.0711, R²: 0.7325, CSI: 0.5844

##### Processing cluster 1

 Evaluating cluster 1 for 2020-10, fold_1 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2020-10.pt
 Evaluating on validation set for cluster 1...
  Val RMSE: 0.5098, Bias: 0.0309, R²: 0.8165, CSI: 0.7803
 Evaluating on test set for cluster 1...
  Test RMSE: 0.6191, Bias: 0.0492, R²: 0.8338, CSI: 0.7938

##### Processing cluster 3

 Evaluating cluster 3 for 2020-10, fold_1 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2020-10.pt
 Evaluating on validation set for cluster 3...
  Val RMSE: 0.9689, Bias: -0.0079, R²: 0.8082, CSI: 0.7429
 Evaluating on test set for cluster 3...
  Test RMSE: 1.5566, Bias: -0.1246, R²: 0.7580, CSI: 0.8314

 Mean test performance across clusters for 2020-10, fold_1:
  RMSE: 0.8645, Bias: 0.0244, R²: 0.7941, CSI: 0.7344

##### Processing 2020-10, fold_2

##### Processing cluster 0

 Evaluating cluster 0 for 2020-10, fold_2 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2020-10.pt
 Evaluating on validation set for cluster 0...
  Val RMSE: 0.5619, Bias: 0.0969, R²: 0.8703, CSI: 0.6482
 Evaluating on test set for cluster 0...
  Test RMSE: 0.8576, Bias: 0.1022, R²: 0.8473, CSI: 0.7774

##### Processing cluster 2

 Evaluating cluster 2 for 2020-10, fold_2 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2020-10.pt
 Evaluating on validation set for cluster 2...
  Val RMSE: 0.5516, Bias: 0.0898, R²: 0.6224, CSI: 0.4830
 Evaluating on test set for cluster 2...
  Test RMSE: 0.2165, Bias: 0.0648, R²: 0.4301, CSI: 0.5136

##### Processing cluster 1

 Evaluating cluster 1 for 2020-10, fold_2 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2020-10.pt
 Evaluating on validation set for cluster 1...
  Val RMSE: 0.7941, Bias: 0.0364, R²: 0.7463, CSI: 0.7389
 Evaluating on test set for cluster 1...
  Test RMSE: 0.7465, Bias: 0.0430, R²: 0.8450, CSI: 0.8281

##### Processing cluster 3

 Evaluating cluster 3 for 2020-10, fold_2 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2020-10.pt
 Evaluating on validation set for cluster 3...
  Val RMSE: 1.3064, Bias: -0.1057, R²: 0.7691, CSI: 0.7339
 Evaluating on test set for cluster 3...
  Test RMSE: 1.3929, Bias: -0.1248, R²: 0.7768, CSI: 0.8137

 Mean test performance across clusters for 2020-10, fold_2:
  RMSE: 0.8034, Bias: 0.0213, R²: 0.7248, CSI: 0.7332

##### Processing 2020-10, fold_3

##### Processing cluster 0

 Evaluating cluster 0 for 2020-10, fold_3 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2020-10.pt
 Evaluating on validation set for cluster 0...
  Val RMSE: 0.8036, Bias: 0.0957, R²: 0.8151, CSI: 0.7173
 Evaluating on test set for cluster 0...
  Test RMSE: 0.5153, Bias: 0.1235, R²: 0.7867, CSI: 0.7159

##### Processing cluster 2

 Evaluating cluster 2 for 2020-10, fold_3 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2020-10.pt
 Evaluating on validation set for cluster 2...
  Val RMSE: 0.3829, Bias: 0.0931, R²: 0.7329, CSI: 0.4718
 Evaluating on test set for cluster 2...
  Test RMSE: 0.1726, Bias: 0.0691, R²: 0.3489, CSI: 0.4648

##### Processing cluster 1

 Evaluating cluster 1 for 2020-10, fold_3 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2020-10.pt
 Evaluating on validation set for cluster 1...
  Val RMSE: 0.7019, Bias: 0.0539, R²: 0.8566, CSI: 0.8104
 Evaluating on test set for cluster 1...
  Test RMSE: 0.4109, Bias: 0.0871, R²: 0.5690, CSI: 0.7382

##### Processing cluster 3

 Evaluating cluster 3 for 2020-10, fold_3 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2020-10.pt
 Evaluating on validation set for cluster 3...
  Val RMSE: 1.6815, Bias: -0.1209, R²: 0.7012, CSI: 0.7776
 Evaluating on test set for cluster 3...
  Test RMSE: 1.2389, Bias: -0.0699, R²: 0.7697, CSI: 0.8178

 Mean test performance across clusters for 2020-10, fold_3:
  RMSE: 0.5844, Bias: 0.0524, R²: 0.6186, CSI: 0.6842

##### Processing 2020-10, fold_4

##### Processing cluster 0

 Evaluating cluster 0 for 2020-10, fold_4 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2020-10.pt
 Evaluating on validation set for cluster 0...
  Val RMSE: 0.6335, Bias: 0.0798, R²: 0.8917, CSI: 0.7240
 Evaluating on test set for cluster 0...
  Test RMSE: 0.2133, Bias: 0.0895, R²: 0.6456, CSI: 0.6592

##### Processing cluster 2

 Evaluating cluster 2 for 2020-10, fold_4 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2020-10.pt
 Evaluating on validation set for cluster 2...
  Val RMSE: 0.2601, Bias: 0.0767, R²: 0.4494, CSI: 0.4147
 Evaluating on test set for cluster 2...
  Test RMSE: 0.1501, Bias: 0.0678, R²: 0.2774, CSI: 0.3835

##### Processing cluster 1

 Evaluating cluster 1 for 2020-10, fold_4 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2020-10.pt
 Evaluating on validation set for cluster 1...
  Val RMSE: 1.0152, Bias: -0.0006, R²: 0.6924, CSI: 0.7439
 Evaluating on test set for cluster 1...
  Test RMSE: 0.1976, Bias: 0.0614, R²: 0.8297, CSI: 0.7011

##### Processing cluster 3

 Evaluating cluster 3 for 2020-10, fold_4 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2020-10.pt
 Evaluating on validation set for cluster 3...
  Val RMSE: 1.8055, Bias: -0.1843, R²: 0.6795, CSI: 0.8181
 Evaluating on test set for cluster 3...
  Test RMSE: 0.8991, Bias: 0.0136, R²: 0.7675, CSI: 0.8044

 Mean test performance across clusters for 2020-10, fold_4:
  RMSE: 0.3650, Bias: 0.0581, R²: 0.6300, CSI: 0.6370

##### Processing 2020-10, fold_5

##### Processing cluster 0

 Evaluating cluster 0 for 2020-10, fold_5 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2020-10.pt
 Evaluating on validation set for cluster 0...
  Val RMSE: 0.5907, Bias: 0.0963, R²: 0.6238, CSI: 0.6265
 Evaluating on test set for cluster 0...
  Test RMSE: 0.1716, Bias: 0.0744, R²: 0.8353, CSI: 0.6439

##### Processing cluster 2

 Evaluating cluster 2 for 2020-10, fold_5 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2020-10.pt
 Evaluating on validation set for cluster 2...
  Val RMSE: 0.2312, Bias: 0.0583, R²: 0.5093, CSI: 0.4409
 Evaluating on test set for cluster 2...
  Test RMSE: 0.2137, Bias: 0.0783, R²: -0.1077, CSI: 0.3783

##### Processing cluster 1

 Evaluating cluster 1 for 2020-10, fold_5 using checkpoint


  model.load_state_dict(torch.load(checkpoint_path))


 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2020-10.pt
 Evaluating on validation set for cluster 1...
  Val RMSE: 0.5491, Bias: 0.0713, R²: 0.7199, CSI: 0.6839
 Evaluating on test set for cluster 1...
  Test RMSE: 0.1953, Bias: 0.0613, R²: 0.8744, CSI: 0.7149

##### Processing cluster 3

 Evaluating cluster 3 for 2020-10, fold_5 using checkpoint
 Successfully loaded checkpoint from /kaggle/input/lstm-checkpoint/best_model_2020-10.pt
 Evaluating on validation set for cluster 3...
  Val RMSE: 1.7074, Bias: -0.3005, R²: 0.6417, CSI: 0.8255
 Evaluating on test set for cluster 3...
  Test RMSE: 0.6467, Bias: 0.0535, R²: 0.7158, CSI: 0.7916

 Mean test performance across clusters for 2020-10, fold_5:
  RMSE: 0.3068, Bias: 0.0669, R²: 0.5795, CSI: 0.6322

 Best fold for 2020-10: fold_5
  RMSE: 0.3068, Bias: 0.0669, R²: 0.5794, CSI: 0.6322

 All cluster results saved to lstm_cluster_results.csv
 Best fold results per month saved to lstm_best_fold_per_month.csv

  model.load_state_dict(torch.load(checkpoint_path))
