In [1]:
import os
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score
import json
import optuna
from functools import partial
from joblib import Memory

# Setup memory cache
cache_dir = './joblib_cache'
memory = Memory(cache_dir, verbose=0)

class Config:
    TARGET = 'AWS'
    USE_LAG_FEATURES = True
    USE_ROLLING_STATISTICS = True

# Paths
base_path = "/kaggle/input/ai-dataimputedataset-k-fold"
months = ["2019-04", "2019-10", "2020-04", "2020-10"]
folds = [f"fold_{i}" for i in range(1, 6)]

# Define selected features
selected_features = [
    'TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV'
]

# Device configuration
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Model parameters - now configurable via Optuna
MODEL_CONFIG = {
    "04": {"BATCH_SIZE": 64, "EPOCHS": 10},
    "10": {"BATCH_SIZE": 128, "EPOCHS": 15},
}

# Create lag features
@memory.cache
def create_lag_features(train_df, test_df, target_column, lag_steps, groupby_cols):
    """Create lag features for the target column"""
    result_df = test_df.copy()
    
    # Combine train and test for continuous time series
    combined_df = pd.concat([train_df, test_df]).sort_values('DATETIME').reset_index(drop=True)
    
    # Create lag features
    for lag in lag_steps:
        combined_df[f'{target_column}_lag{lag}'] = combined_df.groupby(groupby_cols)[target_column].shift(lag)
    
    # Extract only the test portion with lag features
    result_df = combined_df.iloc[len(train_df):].reset_index(drop=True)

    return result_df

# Create rolling statistics
@memory.cache
def create_rolling_statistics(train_df, test_df, target_column, window_sizes, groupby_cols):
    """Create rolling statistics features for the target column"""
    result_df = test_df.copy()
    
    # Combine train and test for continuous rolling stats
    combined_df = pd.concat([train_df, test_df]).sort_values('DATETIME').reset_index(drop=True)
    
    # Create rolling features
    for window in window_sizes:
        # Rolling mean
        combined_df[f'{target_column}_rollmean_{window}'] = combined_df.groupby(groupby_cols)[target_column].transform(
            lambda x: x.rolling(window, min_periods=1).mean())
        # Rolling std
        combined_df[f'{target_column}_rollstd_{window}'] = combined_df.groupby(groupby_cols)[target_column].transform(
            lambda x: x.rolling(window, min_periods=1).std())
    
    # Extract only the test portion with rolling features
    result_df = combined_df.iloc[len(train_df):].reset_index(drop=True)
    
    return result_df

# Handle missing values
@memory.cache
def handle_missing_values(df, lag_steps, window_sizes):
    """Handle missing values in the dataframe"""
    result_df = df.copy()
    
    # Fill NaN values in lag features with 0
    for lag in lag_steps:
        lag_col = f'{Config.TARGET}_lag{lag}'
        if lag_col in result_df.columns:
            result_df[lag_col] = result_df[lag_col].fillna(0)
    
    # Fill NaN values in rolling features with 0
    for window in window_sizes:
        mean_col = f'{Config.TARGET}_rollmean_{window}'
        std_col = f'{Config.TARGET}_rollstd_{window}'
        
        if mean_col in result_df.columns:
            result_df[mean_col] = result_df[mean_col].fillna(0)
        
        if std_col in result_df.columns:
            result_df[std_col] = result_df[std_col].fillna(0)
    
    # Fill remaining NaNs with 0
    result_df = result_df.fillna(0)
    
    return result_df



# Modified load_and_process_data function to filter features
@memory.cache
def load_and_process_data(file_path, train_file_path=None, lag_steps=None, window_sizes=None):
    """
    Load and process data from file_path with optional lag features and rolling statistics
    If train_file_path is provided, use it for creating lag and rolling features
    """
    
    try:
        raw_df = pd.read_csv(file_path)
        
        # Filter to keep only selected features plus essential columns
        essential_cols = ['DATETIME', 'ROW', 'COL', Config.TARGET]
        feature_cols = [col for col in selected_features if col in raw_df.columns]
        filtered_cols = essential_cols + feature_cols
        
        # Keep only needed columns
        raw_df = raw_df[filtered_cols]
        
        # Convert target to numeric and handle NaN/inf values
        raw_df[Config.TARGET] = pd.to_numeric(raw_df[Config.TARGET], errors='coerce')
        raw_df = raw_df.replace([np.inf, -np.inf], np.nan)
        df = raw_df.dropna(subset=[Config.TARGET]).copy()
        
        # Sort by datetime for proper sequence handling
        if 'DATETIME' in df.columns:
            df = df.sort_values("DATETIME").reset_index(drop=True)
        
        # If we're in train mode or not creating lag/rolling features
        if train_file_path is None or (lag_steps is None and window_sizes is None):
            return df
        
        # Otherwise, we're in eval mode and need to carefully create features
        train_df = pd.read_csv(train_file_path)
        
        # Filter training data to keep only selected features
        train_df = train_df[filtered_cols]
        
        train_df[Config.TARGET] = pd.to_numeric(train_df[Config.TARGET], errors='coerce')
        train_df = train_df.replace([np.inf, -np.inf], np.nan)
        train_df = train_df.dropna(subset=[Config.TARGET]).copy()
        
        if 'DATETIME' in train_df.columns:
            train_df = train_df.sort_values("DATETIME").reset_index(drop=True)
        
        # Create lag features if needed
        if Config.USE_LAG_FEATURES and lag_steps:
            df = create_lag_features(train_df, df, Config.TARGET, lag_steps, ['ROW', 'COL'])
        
        # Create rolling statistics if needed
        if Config.USE_ROLLING_STATISTICS and window_sizes:
            df = create_rolling_statistics(train_df, df, Config.TARGET, window_sizes, ['ROW', 'COL'])
        
        # Handle missing values
        if lag_steps or window_sizes:
            df = handle_missing_values(df, lag_steps or [], window_sizes or [])
     
        return df
        
    except Exception as e:
        print(f" Error loading or processing data: {str(e)}")
        return pd.DataFrame()

# # Enhanced LSTM Model
# class LSTMModel(nn.Module):
#     def __init__(self, input_size, hidden_size=64, num_layers=2, dropout=0.0, time_step_out=1):
#         super(LSTMModel, self).__init__()
#         self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout)
#         self.fc = nn.Linear(hidden_size, time_step_out)
#         self.time_step_out = time_step_out

#     def forward(self, x):
#         out, _ = self.lstm(x)
#         return self.fc(out[:, -1, :])

# Create sequences with configurable input and output time steps
def create_sequences(df, input_cols, target_col, time_step_in, time_step_out=1, stride=1):
    """
    Create sequences from dataframe with configurable input and output time steps
    - time_step_in: number of time steps for input
    - time_step_out: number of future steps to predict
    - stride: step size for sliding window
    """
    sequences, targets = [], []
    grouped = df.groupby(['ROW', 'COL'])
    
    for _, group in grouped:
        # Make sure group is sorted by time
        if 'DATETIME' in group.columns:
            group = group.sort_values("DATETIME")
            
        data = group[input_cols].values
        target_data = group[target_col].values
        
        if len(data) < time_step_in + time_step_out:
            continue
        
        for i in range(0, len(data) - time_step_in - time_step_out + 1, stride):
            seq = data[i:i+time_step_in]
            if time_step_out == 1:
                target = target_data[i+time_step_in]
                targets.append(target)
            else:
                target = target_data[i+time_step_in:i+time_step_in+time_step_out]
                targets.append(target)
            sequences.append(seq)
            
    if not sequences:
        return torch.tensor([]), torch.tensor([])
    
    if time_step_out == 1:
        return torch.tensor(sequences, dtype=torch.float32), torch.tensor(targets, dtype=torch.float32).unsqueeze(1)
    else:
        return torch.tensor(sequences, dtype=torch.float32), torch.tensor(targets, dtype=torch.float32)

# Prepare data loaders
# Modified prepare_data_loaders function
def prepare_data_loaders(month, fold, lag_steps, window_sizes, time_step_in, time_step_out, batch_size, stride=1):
    """Prepare data loaders with specific time steps and features"""
    folder = os.path.join(base_path, month, fold)
    
    # Load train data
    train_df = load_and_process_data(os.path.join(folder, "processed_train.csv"))
    
    # Load validation data
    val_df = load_and_process_data(os.path.join(folder, "processed_val.csv"))
    
    # Load test data
    test_df = load_and_process_data(os.path.join(folder, "merged_test.csv"))
    
    if train_df.empty or val_df.empty or test_df.empty:
        print(" One or more datasets are empty")
        return None, None, None, 0
    
    # Sort data by time
    if 'DATETIME' in train_df.columns:
        train_df = train_df.sort_values("DATETIME").reset_index(drop=True)
    if 'DATETIME' in val_df.columns:
        val_df = val_df.sort_values("DATETIME").reset_index(drop=True)
    if 'DATETIME' in test_df.columns:
        test_df = test_df.sort_values("DATETIME").reset_index(drop=True)
    
    # Create features before further processing
    # 1. Create lag features separately for each dataset
    if Config.USE_LAG_FEATURES and lag_steps:
        # Create lag features for train set using itself
        for lag in lag_steps:
            train_df[f'{Config.TARGET}_lag{lag}'] = train_df.groupby(['ROW', 'COL'])[Config.TARGET].shift(lag)
        
        # Create lag features for validation set using train + val
        train_val_df = pd.concat([train_df, val_df]).sort_values('DATETIME').reset_index(drop=True)
        for lag in lag_steps:
            train_val_df[f'{Config.TARGET}_lag{lag}'] = train_val_df.groupby(['ROW', 'COL'])[Config.TARGET].shift(lag)
        val_df = train_val_df.iloc[len(train_df):].reset_index(drop=True)
        
        # Create lag features for test set using train + val + test
        full_df = pd.concat([train_df, val_df, test_df]).sort_values('DATETIME').reset_index(drop=True)
        for lag in lag_steps:
            full_df[f'{Config.TARGET}_lag{lag}'] = full_df.groupby(['ROW', 'COL'])[Config.TARGET].shift(lag)
        test_df = full_df.iloc[len(train_df) + len(val_df):].reset_index(drop=True)
    
    # 2. Create rolling statistics separately for each dataset
    if Config.USE_ROLLING_STATISTICS and window_sizes:
        # Create rolling stats for train set
        for window in window_sizes:
            train_df[f'{Config.TARGET}_rollmean_{window}'] = train_df.groupby(['ROW', 'COL'])[Config.TARGET].transform(
                lambda x: x.rolling(window, min_periods=1).mean())
            train_df[f'{Config.TARGET}_rollstd_{window}'] = train_df.groupby(['ROW', 'COL'])[Config.TARGET].transform(
                lambda x: x.rolling(window, min_periods=1).std())
        
        # Create rolling stats for validation set
        train_val_df = pd.concat([train_df, val_df]).sort_values('DATETIME').reset_index(drop=True)
        for window in window_sizes:
            train_val_df[f'{Config.TARGET}_rollmean_{window}'] = train_val_df.groupby(['ROW', 'COL'])[Config.TARGET].transform(
                lambda x: x.rolling(window, min_periods=1).mean())
            train_val_df[f'{Config.TARGET}_rollstd_{window}'] = train_val_df.groupby(['ROW', 'COL'])[Config.TARGET].transform(
                lambda x: x.rolling(window, min_periods=1).std())
        val_df = train_val_df.iloc[len(train_df):].reset_index(drop=True)
        
        # Create rolling stats for test set
        full_df = pd.concat([train_df, val_df, test_df]).sort_values('DATETIME').reset_index(drop=True)
        for window in window_sizes:
            full_df[f'{Config.TARGET}_rollmean_{window}'] = full_df.groupby(['ROW', 'COL'])[Config.TARGET].transform(
                lambda x: x.rolling(window, min_periods=1).mean())
            full_df[f'{Config.TARGET}_rollstd_{window}'] = full_df.groupby(['ROW', 'COL'])[Config.TARGET].transform(
                lambda x: x.rolling(window, min_periods=1).std())
        test_df = full_df.iloc[len(train_df) + len(val_df):].reset_index(drop=True)
    
    # Handle missing values
    train_df = train_df.fillna(0)
    val_df = val_df.fillna(0)
    test_df = test_df.fillna(0)
    
    # Prepare feature columns - exclude DATETIME, ROW, COL and target column
    # Include selected_features that are in the DataFrame
    basic_cols = [col for col in selected_features if col in train_df.columns]
    lag_cols = [f'{Config.TARGET}_lag{lag}' for lag in lag_steps if f'{Config.TARGET}_lag{lag}' in train_df.columns]
    roll_cols = []
    
    for window in window_sizes:
        mean_col = f'{Config.TARGET}_rollmean_{window}'
        std_col = f'{Config.TARGET}_rollstd_{window}'
        if mean_col in train_df.columns:
            roll_cols.append(mean_col)
        if std_col in train_df.columns:
            roll_cols.append(std_col)
    
    feature_cols = basic_cols + lag_cols + roll_cols
    
    # Check if we have any features
    if not feature_cols:
        print(" No features detected! Creating default lag feature.")
        # Create at least one default lag feature
        default_lag = 1
        for df in [train_df, val_df, test_df]:
            df[f'{Config.TARGET}_lag{default_lag}'] = df.groupby(['ROW', 'COL'])[Config.TARGET].shift(default_lag)
            df = df.fillna(0)
        
        feature_cols = [f'{Config.TARGET}_lag{default_lag}']
    
    print(f"Using {len(feature_cols)} features: {feature_cols}")
    
    # Create sequences
    train_x, train_y = create_sequences(train_df, feature_cols, Config.TARGET, time_step_in, time_step_out, stride)
    val_x, val_y = create_sequences(val_df, feature_cols, Config.TARGET, time_step_in, time_step_out, stride)
    test_x, test_y = create_sequences(test_df, feature_cols, Config.TARGET, time_step_in, time_step_out, stride)
    
    if train_x.numel() == 0 or val_x.numel() == 0 or test_x.numel() == 0:
        print(" One or more sequence sets are empty")
        return None, None, None, 0
    
    print(f" Data shapes - Train: {train_x.shape}, Val: {val_x.shape}, Test: {test_x.shape}")
    
    # Create data loaders
    train_loader = DataLoader(TensorDataset(train_x, train_y), batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(TensorDataset(val_x, val_y), batch_size=batch_size)
    test_loader = DataLoader(TensorDataset(test_x, test_y), batch_size=batch_size)
    
    return train_loader, val_loader, test_loader, train_x.shape[2]

In [2]:
# Hybrid LSTM Classification-Regression Model
import torch
import torch.nn as nn
import torch.nn.functional as F

class HybridLSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size=64, num_layers=2, dropout=0.0, time_step_out=1):
        super(HybridLSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout)
        
        # Shared feature extraction
        self.feature_layer = nn.Linear(hidden_size, hidden_size // 2)
        
        # Binary classification: rain or no rain
        self.rain_classifier = nn.Linear(hidden_size // 2, 1)  # Binary output (0=no rain, 1=rain)
        
        # Rain intensity classifier (conditionally used)
        self.intensity_classifier = nn.Linear(hidden_size // 2, 3)  # 3 classes: small, medium, heavy
        
        # Regression head for rain volume (conditionally used)
        self.regressor = nn.Linear(hidden_size // 2, time_step_out)
        
        self.time_step_out = time_step_out
        self.hidden_size = hidden_size

    def forward(self, x):
        # LSTM feature extraction
        lstm_out, _ = self.lstm(x)
        last_out = lstm_out[:, -1, :]  # Get the last time step output
        
        # Extract shared features
        features = F.relu(self.feature_layer(last_out))
        
        # Binary classification: rain or no rain
        rain_logits = self.rain_classifier(features)
        rain_prob = torch.sigmoid(rain_logits)  # Probability of rain
        
        # Rain intensity classification (3 classes)
        intensity_logits = self.intensity_classifier(features)
        intensity_probs = F.softmax(intensity_logits, dim=1)  # Probabilities for each intensity class
        
        # Regression for rain amount
        regression_out = self.regressor(features)
        
        return {
            'rain_logits': rain_logits,
            'rain_prob': rain_prob,
            'intensity_logits': intensity_logits,
            'intensity_probs': intensity_probs,
            'regression': regression_out
        }

# Custom loss function that combines binary classification, multiclass classification, and regression
class HybridRainfallLoss(nn.Module):
    def __init__(self, rain_threshold=0.1, class_boundaries=[0.0, 2.5, 6.0], 
                 binary_weight=1.0, intensity_weight=1.0, regression_weight=1.0):
        super(HybridRainfallLoss, self).__init__()
        self.rain_threshold = rain_threshold
        self.class_boundaries = class_boundaries
        
        # Loss weights
        self.binary_weight = binary_weight
        self.intensity_weight = intensity_weight
        self.regression_weight = regression_weight
        
        # Individual loss functions
        self.binary_loss_fn = nn.BCEWithLogitsLoss(reduction='mean')
        self.intensity_loss_fn = nn.CrossEntropyLoss(reduction='mean')
        self.regression_loss_fn = nn.MSELoss(reduction='mean')
    
    def forward(self, outputs, targets):
        # Extract model outputs
        rain_logits = outputs['rain_logits']
        intensity_logits = outputs['intensity_logits']
        regression = outputs['regression']
        
        # Prepare target values
        # Binary rain targets (1 if rain > threshold, 0 otherwise)
        binary_targets = (targets > self.rain_threshold).float()
        
        # Intensity class targets (0=small, 1=medium, 2=heavy)
        # Create class masks
        small_mask = (targets > self.class_boundaries[0]) & (targets <= self.class_boundaries[1])
        medium_mask = (targets > self.class_boundaries[1]) & (targets <= self.class_boundaries[2])
        heavy_mask = (targets > self.class_boundaries[2])
        
        # Convert to class indices (0, 1, or 2)
        intensity_targets = torch.zeros_like(targets, dtype=torch.long)
        intensity_targets[medium_mask] = 1
        intensity_targets[heavy_mask] = 2
        
        # Only consider intensity loss for samples where rain exists
        valid_intensity_mask = binary_targets.bool().squeeze()
        
        # Calculate losses
        binary_loss = self.binary_loss_fn(rain_logits, binary_targets)
        
        # Only calculate intensity loss on rainy samples (if any exist)
        if valid_intensity_mask.sum() > 0:
            intensity_loss = self.intensity_loss_fn(
                intensity_logits[valid_intensity_mask], 
                intensity_targets[valid_intensity_mask].squeeze()
            )
        else:
            intensity_loss = torch.tensor(0.0, device=rain_logits.device)
        
        # For regression, we can either:
        # 1. Apply regression loss to all samples
        # 2. Apply regression loss only to rainy samples
        # Let's use option 2 for better focus on rain prediction
        if valid_intensity_mask.sum() > 0:
            regression_loss = self.regression_loss_fn(
                regression[valid_intensity_mask], 
                targets[valid_intensity_mask]
            )
        else:
            regression_loss = torch.tensor(0.0, device=rain_logits.device)
        
        # Combine losses with weights
        total_loss = (
            self.binary_weight * binary_loss +
            self.intensity_weight * intensity_loss +
            self.regression_weight * regression_loss
        )
        
        return {
            'total_loss': total_loss,
            'binary_loss': binary_loss.item(),
            'intensity_loss': intensity_loss.item() if isinstance(intensity_loss, torch.Tensor) else intensity_loss,
            'regression_loss': regression_loss.item() if isinstance(regression_loss, torch.Tensor) else regression_loss
        }

# Function to get final predictions combining classification and regression
def get_hybrid_predictions(outputs):
    rain_prob = outputs['rain_prob']
    intensity_probs = outputs['intensity_probs']
    regression = outputs['regression']
    
    # Binary decision: rain or no rain
    is_rain = rain_prob > 0.5
    
    # Get the most likely intensity class
    _, intensity_class = torch.max(intensity_probs, dim=1)
    
    # Initialize predictions with zeros (no rain)
    final_predictions = torch.zeros_like(regression)
    
    # Where rain is predicted, use regression values
    final_predictions[is_rain] = regression[is_rain]
    
    return {
        'is_rain': is_rain,
        'intensity_class': intensity_class,
        'regression': regression,
        'final_predictions': final_predictions
    }

In [3]:
# Modified train_model function for hybrid model
def train_hybrid_model(model, loader, optimizer, loss_fn, epochs, patience=3):
    """Train hybrid model with early stopping"""
    model.train()
    best_loss = float('inf')
    patience_counter = 0
    
    for epoch in range(epochs):
        running_loss = 0.0
        running_binary_loss = 0.0
        running_intensity_loss = 0.0
        running_regression_loss = 0.0
        
        for xb, yb in loader:
            xb, yb = xb.to(DEVICE), yb.to(DEVICE)
            optimizer.zero_grad()
            
            # Get model outputs
            outputs = model(xb)
            
            # Calculate loss
            loss_dict = loss_fn(outputs, yb)
            total_loss = loss_dict['total_loss']
            
            # Backpropagation
            total_loss.backward()
            optimizer.step()
            
            # Track losses
            running_loss += total_loss.item() * xb.size(0)
            running_binary_loss += loss_dict['binary_loss'] * xb.size(0)
            running_intensity_loss += loss_dict['intensity_loss'] * xb.size(0)
            running_regression_loss += loss_dict['regression_loss'] * xb.size(0)
        
        # Calculate average losses
        avg_loss = running_loss / len(loader.dataset)
        avg_binary_loss = running_binary_loss / len(loader.dataset)
        avg_intensity_loss = running_intensity_loss / len(loader.dataset)
        avg_regression_loss = running_regression_loss / len(loader.dataset)
        
        print(f"   Epoch {epoch+1}/{epochs}, Total Loss: {avg_loss:.4f}, "
              f"Binary: {avg_binary_loss:.4f}, Intensity: {avg_intensity_loss:.4f}, "
              f"Regression: {avg_regression_loss:.4f}")
        
        # Early stopping check
        if avg_loss < best_loss:
            best_loss = avg_loss
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f"   Early stopping at epoch {epoch+1}")
                break
    
    return best_loss

# Modified evaluate_model function for hybrid model
def evaluate_hybrid_model(model, loader, rain_threshold=0.1):
    """Evaluate hybrid model and return metrics"""
    model.eval()
    all_preds = []
    all_targets = []
    all_binary_preds = []
    all_binary_targets = []
    all_intensity_preds = []
    all_intensity_targets = []
    
    # Classification thresholds for defining rain intensity classes
    class_boundaries = [0.0, 2.5, 6.0]
    
    with torch.no_grad():
        for xb, yb in loader:
            xb = xb.to(DEVICE)
            outputs = model(xb)
            
            # Get predictions
            pred_dict = get_hybrid_predictions(outputs)
            final_preds = pred_dict['final_predictions'].cpu()
            is_rain = pred_dict['is_rain'].cpu()
            intensity_class = pred_dict['intensity_class'].cpu()
            
            # Get binary targets (rain/no rain)
            binary_targets = (yb > rain_threshold).float()
            
            # Get intensity class targets
            intensity_targets = torch.zeros_like(yb, dtype=torch.long)
            intensity_targets[(yb > class_boundaries[0]) & (yb <= class_boundaries[1])] = 0  # Small rain
            intensity_targets[(yb > class_boundaries[1]) & (yb <= class_boundaries[2])] = 1  # Medium rain
            intensity_targets[yb > class_boundaries[2]] = 2  # Heavy rain
            
            # Store predictions and targets
            all_preds.append(final_preds)
            all_targets.append(yb)
            all_binary_preds.append(is_rain)
            all_binary_targets.append(binary_targets)
            all_intensity_preds.append(intensity_class)
            all_intensity_targets.append(intensity_targets)
    
    # Concatenate results
    all_preds = torch.cat(all_preds).squeeze().numpy()
    all_targets = torch.cat(all_targets).squeeze().numpy()
    all_binary_preds = torch.cat(all_binary_preds).squeeze().numpy()
    all_binary_targets = torch.cat(all_binary_targets).squeeze().numpy()
    all_intensity_preds = torch.cat(all_intensity_preds).squeeze().numpy()
    all_intensity_targets = torch.cat(all_intensity_targets).squeeze().numpy()
    
    # Calculate regression metrics
    rmse = mean_squared_error(all_targets, all_preds, squared=False)
    bias = np.mean(all_preds - all_targets)
    r2 = r2_score(all_targets, all_preds)
    
    # Calculate classification metrics
    binary_accuracy = np.mean(all_binary_preds == all_binary_targets)
    
    # Calculate Critical Success Index (CSI) for rain detection
    tp = np.sum((all_binary_preds == 1) & (all_binary_targets == 1))
    fp = np.sum((all_binary_preds == 1) & (all_binary_targets == 0))
    fn = np.sum((all_binary_preds == 0) & (all_binary_targets == 1))
    csi = tp / (tp + fp + fn + 1e-9)
    
    # Calculate intensity classification accuracy (only for actual rain samples)
    rain_mask = all_binary_targets == 1
    if np.sum(rain_mask) > 0:
        intensity_accuracy = np.mean(all_intensity_preds[rain_mask] == all_intensity_targets[rain_mask])
    else:
        intensity_accuracy = np.nan
    
    # Return all metrics
    metrics = {
        'rmse': rmse,
        'bias': bias,
        'r2': r2,
        'csi': csi,
        'binary_accuracy': binary_accuracy,
        'intensity_accuracy': intensity_accuracy
    }
    
    return metrics

In [4]:
# Modified objective function for Optuna optimization with hybrid model
def hybrid_objective(trial, month):
    """Optuna objective function for hybrid model using cross-validation across folds"""
    # Hyperparameters to tune
    hidden_size = trial.suggest_int("hidden_size", 32, 256)
    num_layers = trial.suggest_int("num_layers", 1, 3)
    dropout = trial.suggest_float("dropout", 0.0, 0.5)
    lr = trial.suggest_float("lr", 1e-4, 1e-2, log=True)
    time_step_in = trial.suggest_int("time_step_in", 12, 48)
    time_step_out = trial.suggest_int("time_step_out", 1, 6)
    stride = trial.suggest_int("stride", 1, 6)
    
    # Loss weights
    binary_weight = trial.suggest_float("binary_weight", 0.5, 2.0)
    intensity_weight = trial.suggest_float("intensity_weight", 0.5, 2.0) 
    regression_weight = trial.suggest_float("regression_weight", 0.5, 2.0)
    
    # Rain threshold (for binary classification)
    rain_threshold = trial.suggest_float("rain_threshold", 0.05, 0.2)
    
    # Lag steps and window sizes
    num_lags = trial.suggest_int("num_lags", 1, 3)
    lag_steps = [trial.suggest_int(f"lag_{i}", 1, 12) for i in range(num_lags)]
    
    num_windows = trial.suggest_int("num_windows", 1, 3)
    window_sizes = [trial.suggest_int(f"window_{i}", 3, 24) for i in range(num_windows)]
    
    # Get batch size from config
    config_key = month.split("-")[1]
    config = MODEL_CONFIG[config_key]
    batch_size = config["BATCH_SIZE"]
    
    # Cross-validation across folds
    fold_val_rmses = []
    
    for fold in folds:
        try:
            # Prepare data loaders
            train_loader, val_loader, _, input_size = prepare_data_loaders(
                month, fold, lag_steps, window_sizes, time_step_in, time_step_out, batch_size, stride
            )
            
            if train_loader is None or val_loader is None:
                # Skip this fold if data preparation failed
                continue
            
            # Create hybrid model with trial parameters
            model = HybridLSTMModel(
                input_size=input_size, 
                hidden_size=hidden_size, 
                num_layers=num_layers, 
                dropout=dropout,
                time_step_out=time_step_out
            ).to(DEVICE)
            
            # Class boundaries for rainfall intensity
            class_boundaries = [0.0, 2.5, 6.0]
            
            # Create hybrid loss function
            loss_fn = HybridRainfallLoss(
                rain_threshold=rain_threshold,
                class_boundaries=class_boundaries,
                binary_weight=binary_weight,
                intensity_weight=intensity_weight,
                regression_weight=regression_weight
            )
            
            optimizer = torch.optim.Adam(model.parameters(), lr=lr)
            
            # Train for a small number of epochs during hyperparameter search
            train_hybrid_model(model, train_loader, optimizer, loss_fn, epochs=5, patience=2)
            
            # Evaluate on validation set
            val_metrics = evaluate_hybrid_model(model, val_loader, rain_threshold=rain_threshold)
            val_rmse = val_metrics['rmse']
            fold_val_rmses.append(val_rmse)
            
        except Exception as e:
            print(f"Error in fold {fold}: {str(e)}")
            continue
    
    # Return mean validation RMSE across folds
    if fold_val_rmses:
        mean_val_rmse = sum(fold_val_rmses) / len(fold_val_rmses)
        return mean_val_rmse
    else:
        return float('inf')

# Function for final evaluation on test set with hybrid model
def evaluate_hybrid_on_test(month, fold, best_params):
    """Train and evaluate hybrid model with best parameters on test set"""
    print(f"\n Training hybrid model for {month}, {fold} with best parameters")
    
    # Extract parameters
    hidden_size = best_params["hidden_size"]
    num_layers = best_params["num_layers"]
    dropout = best_params["dropout"]
    lr = best_params["lr"]
    time_step_in = best_params["time_step_in"]
    time_step_out = best_params["time_step_out"]
    stride = best_params["stride"]
    
    # Loss weights
    binary_weight = best_params["binary_weight"]
    intensity_weight = best_params["intensity_weight"] 
    regression_weight = best_params["regression_weight"]
    
    # Rain threshold
    rain_threshold = best_params["rain_threshold"]
    
    # Class boundaries for rainfall intensity
    class_boundaries = [0.0, 2.5, 6.0]
    
    # Extract lag steps and window sizes
    num_lags = best_params["num_lags"]
    lag_steps = [best_params[f"lag_{i}"] for i in range(num_lags)]
    
    num_windows = best_params["num_windows"]
    window_sizes = [best_params[f"window_{i}"] for i in range(num_windows)]
    
    # Get config
    config_key = month.split("-")[1]
    config = MODEL_CONFIG[config_key]
    batch_size = config["BATCH_SIZE"]
    epochs = config["EPOCHS"]
    
    # Prepare data
    train_loader, val_loader, test_loader, input_size = prepare_data_loaders(
        month, fold, lag_steps, window_sizes, time_step_in, time_step_out, batch_size, stride
    )
    
    if train_loader is None or val_loader is None or test_loader is None:
        print(" Failed to prepare data")
        return None
    
    # Create hybrid model
    model = HybridLSTMModel(
        input_size=input_size,
        hidden_size=hidden_size,
        num_layers=num_layers,
        dropout=dropout,
        time_step_out=time_step_out
    ).to(DEVICE)
    
    # Create hybrid loss function
    loss_fn = HybridRainfallLoss(
        rain_threshold=rain_threshold,
        class_boundaries=class_boundaries,
        binary_weight=binary_weight,
        intensity_weight=intensity_weight,
        regression_weight=regression_weight
    )
    
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    
    # Train model on train set
    print(f"====== Training hybrid model on train set...")
    train_hybrid_model(model, train_loader, optimizer, loss_fn, epochs=epochs, patience=5)
    
    # Evaluate on validation set
    print("====== Evaluating on validation set...")
    val_metrics = evaluate_hybrid_model(model, val_loader, rain_threshold=rain_threshold)
    print(f"  Val RMSE: {val_metrics['rmse']:.4f}, Bias: {val_metrics['bias']:.4f}, "
          f"R²: {val_metrics['r2']:.4f}, CSI: {val_metrics['csi']:.4f}")
    print(f"  Binary Accuracy: {val_metrics['binary_accuracy']:.4f}, "
          f"Intensity Accuracy: {val_metrics['intensity_accuracy']:.4f}")
    
    # Evaluate on test set
    print("====== Evaluating on test set...")
    test_metrics = evaluate_hybrid_model(model, test_loader, rain_threshold=rain_threshold)
    print(f"  Test RMSE: {test_metrics['rmse']:.4f}, Bias: {test_metrics['bias']:.4f}, "
          f"R²: {test_metrics['r2']:.4f}, CSI: {test_metrics['csi']:.4f}")
    print(f"  Binary Accuracy: {test_metrics['binary_accuracy']:.4f}, "
          f"Intensity Accuracy: {test_metrics['intensity_accuracy']:.4f}")
    
    # Save model for this fold
    model_path = f"hybrid_model_{month}_{fold}.pt"
    torch.save(model.state_dict(), model_path)
    print(f" Model saved to {model_path}")
    
    # Return results
    return {
        "month": month,
        "fold": fold,
        "test_rmse": round(test_metrics['rmse'], 4),
        "test_bias": round(test_metrics['bias'], 4),
        "test_r2": round(test_metrics['r2'], 4),
        "test_csi": round(test_metrics['csi'], 4),
        "test_binary_acc": round(test_metrics['binary_accuracy'], 4),
        "test_intensity_acc": round(test_metrics['intensity_accuracy'], 4) if not np.isnan(test_metrics['intensity_accuracy']) else None,
        "val_rmse": round(val_metrics['rmse'], 4),
        "val_bias": round(val_metrics['bias'], 4),
        "val_r2": round(val_metrics['r2'], 4),
        "val_csi": round(val_metrics['csi'], 4),
        "val_binary_acc": round(val_metrics['binary_accuracy'], 4),
        "val_intensity_acc": round(val_metrics['intensity_accuracy'], 4) if not np.isnan(val_metrics['intensity_accuracy']) else None,
        "model": model
    }

# Updated main execution function for hybrid model
def run_hybrid_optuna_optimization():
    """Run Optuna optimization for hybrid model with cross-validation and final test evaluation"""
    month_results = {}
    
    for month in months:
        print(f"\n###### Processing month: {month}")
        
        # Step 1: Find the best hyperparameters using cross-validation
        study_name = f"hybrid_{month}_study"
        study = optuna.create_study(direction="minimize", study_name=study_name)
        
        # Create a partial function with fixed month
        objective_func = partial(hybrid_objective, month=month)
        
        # Run optimization with cross-validation
        try:
            print(f" Finding optimal hyperparameters for hybrid model on {month} using cross-validation...")
            study.optimize(objective_func, n_trials=20, timeout=3600)  # 1 hour timeout
            
            best_params = study.best_params
            print(f" Best parameters for {month}:")
            for key, value in best_params.items():
                print(f"  {key}: {value}")
            
            # Save best parameters for the month
            best_params_path = f"hybrid_best_params_{month}.json"
            with open(best_params_path, "w") as f:
                json.dump(best_params, f, indent=2)
            
            print(f" Best parameters saved to {best_params_path}")
            
            # Step 2: Final evaluation on test set for each fold
            fold_test_results = []
            for fold in folds:
                print(f"\n Final evaluation on {month}, {fold} test set")
                result = evaluate_hybrid_on_test(month, fold, best_params)
                if result:
                    fold_test_results.append(result)
            
            # Calculate mean performance across folds
            if fold_test_results:
                mean_rmse = sum(r["test_rmse"] for r in fold_test_results) / len(fold_test_results)
                mean_bias = sum(r["test_bias"] for r in fold_test_results) / len(fold_test_results)
                mean_r2 = sum(r["test_r2"] for r in fold_test_results) / len(fold_test_results)
                mean_csi = sum(r["test_csi"] for r in fold_test_results) / len(fold_test_results)
                
                # Calculate mean classification accuracies
                mean_binary_acc = sum(r["test_binary_acc"] for r in fold_test_results) / len(fold_test_results)
                
                # Handle potential None values for intensity accuracy
                intensity_accuracies = [r["test_intensity_acc"] for r in fold_test_results if r["test_intensity_acc"] is not None]
                mean_intensity_acc = sum(intensity_accuracies) / len(intensity_accuracies) if intensity_accuracies else None
                
                # Save best fold result (lowest RMSE)
                best_fold_result = min(fold_test_results, key=lambda x: x["test_rmse"])
                best_model = best_fold_result["model"]
                best_model_path = f"hybrid_best_model_{month}.pt"
                torch.save(best_model.state_dict(), best_model_path)
                
                # Store results for this month
                month_results[month] = {
                    "month": month,
                    "best_fold": best_fold_result["fold"],
                    "best_test_rmse": best_fold_result["test_rmse"],
                    "best_test_bias": best_fold_result["test_bias"],
                    "best_test_r2": best_fold_result["test_r2"],
                    "best_test_csi": best_fold_result["test_csi"],
                    "best_test_binary_acc": best_fold_result["test_binary_acc"],
                    "best_test_intensity_acc": best_fold_result["test_intensity_acc"],
                    "mean_test_rmse": round(mean_rmse, 4),
                    "mean_test_bias": round(mean_bias, 4),
                    "mean_test_r2": round(mean_r2, 4),
                    "mean_test_csi": round(mean_csi, 4),
                    "mean_test_binary_acc": round(mean_binary_acc, 4),
                    "mean_test_intensity_acc": round(mean_intensity_acc, 4) if mean_intensity_acc is not None else None,
                    "best_params": best_params,
                    "fold_results": fold_test_results
                }
                
                print(f"\n Mean test performance for {month}:")
                print(f"  RMSE: {mean_rmse:.4f}, Bias: {mean_bias:.4f}, R²: {mean_r2:.4f}, CSI: {mean_csi:.4f}")
                print(f"  Binary Acc: {mean_binary_acc:.4f}, Intensity Acc: {mean_intensity_acc:.4f if mean_intensity_acc is not None else 'N/A'}")
                print(f" Best fold test performance for {month} (fold {best_fold_result['fold']}):")
                print(f"  RMSE: {best_fold_result['test_rmse']:.4f}, Bias: {best_fold_result['test_bias']:.4f}, "
                      f"R²: {best_fold_result['test_r2']:.4f}, CSI: {best_fold_result['test_csi']:.4f}")
                print(f" Best hybrid model for {month} saved to {best_model_path}")
            
        except Exception as e:
            print(f" Error during optimization for {month}: {str(e)}")
            continue
    
    # Calculate overall mean performance across months
    if month_results:
        overall_mean_rmse = sum(r["best_test_rmse"] for r in month_results.values()) / len(month_results)
        overall_mean_bias = sum(r["best_test_bias"] for r in month_results.values()) / len(month_results)
        overall_mean_r2 = sum(r["best_test_r2"] for r in month_results.values()) / len(month_results)
        overall_mean_csi = sum(r["best_test_csi"] for r in month_results.values()) / len(month_results)
        overall_mean_binary_acc = sum(r["best_test_binary_acc"] for r in month_results.values()) / len(month_results)
        
        # Handle potential None values for intensity accuracy
        intensity_accuracies = [r["best_test_intensity_acc"] for r in month_results.values() if r["best_test_intensity_acc"] is not None]
        overall_mean_intensity_acc = sum(intensity_accuracies) / len(intensity_accuracies) if intensity_accuracies else None
        
        print("\n Overall mean performance of hybrid model across all months:")
        print(f"  RMSE: {overall_mean_rmse:.4f}, Bias: {overall_mean_bias:.4f}, R²: {overall_mean_r2:.4f}, CSI: {overall_mean_csi:.4f}")
        intensity_acc_str = format(overall_mean_intensity_acc, '.4f') if overall_mean_intensity_acc is not None else 'N/A'
        print(f"  Binary Accuracy: {overall_mean_binary_acc:.4f}, Intensity Accuracy: {intensity_acc_str}")
    
    # Create a dataframe with month results
    month_results_df = pd.DataFrame([
        {
            "month": month,
            "best_fold": data["best_fold"],
            "best_test_rmse": data["best_test_rmse"],
            "best_test_bias": data["best_test_bias"],
            "best_test_r2": data["best_test_r2"],
            "best_test_csi": data["best_test_csi"],
            "best_test_binary_acc": data["best_test_binary_acc"],
            "best_test_intensity_acc": data["best_test_intensity_acc"],
            "mean_test_rmse": data["mean_test_rmse"],
            "mean_test_bias": data["mean_test_bias"],
            "mean_test_r2": data["mean_test_r2"],
            "mean_test_csi": data["mean_test_csi"],
            "mean_test_binary_acc": data["mean_test_binary_acc"],
            "mean_test_intensity_acc": data["mean_test_intensity_acc"],
        } for month, data in month_results.items()
    ])
    
    # Display month-wise results
    print("\n Results by Month:")
    print(month_results_df)
    
    # Save results
    month_results_df.to_csv("hybrid_month_results.csv", index=False)
    
    # Save detailed results
    all_fold_results = []
    for month, data in month_results.items():
        for fold_result in data["fold_results"]:
            all_fold_results.append({
                "month": month,
                "fold": fold_result["fold"],
                "test_rmse": fold_result["test_rmse"],
                "test_bias": fold_result["test_bias"],
                "test_r2": fold_result["test_r2"],
                "test_csi": fold_result["test_csi"],
                "test_binary_acc": fold_result["test_binary_acc"],
                "test_intensity_acc": fold_result["test_intensity_acc"],
                "val_rmse": fold_result["val_rmse"],
                "val_bias": fold_result["val_bias"],
                "val_r2": fold_result["val_r2"],
                "val_csi": fold_result["val_csi"],
                "val_binary_acc": fold_result["val_binary_acc"],
                "val_intensity_acc": fold_result["val_intensity_acc"] if "val_intensity_acc" in fold_result else None,
            })
    
    all_fold_results_df = pd.DataFrame(all_fold_results)
    all_fold_results_df.to_csv("hybrid_all_fold_results.csv", index=False)
    
    print(" Results saved to hybrid_month_results.csv and hybrid_all_fold_results.csv")
    
    return month_results

In [5]:
# Execute optimization
if __name__ == "__main__":
    print(f"Running on device: {DEVICE}")
    run_hybrid_optuna_optimization()

[I 2025-05-06 17:09:20,841] A new study created in memory with name: hybrid_2019-04_study


Running on device: cuda

###### Processing month: 2019-04
 Finding optimal hyperparameters for hybrid model on 2019-04 using cross-validation...
Using 19 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag10', 'AWS_rollmean_5', 'AWS_rollstd_5', 'AWS_rollmean_22', 'AWS_rollstd_22', 'AWS_rollmean_13', 'AWS_rollstd_13']


  return torch.tensor(sequences, dtype=torch.float32), torch.tensor(targets, dtype=torch.float32)


 Data shapes - Train: torch.Size([3340, 36, 19]), Val: torch.Size([3340, 36, 19]), Test: torch.Size([4003, 36, 19])




Error in fold fold_1: Target size (torch.Size([64, 5])) must be the same as input size (torch.Size([64, 1]))
Using 19 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag10', 'AWS_rollmean_5', 'AWS_rollstd_5', 'AWS_rollmean_22', 'AWS_rollstd_22', 'AWS_rollmean_13', 'AWS_rollstd_13']
 Data shapes - Train: torch.Size([8684, 36, 19]), Val: torch.Size([3340, 36, 19]), Test: torch.Size([4003, 36, 19])
Error in fold fold_2: Target size (torch.Size([64, 5])) must be the same as input size (torch.Size([64, 1]))




Using 19 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag10', 'AWS_rollmean_5', 'AWS_rollstd_5', 'AWS_rollmean_22', 'AWS_rollstd_22', 'AWS_rollmean_13', 'AWS_rollstd_13']
 Data shapes - Train: torch.Size([14028, 36, 19]), Val: torch.Size([3340, 36, 19]), Test: torch.Size([4003, 36, 19])
Error in fold fold_3: Target size (torch.Size([64, 5])) must be the same as input size (torch.Size([64, 1]))




Using 19 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag10', 'AWS_rollmean_5', 'AWS_rollstd_5', 'AWS_rollmean_22', 'AWS_rollstd_22', 'AWS_rollmean_13', 'AWS_rollstd_13']
 Data shapes - Train: torch.Size([19372, 36, 19]), Val: torch.Size([3340, 36, 19]), Test: torch.Size([4003, 36, 19])
Error in fold fold_4: Target size (torch.Size([64, 5])) must be the same as input size (torch.Size([64, 1]))




Using 19 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag10', 'AWS_rollmean_5', 'AWS_rollstd_5', 'AWS_rollmean_22', 'AWS_rollstd_22', 'AWS_rollmean_13', 'AWS_rollstd_13']


[I 2025-05-06 17:10:04,080] Trial 0 finished with value: inf and parameters: {'hidden_size': 220, 'num_layers': 1, 'dropout': 0.2337600464482837, 'lr': 0.0012361483751355234, 'time_step_in': 36, 'time_step_out': 5, 'stride': 6, 'binary_weight': 1.7725591057667287, 'intensity_weight': 1.0133679501722475, 'regression_weight': 0.9082681286820855, 'rain_threshold': 0.17693921407629987, 'num_lags': 1, 'lag_0': 10, 'num_windows': 3, 'window_0': 5, 'window_1': 22, 'window_2': 13}. Best is trial 0 with value: inf.


 Data shapes - Train: torch.Size([24716, 36, 19]), Val: torch.Size([3340, 36, 19]), Test: torch.Size([4003, 36, 19])
Error in fold fold_5: Target size (torch.Size([64, 5])) must be the same as input size (torch.Size([64, 1]))
Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag12', 'AWS_rollmean_5', 'AWS_rollstd_5', 'AWS_rollmean_22', 'AWS_rollstd_22']
 Data shapes - Train: torch.Size([26386, 17, 17]), Val: torch.Size([26386, 17, 17]), Test: torch.Size([28819, 17, 17])
   Epoch 1/5, Total Loss: 3.5024, Binary: 0.4992, Intensity: 0.3399, Regression: 2.9668
   Epoch 2/5, Total Loss: 3.3585, Binary: 0.4536, Intensity: 0.2618, Regression: 2.9337
Error in fold fold_1: Expected input batch_size (1) to match target batch_size (0).
Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag12', 'AWS_rollmean_5', 'AWS_rollstd_5', 'AWS_rollmean_22', 'AWS_rollstd_22']
 

[I 2025-05-06 17:12:34,909] Trial 1 finished with value: 0.32380443811416626 and parameters: {'hidden_size': 235, 'num_layers': 3, 'dropout': 0.13481954837955085, 'lr': 0.0001071753800599473, 'time_step_in': 17, 'time_step_out': 1, 'stride': 1, 'binary_weight': 0.6007463715986066, 'intensity_weight': 1.0895435472451163, 'regression_weight': 0.9546741894984316, 'rain_threshold': 0.12065201226890497, 'num_lags': 1, 'lag_0': 12, 'num_windows': 2, 'window_0': 5, 'window_1': 22}. Best is trial 1 with value: 0.32380443811416626.


Using 19 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag7', 'AWS_lag3', 'AWS_lag6', 'AWS_rollmean_19', 'AWS_rollstd_19', 'AWS_rollmean_11', 'AWS_rollstd_11']
 Data shapes - Train: torch.Size([16366, 42, 19]), Val: torch.Size([16366, 42, 19]), Test: torch.Size([21417, 42, 19])
Error in fold fold_1: Target size (torch.Size([64, 6])) must be the same as input size (torch.Size([64, 1]))
Using 19 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag7', 'AWS_lag3', 'AWS_lag6', 'AWS_rollmean_19', 'AWS_rollstd_19', 'AWS_rollmean_11', 'AWS_rollstd_11']
 Data shapes - Train: torch.Size([48430, 42, 19]), Val: torch.Size([16366, 42, 19]), Test: torch.Size([21417, 42, 19])
Error in fold fold_2: Target size (torch.Size([64, 6])) must be the same as input size (torch.Size([64, 1]))
Using 19 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SS

[I 2025-05-06 17:14:16,612] Trial 2 finished with value: inf and parameters: {'hidden_size': 245, 'num_layers': 3, 'dropout': 0.006434331969249529, 'lr': 0.002657204138247596, 'time_step_in': 42, 'time_step_out': 6, 'stride': 1, 'binary_weight': 1.0561048606289964, 'intensity_weight': 1.9847654562617523, 'regression_weight': 1.6212249970069572, 'rain_threshold': 0.07236466447520526, 'num_lags': 3, 'lag_0': 7, 'lag_1': 3, 'lag_2': 6, 'num_windows': 2, 'window_0': 19, 'window_1': 11}. Best is trial 1 with value: 0.32380443811416626.


 Data shapes - Train: torch.Size([144622, 42, 19]), Val: torch.Size([16366, 42, 19]), Test: torch.Size([21417, 42, 19])
Error in fold fold_5: Target size (torch.Size([64, 6])) must be the same as input size (torch.Size([64, 1]))
Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag10', 'AWS_lag4', 'AWS_lag8', 'AWS_rollmean_4', 'AWS_rollstd_4']
 Data shapes - Train: torch.Size([5344, 45, 17]), Val: torch.Size([5344, 45, 17]), Test: torch.Size([7069, 45, 17])
Error in fold fold_1: Target size (torch.Size([64, 5])) must be the same as input size (torch.Size([64, 1]))




Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag10', 'AWS_lag4', 'AWS_lag8', 'AWS_rollmean_4', 'AWS_rollstd_4']
 Data shapes - Train: torch.Size([16032, 45, 17]), Val: torch.Size([5344, 45, 17]), Test: torch.Size([7069, 45, 17])
Error in fold fold_2: Target size (torch.Size([64, 5])) must be the same as input size (torch.Size([64, 1]))




Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag10', 'AWS_lag4', 'AWS_lag8', 'AWS_rollmean_4', 'AWS_rollstd_4']
 Data shapes - Train: torch.Size([26720, 45, 17]), Val: torch.Size([5344, 45, 17]), Test: torch.Size([7069, 45, 17])
Error in fold fold_3: Target size (torch.Size([64, 5])) must be the same as input size (torch.Size([64, 1]))




Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag10', 'AWS_lag4', 'AWS_lag8', 'AWS_rollmean_4', 'AWS_rollstd_4']
 Data shapes - Train: torch.Size([37408, 45, 17]), Val: torch.Size([5344, 45, 17]), Test: torch.Size([7069, 45, 17])
Error in fold fold_4: Target size (torch.Size([64, 5])) must be the same as input size (torch.Size([64, 1]))




Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag10', 'AWS_lag4', 'AWS_lag8', 'AWS_rollmean_4', 'AWS_rollstd_4']


[I 2025-05-06 17:14:54,797] Trial 3 finished with value: inf and parameters: {'hidden_size': 167, 'num_layers': 1, 'dropout': 0.2315826253075916, 'lr': 0.00026354203993916343, 'time_step_in': 45, 'time_step_out': 5, 'stride': 3, 'binary_weight': 1.207702510156086, 'intensity_weight': 1.5064189612605716, 'regression_weight': 0.6284372309167696, 'rain_threshold': 0.05985111352800159, 'num_lags': 3, 'lag_0': 10, 'lag_1': 4, 'lag_2': 8, 'num_windows': 1, 'window_0': 4}. Best is trial 1 with value: 0.32380443811416626.


 Data shapes - Train: torch.Size([48096, 45, 17]), Val: torch.Size([5344, 45, 17]), Test: torch.Size([7069, 45, 17])
Error in fold fold_5: Target size (torch.Size([64, 5])) must be the same as input size (torch.Size([64, 1]))
Using 20 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag12', 'AWS_lag4', 'AWS_rollmean_12', 'AWS_rollstd_12', 'AWS_rollmean_13', 'AWS_rollstd_13', 'AWS_rollmean_6', 'AWS_rollstd_6']
 Data shapes - Train: torch.Size([4342, 32, 20]), Val: torch.Size([4342, 32, 20]), Test: torch.Size([5156, 32, 20])




   Epoch 1/5, Total Loss: 7.5629, Binary: 0.6584, Intensity: 1.0184, Regression: 4.5986
   Epoch 2/5, Total Loss: 6.3758, Binary: 0.4265, Intensity: 0.4851, Regression: 4.2022
   Epoch 3/5, Total Loss: 6.2639, Binary: 0.4152, Intensity: 0.4190, Regression: 4.1610
   Epoch 4/5, Total Loss: 5.5646, Binary: 0.4120, Intensity: 0.3901, Regression: 3.6401
   Epoch 5/5, Total Loss: 5.6733, Binary: 0.4076, Intensity: 0.3883, Regression: 3.7295
Using 20 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag12', 'AWS_lag4', 'AWS_rollmean_12', 'AWS_rollstd_12', 'AWS_rollmean_13', 'AWS_rollstd_13', 'AWS_rollmean_6', 'AWS_rollstd_6']
 Data shapes - Train: torch.Size([10688, 32, 20]), Val: torch.Size([4342, 32, 20]), Test: torch.Size([5156, 32, 20])
Error in fold fold_2: Expected input batch_size (1) to match target batch_size (0).




Using 20 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag12', 'AWS_lag4', 'AWS_rollmean_12', 'AWS_rollstd_12', 'AWS_rollmean_13', 'AWS_rollstd_13', 'AWS_rollmean_6', 'AWS_rollstd_6']
 Data shapes - Train: torch.Size([17368, 32, 20]), Val: torch.Size([4342, 32, 20]), Test: torch.Size([5156, 32, 20])
Error in fold fold_3: Expected input batch_size (1) to match target batch_size (0).




Using 20 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag12', 'AWS_lag4', 'AWS_rollmean_12', 'AWS_rollstd_12', 'AWS_rollmean_13', 'AWS_rollstd_13', 'AWS_rollmean_6', 'AWS_rollstd_6']
 Data shapes - Train: torch.Size([23714, 32, 20]), Val: torch.Size([4342, 32, 20]), Test: torch.Size([5156, 32, 20])
Error in fold fold_4: Expected input batch_size (1) to match target batch_size (0).




Using 20 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag12', 'AWS_lag4', 'AWS_rollmean_12', 'AWS_rollstd_12', 'AWS_rollmean_13', 'AWS_rollstd_13', 'AWS_rollmean_6', 'AWS_rollstd_6']
 Data shapes - Train: torch.Size([30060, 32, 20]), Val: torch.Size([4342, 32, 20]), Test: torch.Size([5156, 32, 20])




   Epoch 1/5, Total Loss: 2.6905, Binary: 0.4494, Intensity: 0.2484, Regression: 1.4546
   Epoch 2/5, Total Loss: 2.2126, Binary: 0.2979, Intensity: 0.1301, Regression: 1.3118
   Epoch 3/5, Total Loss: 1.9609, Binary: 0.2366, Intensity: 0.1011, Regression: 1.1998
   Epoch 4/5, Total Loss: 1.9745, Binary: 0.2370, Intensity: 0.0997, Regression: 1.2105


[I 2025-05-06 17:15:40,068] Trial 4 finished with value: 0.22384402435272932 and parameters: {'hidden_size': 240, 'num_layers': 1, 'dropout': 0.27748151966818646, 'lr': 0.00014994748053928433, 'time_step_in': 32, 'time_step_out': 1, 'stride': 5, 'binary_weight': 1.431654842765001, 'intensity_weight': 0.6386191930257591, 'regression_weight': 1.2982158902018033, 'rain_threshold': 0.12981188082127484, 'num_lags': 2, 'lag_0': 12, 'lag_1': 4, 'num_windows': 3, 'window_0': 12, 'window_1': 13, 'window_2': 6}. Best is trial 4 with value: 0.22384402435272932.


   Epoch 5/5, Total Loss: 1.9468, Binary: 0.2269, Intensity: 0.0983, Regression: 1.2011
Using 19 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag3', 'AWS_lag2', 'AWS_lag6', 'AWS_rollmean_23', 'AWS_rollstd_23', 'AWS_rollmean_9', 'AWS_rollstd_9']
 Data shapes - Train: torch.Size([3340, 44, 19]), Val: torch.Size([3340, 44, 19]), Test: torch.Size([4434, 44, 19])
Error in fold fold_1: Target size (torch.Size([64, 3])) must be the same as input size (torch.Size([64, 1]))




Using 19 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag3', 'AWS_lag2', 'AWS_lag6', 'AWS_rollmean_23', 'AWS_rollstd_23', 'AWS_rollmean_9', 'AWS_rollstd_9']
 Data shapes - Train: torch.Size([10020, 44, 19]), Val: torch.Size([3340, 44, 19]), Test: torch.Size([4434, 44, 19])
Error in fold fold_2: Target size (torch.Size([64, 3])) must be the same as input size (torch.Size([64, 1]))




Using 19 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag3', 'AWS_lag2', 'AWS_lag6', 'AWS_rollmean_23', 'AWS_rollstd_23', 'AWS_rollmean_9', 'AWS_rollstd_9']
 Data shapes - Train: torch.Size([16366, 44, 19]), Val: torch.Size([3340, 44, 19]), Test: torch.Size([4434, 44, 19])
Error in fold fold_3: Target size (torch.Size([64, 3])) must be the same as input size (torch.Size([64, 1]))




Using 19 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag3', 'AWS_lag2', 'AWS_lag6', 'AWS_rollmean_23', 'AWS_rollstd_23', 'AWS_rollmean_9', 'AWS_rollstd_9']
 Data shapes - Train: torch.Size([22712, 44, 19]), Val: torch.Size([3340, 44, 19]), Test: torch.Size([4434, 44, 19])
Error in fold fold_4: Target size (torch.Size([64, 3])) must be the same as input size (torch.Size([64, 1]))




Using 19 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag3', 'AWS_lag2', 'AWS_lag6', 'AWS_rollmean_23', 'AWS_rollstd_23', 'AWS_rollmean_9', 'AWS_rollstd_9']


[I 2025-05-06 17:16:10,930] Trial 5 finished with value: inf and parameters: {'hidden_size': 43, 'num_layers': 1, 'dropout': 0.43187979878102756, 'lr': 0.0015240184853162512, 'time_step_in': 44, 'time_step_out': 3, 'stride': 5, 'binary_weight': 0.5785822059451233, 'intensity_weight': 1.3984199641839934, 'regression_weight': 1.0005179495027035, 'rain_threshold': 0.15411656324682654, 'num_lags': 3, 'lag_0': 3, 'lag_1': 2, 'lag_2': 6, 'num_windows': 2, 'window_0': 23, 'window_1': 9}. Best is trial 4 with value: 0.22384402435272932.


 Data shapes - Train: torch.Size([29058, 44, 19]), Val: torch.Size([3340, 44, 19]), Test: torch.Size([4434, 44, 19])
Error in fold fold_5: Target size (torch.Size([64, 3])) must be the same as input size (torch.Size([64, 1]))
Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag6', 'AWS_rollmean_19', 'AWS_rollstd_19', 'AWS_rollmean_12', 'AWS_rollstd_12']
 Data shapes - Train: torch.Size([3006, 46, 17]), Val: torch.Size([3006, 46, 17]), Test: torch.Size([3759, 46, 17])




   Epoch 1/5, Total Loss: 5.6503, Binary: 0.4880, Intensity: 0.3349, Regression: 3.9630
   Epoch 2/5, Total Loss: 4.8227, Binary: 0.4604, Intensity: 0.3051, Regression: 3.3539
   Epoch 3/5, Total Loss: 4.7892, Binary: 0.4739, Intensity: 0.3017, Regression: 3.3249
   Epoch 4/5, Total Loss: 4.5181, Binary: 0.4597, Intensity: 0.2555, Regression: 3.1488
   Epoch 5/5, Total Loss: 4.3727, Binary: 0.4624, Intensity: 0.2522, Regression: 3.0377
Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag6', 'AWS_rollmean_19', 'AWS_rollstd_19', 'AWS_rollmean_12', 'AWS_rollstd_12']
 Data shapes - Train: torch.Size([8350, 46, 17]), Val: torch.Size([3006, 46, 17]), Test: torch.Size([3759, 46, 17])




Error in fold fold_2: Expected input batch_size (1) to match target batch_size (0).
Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag6', 'AWS_rollmean_19', 'AWS_rollstd_19', 'AWS_rollmean_12', 'AWS_rollstd_12']
 Data shapes - Train: torch.Size([13694, 46, 17]), Val: torch.Size([3006, 46, 17]), Test: torch.Size([3759, 46, 17])
Error in fold fold_3: Expected input batch_size (1) to match target batch_size (0).




Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag6', 'AWS_rollmean_19', 'AWS_rollstd_19', 'AWS_rollmean_12', 'AWS_rollstd_12']
 Data shapes - Train: torch.Size([19038, 46, 17]), Val: torch.Size([3006, 46, 17]), Test: torch.Size([3759, 46, 17])
Error in fold fold_4: Expected input batch_size (1) to match target batch_size (0).




Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag6', 'AWS_rollmean_19', 'AWS_rollstd_19', 'AWS_rollmean_12', 'AWS_rollstd_12']
 Data shapes - Train: torch.Size([24382, 46, 17]), Val: torch.Size([3006, 46, 17]), Test: torch.Size([3759, 46, 17])




   Epoch 1/5, Total Loss: 1.8500, Binary: 0.4400, Intensity: 0.1562, Regression: 1.1601
   Epoch 2/5, Total Loss: 1.7053, Binary: 0.3589, Intensity: 0.1031, Regression: 1.1117
   Epoch 3/5, Total Loss: 1.4111, Binary: 0.2741, Intensity: 0.0796, Regression: 0.9322
   Epoch 4/5, Total Loss: 1.4832, Binary: 0.2694, Intensity: 0.0963, Regression: 0.9798


[I 2025-05-06 17:16:50,985] Trial 6 finished with value: 0.32431378960609436 and parameters: {'hidden_size': 228, 'num_layers': 1, 'dropout': 0.0421916630843589, 'lr': 0.0036194896421349538, 'time_step_in': 46, 'time_step_out': 1, 'stride': 6, 'binary_weight': 0.5105390992034181, 'intensity_weight': 0.7604442847625368, 'regression_weight': 1.298649560429132, 'rain_threshold': 0.1285753053197055, 'num_lags': 1, 'lag_0': 6, 'num_windows': 2, 'window_0': 19, 'window_1': 12}. Best is trial 4 with value: 0.22384402435272932.


   Epoch 5/5, Total Loss: 1.4232, Binary: 0.2568, Intensity: 0.0860, Regression: 0.9446
   Early stopping at epoch 5
Using 16 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag5', 'AWS_lag3', 'AWS_rollmean_6', 'AWS_rollstd_6']
 Data shapes - Train: torch.Size([4676, 25, 16]), Val: torch.Size([4676, 25, 16]), Test: torch.Size([5405, 25, 16])
Error in fold fold_1: Target size (torch.Size([64, 3])) must be the same as input size (torch.Size([64, 1]))
Using 16 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag5', 'AWS_lag3', 'AWS_rollmean_6', 'AWS_rollstd_6']
 Data shapes - Train: torch.Size([11022, 25, 16]), Val: torch.Size([4676, 25, 16]), Test: torch.Size([5405, 25, 16])
Error in fold fold_2: Target size (torch.Size([64, 3])) must be the same as input size (torch.Size([64, 1]))
Using 16 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', '

[I 2025-05-06 17:17:10,932] Trial 7 finished with value: inf and parameters: {'hidden_size': 73, 'num_layers': 2, 'dropout': 0.38662761235093357, 'lr': 0.0014469214522719416, 'time_step_in': 25, 'time_step_out': 3, 'stride': 5, 'binary_weight': 1.9868276562739235, 'intensity_weight': 1.6598429336649199, 'regression_weight': 0.7241957198927307, 'rain_threshold': 0.13026201484681454, 'num_lags': 2, 'lag_0': 5, 'lag_1': 3, 'num_windows': 1, 'window_0': 6}. Best is trial 4 with value: 0.22384402435272932.


 Data shapes - Train: torch.Size([30394, 25, 16]), Val: torch.Size([4676, 25, 16]), Test: torch.Size([5405, 25, 16])
Error in fold fold_5: Target size (torch.Size([64, 3])) must be the same as input size (torch.Size([64, 1]))
Using 16 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag2', 'AWS_lag9', 'AWS_rollmean_4', 'AWS_rollstd_4']
 Data shapes - Train: torch.Size([6346, 18, 16]), Val: torch.Size([6346, 18, 16]), Test: torch.Size([7048, 18, 16])
Error in fold fold_1: Target size (torch.Size([64, 3])) must be the same as input size (torch.Size([64, 1]))
Using 16 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag2', 'AWS_lag9', 'AWS_rollmean_4', 'AWS_rollstd_4']
 Data shapes - Train: torch.Size([14362, 18, 16]), Val: torch.Size([6346, 18, 16]), Test: torch.Size([7048, 18, 16])
Error in fold fold_2: Target size (torch.Size([64, 3])) must be the same as input size (to

[I 2025-05-06 17:17:30,317] Trial 8 finished with value: inf and parameters: {'hidden_size': 191, 'num_layers': 2, 'dropout': 0.41785510061455977, 'lr': 0.002559025727724323, 'time_step_in': 18, 'time_step_out': 3, 'stride': 4, 'binary_weight': 1.226715250644014, 'intensity_weight': 0.5251276401441387, 'regression_weight': 0.8317140448919218, 'rain_threshold': 0.18095112713934564, 'num_lags': 2, 'lag_0': 2, 'lag_1': 9, 'num_windows': 1, 'window_0': 4}. Best is trial 4 with value: 0.22384402435272932.


 Data shapes - Train: torch.Size([38410, 18, 16]), Val: torch.Size([6346, 18, 16]), Test: torch.Size([7048, 18, 16])
Error in fold fold_5: Target size (torch.Size([64, 3])) must be the same as input size (torch.Size([64, 1]))
Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag12', 'AWS_rollmean_6', 'AWS_rollstd_6', 'AWS_rollmean_23', 'AWS_rollstd_23']
 Data shapes - Train: torch.Size([4676, 29, 17]), Val: torch.Size([4676, 29, 17]), Test: torch.Size([5198, 29, 17])
Error in fold fold_1: Expected input batch_size (1) to match target batch_size (0).
Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag12', 'AWS_rollmean_6', 'AWS_rollstd_6', 'AWS_rollmean_23', 'AWS_rollstd_23']
 Data shapes - Train: torch.Size([11022, 29, 17]), Val: torch.Size([4676, 29, 17]), Test: torch.Size([5198, 29, 17])
Error in fold fold_2: Expected input batch_size (1) to match ta

[I 2025-05-06 17:18:07,225] Trial 9 finished with value: 0.15259088575839996 and parameters: {'hidden_size': 97, 'num_layers': 2, 'dropout': 0.47312792006744303, 'lr': 0.0004741917322974995, 'time_step_in': 29, 'time_step_out': 1, 'stride': 5, 'binary_weight': 1.048021924722862, 'intensity_weight': 1.4581279645729655, 'regression_weight': 1.8734558093413478, 'rain_threshold': 0.0845142367928742, 'num_lags': 1, 'lag_0': 12, 'num_windows': 2, 'window_0': 6, 'window_1': 23}. Best is trial 9 with value: 0.15259088575839996.


   Epoch 5/5, Total Loss: 1.9914, Binary: 0.2449, Intensity: 0.0737, Regression: 0.8686
Using 19 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag9', 'AWS_rollmean_11', 'AWS_rollstd_11', 'AWS_rollmean_4', 'AWS_rollstd_4', 'AWS_rollmean_23', 'AWS_rollstd_23']
 Data shapes - Train: torch.Size([8016, 25, 19]), Val: torch.Size([8016, 25, 19]), Test: torch.Size([8978, 25, 19])
Error in fold fold_1: Target size (torch.Size([64, 2])) must be the same as input size (torch.Size([64, 1]))
Using 19 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag9', 'AWS_rollmean_11', 'AWS_rollstd_11', 'AWS_rollmean_4', 'AWS_rollstd_4', 'AWS_rollmean_23', 'AWS_rollstd_23']
 Data shapes - Train: torch.Size([18704, 25, 19]), Val: torch.Size([8016, 25, 19]), Test: torch.Size([8978, 25, 19])
Error in fold fold_2: Target size (torch.Size([64, 2])) must be the same as input size (torch.Size([64, 

[I 2025-05-06 17:18:43,101] Trial 10 finished with value: inf and parameters: {'hidden_size': 108, 'num_layers': 2, 'dropout': 0.4746783531567493, 'lr': 0.0004440049202744458, 'time_step_in': 25, 'time_step_out': 2, 'stride': 3, 'binary_weight': 0.8590691112848667, 'intensity_weight': 1.8863212131890008, 'regression_weight': 1.9517028151772302, 'rain_threshold': 0.09050425655942868, 'num_lags': 1, 'lag_0': 9, 'num_windows': 3, 'window_0': 11, 'window_1': 4, 'window_2': 23}. Best is trial 9 with value: 0.15259088575839996.


 Data shapes - Train: torch.Size([50768, 25, 19]), Val: torch.Size([8016, 25, 19]), Test: torch.Size([8978, 25, 19])
Error in fold fold_5: Target size (torch.Size([64, 2])) must be the same as input size (torch.Size([64, 1]))
Using 20 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag12', 'AWS_lag7', 'AWS_rollmean_11', 'AWS_rollstd_11', 'AWS_rollmean_16', 'AWS_rollstd_16', 'AWS_rollmean_3', 'AWS_rollstd_3']
 Data shapes - Train: torch.Size([5344, 33, 20]), Val: torch.Size([5344, 33, 20]), Test: torch.Size([6279, 33, 20])
   Epoch 1/5, Total Loss: 11.0011, Binary: 0.5042, Intensity: 0.6734, Regression: 4.7592
   Epoch 2/5, Total Loss: 10.9324, Binary: 0.4194, Intensity: 0.3731, Regression: 4.9751
   Epoch 3/5, Total Loss: 9.5958, Binary: 0.4172, Intensity: 0.3684, Regression: 4.3026
   Epoch 4/5, Total Loss: 9.8261, Binary: 0.4134, Intensity: 0.4070, Regression: 4.3988
   Epoch 5/5, Total Loss: 9.6963, Binary: 0.4037, Intensit

[I 2025-05-06 17:19:49,122] Trial 11 finished with value: 0.2679000496864319 and parameters: {'hidden_size': 132, 'num_layers': 2, 'dropout': 0.3322078886906962, 'lr': 0.0003587435661293148, 'time_step_in': 33, 'time_step_out': 1, 'stride': 4, 'binary_weight': 1.584429896250724, 'intensity_weight': 1.201047474743575, 'regression_weight': 1.973757239830718, 'rain_threshold': 0.09631537054437565, 'num_lags': 2, 'lag_0': 12, 'lag_1': 7, 'num_windows': 3, 'window_0': 11, 'window_1': 16, 'window_2': 3}. Best is trial 9 with value: 0.15259088575839996.


Using 20 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag12', 'AWS_lag12', 'AWS_rollmean_10', 'AWS_rollstd_10', 'AWS_rollmean_17', 'AWS_rollstd_17', 'AWS_rollmean_4', 'AWS_rollstd_4']
 Data shapes - Train: torch.Size([4676, 29, 20]), Val: torch.Size([4676, 29, 20]), Test: torch.Size([5183, 29, 20])
Error in fold fold_1: Target size (torch.Size([64, 2])) must be the same as input size (torch.Size([64, 1]))
Using 20 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag12', 'AWS_lag12', 'AWS_rollmean_10', 'AWS_rollstd_10', 'AWS_rollmean_17', 'AWS_rollstd_17', 'AWS_rollmean_4', 'AWS_rollstd_4']
 Data shapes - Train: torch.Size([11022, 29, 20]), Val: torch.Size([4676, 29, 20]), Test: torch.Size([5183, 29, 20])
Error in fold fold_2: Target size (torch.Size([64, 2])) must be the same as input size (torch.Size([64, 1]))
Using 20 features: ['TCW', 'TCLW', 'R250', 'R500', 'R85

[I 2025-05-06 17:20:18,087] Trial 12 finished with value: inf and parameters: {'hidden_size': 98, 'num_layers': 3, 'dropout': 0.30590047865107006, 'lr': 0.00013972932037831747, 'time_step_in': 29, 'time_step_out': 2, 'stride': 5, 'binary_weight': 1.4930632449512962, 'intensity_weight': 0.8384023346079457, 'regression_weight': 1.5110644052845528, 'rain_threshold': 0.10206630551345618, 'num_lags': 2, 'lag_0': 12, 'lag_1': 12, 'num_windows': 3, 'window_0': 10, 'window_1': 17, 'window_2': 4}. Best is trial 9 with value: 0.15259088575839996.


 Data shapes - Train: torch.Size([30060, 29, 20]), Val: torch.Size([4676, 29, 20]), Test: torch.Size([5183, 29, 20])
Error in fold fold_5: Target size (torch.Size([64, 2])) must be the same as input size (torch.Size([64, 1]))
Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag8', 'AWS_rollmean_15', 'AWS_rollstd_15', 'AWS_rollmean_17', 'AWS_rollstd_17']
 Data shapes - Train: torch.Size([4008, 36, 17]), Val: torch.Size([4008, 36, 17]), Test: torch.Size([4909, 36, 17])
Error in fold fold_1: Target size (torch.Size([64, 2])) must be the same as input size (torch.Size([64, 1]))




Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag8', 'AWS_rollmean_15', 'AWS_rollstd_15', 'AWS_rollmean_17', 'AWS_rollstd_17']
 Data shapes - Train: torch.Size([10354, 36, 17]), Val: torch.Size([4008, 36, 17]), Test: torch.Size([4909, 36, 17])
Error in fold fold_2: Target size (torch.Size([64, 2])) must be the same as input size (torch.Size([64, 1]))




Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag8', 'AWS_rollmean_15', 'AWS_rollstd_15', 'AWS_rollmean_17', 'AWS_rollstd_17']
 Data shapes - Train: torch.Size([17034, 36, 17]), Val: torch.Size([4008, 36, 17]), Test: torch.Size([4909, 36, 17])
Error in fold fold_3: Target size (torch.Size([64, 2])) must be the same as input size (torch.Size([64, 1]))




Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag8', 'AWS_rollmean_15', 'AWS_rollstd_15', 'AWS_rollmean_17', 'AWS_rollstd_17']
 Data shapes - Train: torch.Size([23380, 36, 17]), Val: torch.Size([4008, 36, 17]), Test: torch.Size([4909, 36, 17])
Error in fold fold_4: Target size (torch.Size([64, 2])) must be the same as input size (torch.Size([64, 1]))




Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag8', 'AWS_rollmean_15', 'AWS_rollstd_15', 'AWS_rollmean_17', 'AWS_rollstd_17']


[I 2025-05-06 17:20:45,208] Trial 13 finished with value: inf and parameters: {'hidden_size': 153, 'num_layers': 1, 'dropout': 0.15038033716163351, 'lr': 0.0005865004163007341, 'time_step_in': 36, 'time_step_out': 2, 'stride': 5, 'binary_weight': 1.4393320132799534, 'intensity_weight': 0.5477778167037737, 'regression_weight': 1.6675208854911143, 'rain_threshold': 0.15649216575026803, 'num_lags': 1, 'lag_0': 8, 'num_windows': 2, 'window_0': 15, 'window_1': 17}. Best is trial 9 with value: 0.15259088575839996.


 Data shapes - Train: torch.Size([29726, 36, 17]), Val: torch.Size([4008, 36, 17]), Test: torch.Size([4909, 36, 17])
Error in fold fold_5: Target size (torch.Size([64, 2])) must be the same as input size (torch.Size([64, 1]))
Using 20 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag10', 'AWS_lag6', 'AWS_rollmean_9', 'AWS_rollstd_9', 'AWS_rollmean_24', 'AWS_rollstd_24', 'AWS_rollmean_11', 'AWS_rollstd_11']
 Data shapes - Train: torch.Size([4008, 25, 20]), Val: torch.Size([4008, 25, 20]), Test: torch.Size([4534, 25, 20])
   Epoch 1/5, Total Loss: 6.2349, Binary: 0.5304, Intensity: 0.3770, Regression: 4.1821
   Epoch 2/5, Total Loss: 6.5719, Binary: 0.5266, Intensity: 0.3709, Regression: 4.4692
   Epoch 3/5, Total Loss: 5.8589, Binary: 0.5159, Intensity: 0.3540, Regression: 3.9164
   Epoch 4/5, Total Loss: 5.7907, Binary: 0.5105, Intensity: 0.3226, Regression: 3.9068
   Epoch 5/5, Total Loss: 5.8153, Binary: 0.5101, Intensity:

[I 2025-05-06 17:21:25,129] Trial 14 finished with value: 0.2341400757431984 and parameters: {'hidden_size': 195, 'num_layers': 2, 'dropout': 0.4901604840194285, 'lr': 0.009058426184169865, 'time_step_in': 25, 'time_step_out': 1, 'stride': 6, 'binary_weight': 0.9652428292559507, 'intensity_weight': 1.6313688455208806, 'regression_weight': 1.2213656342142631, 'rain_threshold': 0.05255132123487343, 'num_lags': 2, 'lag_0': 10, 'lag_1': 6, 'num_windows': 3, 'window_0': 9, 'window_1': 24, 'window_2': 11}. Best is trial 9 with value: 0.15259088575839996.


   Epoch 5/5, Total Loss: 1.7918, Binary: 0.2692, Intensity: 0.0917, Regression: 1.1318
Using 18 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag11', 'AWS_lag1', 'AWS_rollmean_15', 'AWS_rollstd_15', 'AWS_rollmean_7', 'AWS_rollstd_7']
 Data shapes - Train: torch.Size([5344, 30, 18]), Val: torch.Size([5344, 30, 18]), Test: torch.Size([6279, 30, 18])
Error in fold fold_1: Target size (torch.Size([64, 4])) must be the same as input size (torch.Size([64, 1]))
Using 18 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag11', 'AWS_lag1', 'AWS_rollmean_15', 'AWS_rollstd_15', 'AWS_rollmean_7', 'AWS_rollstd_7']
 Data shapes - Train: torch.Size([13360, 30, 18]), Val: torch.Size([5344, 30, 18]), Test: torch.Size([6279, 30, 18])
Error in fold fold_2: Target size (torch.Size([64, 4])) must be the same as input size (torch.Size([64, 1]))
Using 18 features: ['TCW', 'TCLW', 'R250', 

[I 2025-05-06 17:21:53,956] Trial 15 finished with value: inf and parameters: {'hidden_size': 42, 'num_layers': 2, 'dropout': 0.3259350382446129, 'lr': 0.00018509287949143523, 'time_step_in': 30, 'time_step_out': 4, 'stride': 4, 'binary_weight': 1.3642332634412755, 'intensity_weight': 1.3234321620736798, 'regression_weight': 1.7828444207662848, 'rain_threshold': 0.11032962674616682, 'num_lags': 2, 'lag_0': 11, 'lag_1': 1, 'num_windows': 2, 'window_0': 15, 'window_1': 7}. Best is trial 9 with value: 0.15259088575839996.


 Data shapes - Train: torch.Size([37408, 30, 18]), Val: torch.Size([5344, 30, 18]), Test: torch.Size([6279, 30, 18])
Error in fold fold_5: Target size (torch.Size([64, 4])) must be the same as input size (torch.Size([64, 1]))
Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag4', 'AWS_rollmean_8', 'AWS_rollstd_8', 'AWS_rollmean_15', 'AWS_rollstd_15']
 Data shapes - Train: torch.Size([9352, 12, 17]), Val: torch.Size([9352, 12, 17]), Test: torch.Size([10034, 12, 17])
Error in fold fold_1: Target size (torch.Size([64, 2])) must be the same as input size (torch.Size([64, 1]))




Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag4', 'AWS_rollmean_8', 'AWS_rollstd_8', 'AWS_rollmean_15', 'AWS_rollstd_15']
 Data shapes - Train: torch.Size([20040, 12, 17]), Val: torch.Size([9352, 12, 17]), Test: torch.Size([10034, 12, 17])
Error in fold fold_2: Target size (torch.Size([64, 2])) must be the same as input size (torch.Size([64, 1]))




Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag4', 'AWS_rollmean_8', 'AWS_rollstd_8', 'AWS_rollmean_15', 'AWS_rollstd_15']
 Data shapes - Train: torch.Size([30728, 12, 17]), Val: torch.Size([9352, 12, 17]), Test: torch.Size([10034, 12, 17])
Error in fold fold_3: Target size (torch.Size([64, 2])) must be the same as input size (torch.Size([64, 1]))




Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag4', 'AWS_rollmean_8', 'AWS_rollstd_8', 'AWS_rollmean_15', 'AWS_rollstd_15']
 Data shapes - Train: torch.Size([41416, 12, 17]), Val: torch.Size([9352, 12, 17]), Test: torch.Size([10034, 12, 17])
Error in fold fold_4: Target size (torch.Size([64, 2])) must be the same as input size (torch.Size([64, 1]))




Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag4', 'AWS_rollmean_8', 'AWS_rollstd_8', 'AWS_rollmean_15', 'AWS_rollstd_15']


[I 2025-05-06 17:22:15,595] Trial 16 finished with value: inf and parameters: {'hidden_size': 119, 'num_layers': 1, 'dropout': 0.16110338262548912, 'lr': 0.0007260400352294819, 'time_step_in': 12, 'time_step_out': 2, 'stride': 3, 'binary_weight': 0.8516689123532714, 'intensity_weight': 0.8544470448112697, 'regression_weight': 1.3063846277178546, 'rain_threshold': 0.07625970800414222, 'num_lags': 1, 'lag_0': 4, 'num_windows': 2, 'window_0': 8, 'window_1': 15}. Best is trial 9 with value: 0.15259088575839996.


 Data shapes - Train: torch.Size([52104, 12, 17]), Val: torch.Size([9352, 12, 17]), Test: torch.Size([10034, 12, 17])
Error in fold fold_5: Target size (torch.Size([64, 2])) must be the same as input size (torch.Size([64, 1]))
Using 21 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag8', 'AWS_lag6', 'AWS_lag1', 'AWS_rollmean_13', 'AWS_rollstd_13', 'AWS_rollmean_19', 'AWS_rollstd_19', 'AWS_rollmean_8', 'AWS_rollstd_8']
 Data shapes - Train: torch.Size([9018, 39, 21]), Val: torch.Size([9018, 39, 21]), Test: torch.Size([11342, 39, 21])
Error in fold fold_1: Target size (torch.Size([64, 4])) must be the same as input size (torch.Size([64, 1]))
Using 21 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag8', 'AWS_lag6', 'AWS_lag1', 'AWS_rollmean_13', 'AWS_rollstd_13', 'AWS_rollmean_19', 'AWS_rollstd_19', 'AWS_rollmean_8', 'AWS_rollstd_8']
 Data shapes - Train: torch.Size(

[I 2025-05-06 17:23:16,066] Trial 17 finished with value: inf and parameters: {'hidden_size': 79, 'num_layers': 3, 'dropout': 0.3769633929191557, 'lr': 0.00024909695139645286, 'time_step_in': 39, 'time_step_out': 4, 'stride': 2, 'binary_weight': 1.7246877420507147, 'intensity_weight': 1.7390258905366098, 'regression_weight': 0.5119253551975842, 'rain_threshold': 0.14734873618829833, 'num_lags': 3, 'lag_0': 8, 'lag_1': 6, 'lag_2': 1, 'num_windows': 3, 'window_0': 13, 'window_1': 19, 'window_2': 8}. Best is trial 9 with value: 0.15259088575839996.


 Data shapes - Train: torch.Size([73146, 39, 21]), Val: torch.Size([9018, 39, 21]), Test: torch.Size([11342, 39, 21])
Error in fold fold_5: Target size (torch.Size([64, 4])) must be the same as input size (torch.Size([64, 1]))
Using 15 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag1', 'AWS_rollmean_7', 'AWS_rollstd_7']
 Data shapes - Train: torch.Size([4342, 33, 15]), Val: torch.Size([4342, 33, 15]), Test: torch.Size([5115, 33, 15])




   Epoch 1/5, Total Loss: 6.1301, Binary: 0.6337, Intensity: 0.7607, Regression: 3.1038
   Epoch 2/5, Total Loss: 5.0761, Binary: 0.5147, Intensity: 0.2604, Regression: 2.8732
   Epoch 3/5, Total Loss: 5.0135, Binary: 0.5101, Intensity: 0.2562, Regression: 2.8372
   Epoch 4/5, Total Loss: 4.9596, Binary: 0.5019, Intensity: 0.2708, Regression: 2.7950
   Epoch 5/5, Total Loss: 4.8899, Binary: 0.4893, Intensity: 0.2884, Regression: 2.7429
Using 15 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag1', 'AWS_rollmean_7', 'AWS_rollstd_7']
 Data shapes - Train: torch.Size([10688, 33, 15]), Val: torch.Size([4342, 33, 15]), Test: torch.Size([5115, 33, 15])




Error in fold fold_2: Expected input batch_size (1) to match target batch_size (0).
Using 15 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag1', 'AWS_rollmean_7', 'AWS_rollstd_7']
 Data shapes - Train: torch.Size([17034, 33, 15]), Val: torch.Size([4342, 33, 15]), Test: torch.Size([5115, 33, 15])
Error in fold fold_3: Expected input batch_size (1) to match target batch_size (0).




Using 15 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag1', 'AWS_rollmean_7', 'AWS_rollstd_7']
 Data shapes - Train: torch.Size([23714, 33, 15]), Val: torch.Size([4342, 33, 15]), Test: torch.Size([5115, 33, 15])
Error in fold fold_4: Expected input batch_size (1) to match target batch_size (0).




Using 15 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag1', 'AWS_rollmean_7', 'AWS_rollstd_7']
 Data shapes - Train: torch.Size([30060, 33, 15]), Val: torch.Size([4342, 33, 15]), Test: torch.Size([5115, 33, 15])




   Epoch 1/5, Total Loss: 2.6360, Binary: 0.5428, Intensity: 0.2081, Regression: 1.2185
   Epoch 2/5, Total Loss: 2.2389, Binary: 0.4349, Intensity: 0.1209, Regression: 1.1000
   Epoch 3/5, Total Loss: 1.9739, Binary: 0.2980, Intensity: 0.0968, Regression: 1.0443
   Epoch 4/5, Total Loss: 1.8646, Binary: 0.2678, Intensity: 0.0959, Regression: 0.9937
   Epoch 5/5, Total Loss: 1.8593, Binary: 0.2431, Intensity: 0.0868, Regression: 1.0166


[I 2025-05-06 17:23:58,822] Trial 18 finished with value: 0.2302727773785591 and parameters: {'hidden_size': 182, 'num_layers': 1, 'dropout': 0.2782316606545916, 'lr': 0.00018996223290851068, 'time_step_in': 33, 'time_step_out': 1, 'stride': 5, 'binary_weight': 1.138974389279959, 'intensity_weight': 1.1639208029124002, 'regression_weight': 1.4571793625751763, 'rain_threshold': 0.08213802328606297, 'num_lags': 1, 'lag_0': 1, 'num_windows': 1, 'window_0': 7}. Best is trial 9 with value: 0.15259088575839996.


Using 18 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag11', 'AWS_lag11', 'AWS_rollmean_19', 'AWS_rollstd_19', 'AWS_rollmean_20', 'AWS_rollstd_20']
 Data shapes - Train: torch.Size([4342, 21, 18]), Val: torch.Size([4342, 21, 18]), Test: torch.Size([4749, 21, 18])
Error in fold fold_1: Target size (torch.Size([64, 2])) must be the same as input size (torch.Size([64, 1]))
Using 18 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag11', 'AWS_lag11', 'AWS_rollmean_19', 'AWS_rollstd_19', 'AWS_rollmean_20', 'AWS_rollstd_20']
 Data shapes - Train: torch.Size([9686, 21, 18]), Val: torch.Size([4342, 21, 18]), Test: torch.Size([4749, 21, 18])
Error in fold fold_2: Target size (torch.Size([64, 2])) must be the same as input size (torch.Size([64, 1]))
Using 18 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag11', '

[I 2025-05-06 17:24:19,336] Trial 19 finished with value: inf and parameters: {'hidden_size': 255, 'num_layers': 2, 'dropout': 0.09832875690284004, 'lr': 0.00010113155608309197, 'time_step_in': 21, 'time_step_out': 2, 'stride': 6, 'binary_weight': 0.789417978997123, 'intensity_weight': 1.4304659679074516, 'regression_weight': 1.0922752052062483, 'rain_threshold': 0.1400976238338033, 'num_lags': 2, 'lag_0': 11, 'lag_1': 11, 'num_windows': 2, 'window_0': 19, 'window_1': 20}. Best is trial 9 with value: 0.15259088575839996.


 Data shapes - Train: torch.Size([25718, 21, 18]), Val: torch.Size([4342, 21, 18]), Test: torch.Size([4749, 21, 18])
Error in fold fold_5: Target size (torch.Size([64, 2])) must be the same as input size (torch.Size([64, 1]))
 Best parameters for 2019-04:
  hidden_size: 97
  num_layers: 2
  dropout: 0.47312792006744303
  lr: 0.0004741917322974995
  time_step_in: 29
  time_step_out: 1
  stride: 5
  binary_weight: 1.048021924722862
  intensity_weight: 1.4581279645729655
  regression_weight: 1.8734558093413478
  rain_threshold: 0.0845142367928742
  num_lags: 1
  lag_0: 12
  num_windows: 2
  window_0: 6
  window_1: 23
 Best parameters saved to hybrid_best_params_2019-04.json

 Final evaluation on 2019-04, fold_1 test set

 Training hybrid model for 2019-04, fold_1 with best parameters
Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag12', 'AWS_rollmean_6', 'AWS_rollstd_6', 'AWS_rollmean_23', 'AWS_rollstd_23']
 Data shape

[I 2025-05-06 17:24:24,495] A new study created in memory with name: hybrid_2019-10_study


 Error during optimization for 2019-04: Expected input batch_size (1) to match target batch_size (0).

###### Processing month: 2019-10
 Finding optimal hyperparameters for hybrid model on 2019-10 using cross-validation...
Using 20 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag6', 'AWS_lag11', 'AWS_rollmean_13', 'AWS_rollstd_13', 'AWS_rollmean_16', 'AWS_rollstd_16', 'AWS_rollmean_7', 'AWS_rollstd_7']
 Data shapes - Train: torch.Size([3851, 32, 20]), Val: torch.Size([4323, 32, 20]), Test: torch.Size([3679, 32, 20])
Error in fold fold_1: Target size (torch.Size([128, 4])) must be the same as input size (torch.Size([128, 1]))
Using 20 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag6', 'AWS_lag11', 'AWS_rollmean_13', 'AWS_rollstd_13', 'AWS_rollmean_16', 'AWS_rollstd_16', 'AWS_rollmean_7', 'AWS_rollstd_7']
 Data shapes - Train: torch.Size([9370, 32, 20]), Val: tor

[I 2025-05-06 17:25:00,367] Trial 0 finished with value: inf and parameters: {'hidden_size': 213, 'num_layers': 2, 'dropout': 0.15097359875627014, 'lr': 0.007247566762218518, 'time_step_in': 32, 'time_step_out': 4, 'stride': 6, 'binary_weight': 1.968912613205406, 'intensity_weight': 1.4460265933284346, 'regression_weight': 1.986035718167286, 'rain_threshold': 0.16286909845188957, 'num_lags': 2, 'lag_0': 6, 'lag_1': 11, 'num_windows': 3, 'window_0': 13, 'window_1': 16, 'window_2': 7}. Best is trial 0 with value: inf.


 Data shapes - Train: torch.Size([25930, 32, 20]), Val: torch.Size([3815, 32, 20]), Test: torch.Size([2932, 32, 20])
Error in fold fold_5: Target size (torch.Size([128, 4])) must be the same as input size (torch.Size([128, 1]))
Using 18 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag11', 'AWS_lag3', 'AWS_rollmean_22', 'AWS_rollstd_22', 'AWS_rollmean_3', 'AWS_rollstd_3']
 Data shapes - Train: torch.Size([4897, 23, 18]), Val: torch.Size([5470, 23, 18]), Test: torch.Size([4672, 23, 18])
Error in fold fold_1: Target size (torch.Size([128, 5])) must be the same as input size (torch.Size([128, 1]))
Using 18 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag11', 'AWS_lag3', 'AWS_rollmean_22', 'AWS_rollstd_22', 'AWS_rollmean_3', 'AWS_rollstd_3']
 Data shapes - Train: torch.Size([11463, 23, 18]), Val: torch.Size([5493, 23, 18]), Test: torch.Size([4527, 23, 18])
Error in f

[I 2025-05-06 17:25:23,498] Trial 1 finished with value: inf and parameters: {'hidden_size': 85, 'num_layers': 2, 'dropout': 0.4313594815570168, 'lr': 0.001128552217294873, 'time_step_in': 23, 'time_step_out': 5, 'stride': 5, 'binary_weight': 0.7578334967215641, 'intensity_weight': 1.238968385311692, 'regression_weight': 0.9066427162187991, 'rain_threshold': 0.0877909793248216, 'num_lags': 2, 'lag_0': 11, 'lag_1': 3, 'num_windows': 2, 'window_0': 22, 'window_1': 3}. Best is trial 0 with value: inf.


 Data shapes - Train: torch.Size([31495, 23, 18]), Val: torch.Size([5032, 23, 18]), Test: torch.Size([3985, 23, 18])
Error in fold fold_5: Target size (torch.Size([128, 5])) must be the same as input size (torch.Size([128, 1]))
Using 15 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag6', 'AWS_rollmean_13', 'AWS_rollstd_13']
 Data shapes - Train: torch.Size([10554, 36, 15]), Val: torch.Size([12628, 36, 15]), Test: torch.Size([10797, 36, 15])
   Epoch 1/5, Total Loss: 3.0175, Binary: 0.6931, Intensity: 0.2286, Regression: 1.2588
   Epoch 2/5, Total Loss: 2.8227, Binary: 0.6879, Intensity: 0.1265, Regression: 1.1917
   Epoch 3/5, Total Loss: 2.3290, Binary: 0.4893, Intensity: 0.1075, Regression: 1.1090
   Epoch 4/5, Total Loss: 2.0786, Binary: 0.3906, Intensity: 0.0991, Regression: 1.0625
   Epoch 5/5, Total Loss: 1.9881, Binary: 0.3738, Intensity: 0.0963, Regression: 1.0145
Using 15 features: ['TCW', 'TCLW', 'R250', 'R500', '

[I 2025-05-06 17:27:39,858] Trial 2 finished with value: 0.5525891065597535 and parameters: {'hidden_size': 202, 'num_layers': 2, 'dropout': 0.3130540178861347, 'lr': 0.0011426006686266216, 'time_step_in': 36, 'time_step_out': 1, 'stride': 2, 'binary_weight': 1.9030089352361503, 'intensity_weight': 1.0489874770675092, 'regression_weight': 1.1588645625164453, 'rain_threshold': 0.18941102048499142, 'num_lags': 1, 'lag_0': 6, 'num_windows': 1, 'window_0': 13}. Best is trial 2 with value: 0.5525891065597535.


Using 20 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag12', 'AWS_lag2', 'AWS_rollmean_7', 'AWS_rollstd_7', 'AWS_rollmean_6', 'AWS_rollstd_6', 'AWS_rollmean_18', 'AWS_rollstd_18']
 Data shapes - Train: torch.Size([5231, 19, 20]), Val: torch.Size([5692, 19, 20]), Test: torch.Size([4844, 19, 20])
Error in fold fold_1: Target size (torch.Size([128, 4])) must be the same as input size (torch.Size([128, 1]))




Using 20 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag12', 'AWS_lag2', 'AWS_rollmean_7', 'AWS_rollstd_7', 'AWS_rollmean_6', 'AWS_rollstd_6', 'AWS_rollmean_18', 'AWS_rollstd_18']
 Data shapes - Train: torch.Size([11797, 19, 20]), Val: torch.Size([5715, 19, 20]), Test: torch.Size([4727, 19, 20])
Error in fold fold_2: Target size (torch.Size([128, 4])) must be the same as input size (torch.Size([128, 1]))




Using 20 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag12', 'AWS_lag2', 'AWS_rollmean_7', 'AWS_rollstd_7', 'AWS_rollmean_6', 'AWS_rollstd_6', 'AWS_rollmean_18', 'AWS_rollstd_18']
 Data shapes - Train: torch.Size([18561, 19, 20]), Val: torch.Size([5642, 19, 20]), Test: torch.Size([4591, 19, 20])
Error in fold fold_3: Target size (torch.Size([128, 4])) must be the same as input size (torch.Size([128, 1]))




Using 20 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag12', 'AWS_lag2', 'AWS_rollmean_7', 'AWS_rollstd_7', 'AWS_rollmean_6', 'AWS_rollstd_6', 'AWS_rollmean_18', 'AWS_rollstd_18']
 Data shapes - Train: torch.Size([25262, 19, 20]), Val: torch.Size([5468, 19, 20]), Test: torch.Size([4411, 19, 20])
Error in fold fold_4: Target size (torch.Size([128, 4])) must be the same as input size (torch.Size([128, 1]))




Using 20 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag12', 'AWS_lag2', 'AWS_rollmean_7', 'AWS_rollstd_7', 'AWS_rollmean_6', 'AWS_rollstd_6', 'AWS_rollmean_18', 'AWS_rollstd_18']


[I 2025-05-06 17:28:04,757] Trial 3 finished with value: inf and parameters: {'hidden_size': 255, 'num_layers': 1, 'dropout': 0.4014233783929709, 'lr': 0.0005078989012297537, 'time_step_in': 19, 'time_step_out': 4, 'stride': 5, 'binary_weight': 0.975133083485631, 'intensity_weight': 0.7144226901489571, 'regression_weight': 0.7400741964533138, 'rain_threshold': 0.108618620759577, 'num_lags': 2, 'lag_0': 12, 'lag_1': 2, 'num_windows': 3, 'window_0': 7, 'window_1': 6, 'window_2': 18}. Best is trial 2 with value: 0.5525891065597535.


 Data shapes - Train: torch.Size([31829, 19, 20]), Val: torch.Size([5307, 19, 20]), Test: torch.Size([4280, 19, 20])
Error in fold fold_5: Target size (torch.Size([128, 4])) must be the same as input size (torch.Size([128, 1]))
Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag5', 'AWS_rollmean_20', 'AWS_rollstd_20', 'AWS_rollmean_24', 'AWS_rollstd_24']
 Data shapes - Train: torch.Size([8368, 21, 17]), Val: torch.Size([9279, 21, 17]), Test: torch.Size([7898, 21, 17])
Error in fold fold_1: Target size (torch.Size([128, 4])) must be the same as input size (torch.Size([128, 1]))
Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag5', 'AWS_rollmean_20', 'AWS_rollstd_20', 'AWS_rollmean_24', 'AWS_rollstd_24']
 Data shapes - Train: torch.Size([19408, 21, 17]), Val: torch.Size([9295, 21, 17]), Test: torch.Size([7685, 21, 17])
Error in fold fold_2: Target siz

[I 2025-05-06 17:28:33,295] Trial 4 finished with value: inf and parameters: {'hidden_size': 188, 'num_layers': 3, 'dropout': 0.3194121377538766, 'lr': 0.0025341709939022683, 'time_step_in': 21, 'time_step_out': 4, 'stride': 3, 'binary_weight': 0.7324905149291152, 'intensity_weight': 0.6911906257110075, 'regression_weight': 0.8285595738200732, 'rain_threshold': 0.05974457945246435, 'num_lags': 1, 'lag_0': 5, 'num_windows': 2, 'window_0': 20, 'window_1': 24}. Best is trial 2 with value: 0.5525891065597535.


 Data shapes - Train: torch.Size([52528, 21, 17]), Val: torch.Size([8663, 21, 17]), Test: torch.Size([6840, 21, 17])
Error in fold fold_5: Target size (torch.Size([128, 4])) must be the same as input size (torch.Size([128, 1]))
Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag1', 'AWS_rollmean_10', 'AWS_rollstd_10', 'AWS_rollmean_20', 'AWS_rollstd_20']
 Data shapes - Train: torch.Size([6703, 36, 17]), Val: torch.Size([8261, 36, 17]), Test: torch.Size([7071, 36, 17])
Error in fold fold_1: Target size (torch.Size([128, 4])) must be the same as input size (torch.Size([128, 1]))
Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag1', 'AWS_rollmean_10', 'AWS_rollstd_10', 'AWS_rollmean_20', 'AWS_rollstd_20']
 Data shapes - Train: torch.Size([17738, 36, 17]), Val: torch.Size([8326, 36, 17]), Test: torch.Size([6754, 36, 17])
Error in fold fold_2: Target siz

[I 2025-05-06 17:29:10,880] Trial 5 finished with value: inf and parameters: {'hidden_size': 245, 'num_layers': 3, 'dropout': 0.40933852899569706, 'lr': 0.000865663139256909, 'time_step_in': 36, 'time_step_out': 4, 'stride': 3, 'binary_weight': 0.6892389855188148, 'intensity_weight': 1.2685262462176852, 'regression_weight': 0.6426078998553184, 'rain_threshold': 0.05771801718648173, 'num_lags': 1, 'lag_0': 1, 'num_windows': 2, 'window_0': 10, 'window_1': 20}. Best is trial 2 with value: 0.5525891065597535.


 Data shapes - Train: torch.Size([50858, 36, 17]), Val: torch.Size([7299, 36, 17]), Test: torch.Size([5497, 36, 17])
Error in fold fold_5: Target size (torch.Size([128, 4])) must be the same as input size (torch.Size([128, 1]))
Using 19 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag10', 'AWS_lag7', 'AWS_lag10', 'AWS_rollmean_9', 'AWS_rollstd_9', 'AWS_rollmean_23', 'AWS_rollstd_23']
 Data shapes - Train: torch.Size([3851, 32, 19]), Val: torch.Size([4334, 32, 19]), Test: torch.Size([3695, 32, 19])
Error in fold fold_1: Target size (torch.Size([128, 3])) must be the same as input size (torch.Size([128, 1]))
Using 19 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag10', 'AWS_lag7', 'AWS_lag10', 'AWS_rollmean_9', 'AWS_rollstd_9', 'AWS_rollmean_23', 'AWS_rollstd_23']
 Data shapes - Train: torch.Size([9370, 32, 19]), Val: torch.Size([4344, 32, 19]), Test: torch.Size([

[I 2025-05-06 17:29:35,802] Trial 6 finished with value: inf and parameters: {'hidden_size': 184, 'num_layers': 3, 'dropout': 0.05016009563384505, 'lr': 0.00014019913564054028, 'time_step_in': 32, 'time_step_out': 3, 'stride': 6, 'binary_weight': 1.470860924004629, 'intensity_weight': 1.1308512055009934, 'regression_weight': 1.9894541709104925, 'rain_threshold': 0.10080655033810143, 'num_lags': 3, 'lag_0': 10, 'lag_1': 7, 'lag_2': 10, 'num_windows': 2, 'window_0': 9, 'window_1': 23}. Best is trial 2 with value: 0.5525891065597535.


 Data shapes - Train: torch.Size([25930, 32, 19]), Val: torch.Size([3908, 32, 19]), Test: torch.Size([3022, 32, 19])
Error in fold fold_5: Target size (torch.Size([128, 3])) must be the same as input size (torch.Size([128, 1]))
Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag9', 'AWS_rollmean_15', 'AWS_rollstd_15', 'AWS_rollmean_15', 'AWS_rollstd_15']
 Data shapes - Train: torch.Size([4278, 47, 17]), Val: torch.Size([5643, 47, 17]), Test: torch.Size([4874, 47, 17])
Error in fold fold_1: Target size (torch.Size([128, 5])) must be the same as input size (torch.Size([128, 1]))
Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag9', 'AWS_rollmean_15', 'AWS_rollstd_15', 'AWS_rollmean_15', 'AWS_rollstd_15']
 Data shapes - Train: torch.Size([12552, 47, 17]), Val: torch.Size([5751, 47, 17]), Test: torch.Size([4607, 47, 17])
Error in fold fold_2: Target siz

[I 2025-05-06 17:30:10,987] Trial 7 finished with value: inf and parameters: {'hidden_size': 48, 'num_layers': 3, 'dropout': 0.3377066583372693, 'lr': 0.002072587008247253, 'time_step_in': 47, 'time_step_out': 5, 'stride': 4, 'binary_weight': 0.6880412662494839, 'intensity_weight': 0.9269709271124873, 'regression_weight': 0.5485292939824133, 'rain_threshold': 0.17875426847791265, 'num_lags': 1, 'lag_0': 9, 'num_windows': 2, 'window_0': 15, 'window_1': 15}. Best is trial 2 with value: 0.5525891065597535.


 Data shapes - Train: torch.Size([37392, 47, 17]), Val: torch.Size([4717, 47, 17]), Test: torch.Size([3451, 47, 17])
Error in fold fold_5: Target size (torch.Size([128, 5])) must be the same as input size (torch.Size([128, 1]))
Using 20 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag7', 'AWS_lag2', 'AWS_rollmean_20', 'AWS_rollstd_20', 'AWS_rollmean_7', 'AWS_rollstd_7', 'AWS_rollmean_18', 'AWS_rollstd_18']
 Data shapes - Train: torch.Size([6370, 40, 20]), Val: torch.Size([8074, 40, 20]), Test: torch.Size([6914, 40, 20])
Error in fold fold_1: Target size (torch.Size([128, 3])) must be the same as input size (torch.Size([128, 1]))
Using 20 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag7', 'AWS_lag2', 'AWS_rollmean_20', 'AWS_rollstd_20', 'AWS_rollmean_7', 'AWS_rollstd_7', 'AWS_rollmean_18', 'AWS_rollstd_18']
 Data shapes - Train: torch.Size([17404, 40, 20]), Val:

[I 2025-05-06 17:30:57,129] Trial 8 finished with value: inf and parameters: {'hidden_size': 41, 'num_layers': 3, 'dropout': 0.33179610594780884, 'lr': 0.00013367273332742576, 'time_step_in': 40, 'time_step_out': 3, 'stride': 3, 'binary_weight': 1.1283356002948186, 'intensity_weight': 1.2070107148792397, 'regression_weight': 0.5471380787070674, 'rain_threshold': 0.14319389884866113, 'num_lags': 2, 'lag_0': 7, 'lag_1': 2, 'num_windows': 3, 'window_0': 20, 'window_1': 7, 'window_2': 18}. Best is trial 2 with value: 0.5525891065597535.


 Data shapes - Train: torch.Size([50524, 40, 20]), Val: torch.Size([7035, 40, 20]), Test: torch.Size([5236, 40, 20])
Error in fold fold_5: Target size (torch.Size([128, 3])) must be the same as input size (torch.Size([128, 1]))
Using 15 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag7', 'AWS_rollmean_12', 'AWS_rollstd_12']
 Data shapes - Train: torch.Size([4944, 40, 15]), Val: torch.Size([6105, 40, 15]), Test: torch.Size([5206, 40, 15])
Error in fold fold_1: Target size (torch.Size([128, 3])) must be the same as input size (torch.Size([128, 1]))




Using 15 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag7', 'AWS_rollmean_12', 'AWS_rollstd_12']
 Data shapes - Train: torch.Size([13220, 40, 15]), Val: torch.Size([6133, 40, 15]), Test: torch.Size([4936, 40, 15])
Error in fold fold_2: Target size (torch.Size([128, 3])) must be the same as input size (torch.Size([128, 1]))




Using 15 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag7', 'AWS_rollmean_12', 'AWS_rollstd_12']
 Data shapes - Train: torch.Size([21500, 40, 15]), Val: torch.Size([5990, 40, 15]), Test: torch.Size([4634, 40, 15])
Error in fold fold_3: Target size (torch.Size([128, 3])) must be the same as input size (torch.Size([128, 1]))




Using 15 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag7', 'AWS_rollmean_12', 'AWS_rollstd_12']
 Data shapes - Train: torch.Size([29780, 40, 15]), Val: torch.Size([5686, 40, 15]), Test: torch.Size([4337, 40, 15])
Error in fold fold_4: Target size (torch.Size([128, 3])) must be the same as input size (torch.Size([128, 1]))




Using 15 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag7', 'AWS_rollmean_12', 'AWS_rollstd_12']


[I 2025-05-06 17:31:25,533] Trial 9 finished with value: inf and parameters: {'hidden_size': 78, 'num_layers': 1, 'dropout': 0.13124603058822354, 'lr': 0.0018884844078603507, 'time_step_in': 40, 'time_step_out': 3, 'stride': 4, 'binary_weight': 1.42941333337774, 'intensity_weight': 1.774956543146019, 'regression_weight': 1.8800870324197918, 'rain_threshold': 0.14512848317053942, 'num_lags': 1, 'lag_0': 7, 'num_windows': 1, 'window_0': 12}. Best is trial 2 with value: 0.5525891065597535.


 Data shapes - Train: torch.Size([38060, 40, 15]), Val: torch.Size([5265, 40, 15]), Test: torch.Size([3967, 40, 15])
Error in fold fold_5: Target size (torch.Size([128, 3])) must be the same as input size (torch.Size([128, 1]))
Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag3', 'AWS_lag10', 'AWS_lag2', 'AWS_rollmean_3', 'AWS_rollstd_3']
 Data shapes - Train: torch.Size([28444, 14, 17]), Val: torch.Size([29835, 14, 17]), Test: torch.Size([25258, 14, 17])
   Epoch 1/5, Total Loss: 3.0638, Binary: 0.6945, Intensity: 0.1303, Regression: 1.0196
   Epoch 2/5, Total Loss: 2.5720, Binary: 0.5198, Intensity: 0.1022, Regression: 0.9544
   Epoch 3/5, Total Loss: 2.1961, Binary: 0.3426, Intensity: 0.0961, Regression: 0.9457
   Epoch 4/5, Total Loss: 2.1545, Binary: 0.3276, Intensity: 0.0943, Regression: 0.9397
   Epoch 5/5, Total Loss: 2.0967, Binary: 0.3183, Intensity: 0.0909, Regression: 0.9165
Using 17 features: ['TCW', 'T

[I 2025-05-06 17:34:53,189] Trial 10 finished with value: 0.5049721539020539 and parameters: {'hidden_size': 138, 'num_layers': 2, 'dropout': 0.2068806560885842, 'lr': 0.007581170297223678, 'time_step_in': 14, 'time_step_out': 1, 'stride': 1, 'binary_weight': 1.9866906687608146, 'intensity_weight': 1.8846691177008739, 'regression_weight': 1.4108262076050648, 'rain_threshold': 0.1972097089816166, 'num_lags': 3, 'lag_0': 3, 'lag_1': 10, 'lag_2': 2, 'num_windows': 1, 'window_0': 3}. Best is trial 10 with value: 0.5049721539020539.


Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag3', 'AWS_lag12', 'AWS_lag1', 'AWS_rollmean_3', 'AWS_rollstd_3']
 Data shapes - Train: torch.Size([29112, 12, 17]), Val: torch.Size([30279, 12, 17]), Test: torch.Size([25616, 12, 17])
   Epoch 1/5, Total Loss: 2.8760, Binary: 0.6049, Intensity: 0.1293, Regression: 1.0762
   Epoch 2/5, Total Loss: 2.0835, Binary: 0.3395, Intensity: 0.0919, Regression: 0.9278
   Epoch 3/5, Total Loss: 2.0241, Binary: 0.3217, Intensity: 0.0883, Regression: 0.9147
   Epoch 4/5, Total Loss: 2.0182, Binary: 0.3180, Intensity: 0.0893, Regression: 0.9145
   Epoch 5/5, Total Loss: 2.0042, Binary: 0.3139, Intensity: 0.0892, Regression: 0.9103
Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag3', 'AWS_lag12', 'AWS_lag1', 'AWS_rollmean_3', 'AWS_rollstd_3']
 Data shapes - Train: torch.Size([62232, 12, 17]), Val: torch.Size([30287

[I 2025-05-06 17:38:08,559] Trial 11 finished with value: 0.487163245677948 and parameters: {'hidden_size': 137, 'num_layers': 2, 'dropout': 0.21899601855488443, 'lr': 0.0053016964010182445, 'time_step_in': 12, 'time_step_out': 1, 'stride': 1, 'binary_weight': 1.9784717420650746, 'intensity_weight': 1.8442010975316903, 'regression_weight': 1.3388887814642736, 'rain_threshold': 0.1982964237317818, 'num_lags': 3, 'lag_0': 3, 'lag_1': 12, 'lag_2': 1, 'num_windows': 1, 'window_0': 3}. Best is trial 11 with value: 0.487163245677948.


Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag2', 'AWS_lag12', 'AWS_lag1', 'AWS_rollmean_3', 'AWS_rollstd_3']
 Data shapes - Train: torch.Size([29112, 12, 17]), Val: torch.Size([30279, 12, 17]), Test: torch.Size([25616, 12, 17])
   Epoch 1/5, Total Loss: 3.1045, Binary: 0.6937, Intensity: 0.1337, Regression: 1.0720
   Epoch 2/5, Total Loss: 2.8822, Binary: 0.6715, Intensity: 0.1074, Regression: 0.9868
   Epoch 3/5, Total Loss: 2.2925, Binary: 0.3642, Intensity: 0.0970, Regression: 0.9568
   Epoch 4/5, Total Loss: 2.1824, Binary: 0.3373, Intensity: 0.0924, Regression: 0.9211
   Epoch 5/5, Total Loss: 2.1448, Binary: 0.3301, Intensity: 0.0903, Regression: 0.9075
Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag2', 'AWS_lag12', 'AWS_lag1', 'AWS_rollmean_3', 'AWS_rollstd_3']
 Data shapes - Train: torch.Size([62232, 12, 17]), Val: torch.Size([30287

[I 2025-05-06 17:41:20,939] Trial 12 finished with value: 0.5046171605587005 and parameters: {'hidden_size': 136, 'num_layers': 2, 'dropout': 0.20665775146545096, 'lr': 0.00973174645509762, 'time_step_in': 12, 'time_step_out': 1, 'stride': 1, 'binary_weight': 1.7010306166910494, 'intensity_weight': 1.998248197355564, 'regression_weight': 1.5459058625377553, 'rain_threshold': 0.19384476085849864, 'num_lags': 3, 'lag_0': 2, 'lag_1': 12, 'lag_2': 1, 'num_windows': 1, 'window_0': 3}. Best is trial 11 with value: 0.487163245677948.


Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag1', 'AWS_lag12', 'AWS_lag1', 'AWS_rollmean_4', 'AWS_rollstd_4']
 Data shapes - Train: torch.Size([28778, 12, 17]), Val: torch.Size([30057, 12, 17]), Test: torch.Size([25437, 12, 17])
Error in fold fold_1: Target size (torch.Size([128, 2])) must be the same as input size (torch.Size([128, 1]))
Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag1', 'AWS_lag12', 'AWS_lag1', 'AWS_rollmean_4', 'AWS_rollstd_4']
 Data shapes - Train: torch.Size([61898, 12, 17]), Val: torch.Size([30065, 12, 17]), Test: torch.Size([25087, 12, 17])
Error in fold fold_2: Target size (torch.Size([128, 2])) must be the same as input size (torch.Size([128, 1]))
Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag1', 'AWS_lag12', 'AWS_lag1', 'AWS_rollmean_4', 'AWS_

[I 2025-05-06 17:42:01,220] Trial 13 finished with value: inf and parameters: {'hidden_size': 135, 'num_layers': 2, 'dropout': 0.23794813361343042, 'lr': 0.004695658490247776, 'time_step_in': 12, 'time_step_out': 2, 'stride': 1, 'binary_weight': 1.655951958109287, 'intensity_weight': 1.7102901600199552, 'regression_weight': 1.5886460402208247, 'rain_threshold': 0.16805181798334826, 'num_lags': 3, 'lag_0': 1, 'lag_1': 12, 'lag_2': 1, 'num_windows': 1, 'window_0': 4}. Best is trial 11 with value: 0.487163245677948.


 Data shapes - Train: torch.Size([161258, 12, 17]), Val: torch.Size([28803, 12, 17]), Test: torch.Size([23556, 12, 17])
Error in fold fold_5: Target size (torch.Size([128, 2])) must be the same as input size (torch.Size([128, 1]))
Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag3', 'AWS_lag8', 'AWS_lag4', 'AWS_rollmean_5', 'AWS_rollstd_5']
 Data shapes - Train: torch.Size([12219, 26, 17]), Val: torch.Size([13525, 26, 17]), Test: torch.Size([11549, 26, 17])
Error in fold fold_1: Target size (torch.Size([128, 2])) must be the same as input size (torch.Size([128, 1]))




Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag3', 'AWS_lag8', 'AWS_lag4', 'AWS_rollmean_5', 'AWS_rollstd_5']
 Data shapes - Train: torch.Size([28778, 26, 17]), Val: torch.Size([13562, 26, 17]), Test: torch.Size([11193, 26, 17])
Error in fold fold_2: Target size (torch.Size([128, 2])) must be the same as input size (torch.Size([128, 1]))




Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag3', 'AWS_lag8', 'AWS_lag4', 'AWS_rollmean_5', 'AWS_rollstd_5']
 Data shapes - Train: torch.Size([45338, 26, 17]), Val: torch.Size([13427, 26, 17]), Test: torch.Size([10717, 26, 17])
Error in fold fold_3: Target size (torch.Size([128, 2])) must be the same as input size (torch.Size([128, 1]))




Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag3', 'AWS_lag8', 'AWS_lag4', 'AWS_rollmean_5', 'AWS_rollstd_5']
 Data shapes - Train: torch.Size([61898, 26, 17]), Val: torch.Size([12937, 26, 17]), Test: torch.Size([10250, 26, 17])
Error in fold fold_4: Target size (torch.Size([128, 2])) must be the same as input size (torch.Size([128, 1]))




Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag3', 'AWS_lag8', 'AWS_lag4', 'AWS_rollmean_5', 'AWS_rollstd_5']


[I 2025-05-06 17:42:40,624] Trial 14 finished with value: inf and parameters: {'hidden_size': 113, 'num_layers': 1, 'dropout': 0.004183819858477489, 'lr': 0.004029088183439236, 'time_step_in': 26, 'time_step_out': 2, 'stride': 2, 'binary_weight': 1.7189621050138268, 'intensity_weight': 1.5564809366037355, 'regression_weight': 1.6356396503766921, 'rain_threshold': 0.19667312166229656, 'num_lags': 3, 'lag_0': 3, 'lag_1': 8, 'lag_2': 4, 'num_windows': 1, 'window_0': 5}. Best is trial 11 with value: 0.487163245677948.


 Data shapes - Train: torch.Size([78458, 26, 17]), Val: torch.Size([12463, 26, 17]), Test: torch.Size([9765, 26, 17])
Error in fold fold_5: Target size (torch.Size([128, 2])) must be the same as input size (torch.Size([128, 1]))
Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag3', 'AWS_lag9', 'AWS_lag6', 'AWS_rollmean_7', 'AWS_rollstd_7']
 Data shapes - Train: torch.Size([27776, 16, 17]), Val: torch.Size([29391, 16, 17]), Test: torch.Size([24904, 16, 17])
   Epoch 1/5, Total Loss: 2.8747, Binary: 0.6962, Intensity: 0.1334, Regression: 1.1306
   Epoch 2/5, Total Loss: 2.6536, Binary: 0.6930, Intensity: 0.1082, Regression: 0.9986
   Epoch 3/5, Total Loss: 2.5885, Binary: 0.6852, Intensity: 0.1067, Regression: 0.9597
   Epoch 4/5, Total Loss: 2.1664, Binary: 0.4384, Intensity: 0.1039, Regression: 0.9645
   Epoch 5/5, Total Loss: 2.0106, Binary: 0.3729, Intensity: 0.0946, Regression: 0.9445
Using 17 features: ['TCW', 'T

[I 2025-05-06 17:46:26,055] Trial 15 finished with value: 0.5174278676509857 and parameters: {'hidden_size': 156, 'num_layers': 2, 'dropout': 0.155823042868557, 'lr': 0.008115124167019283, 'time_step_in': 16, 'time_step_out': 1, 'stride': 1, 'binary_weight': 1.7124668002656045, 'intensity_weight': 1.9939561456027675, 'regression_weight': 1.2528426590562791, 'rain_threshold': 0.14962190014135401, 'num_lags': 3, 'lag_0': 3, 'lag_1': 9, 'lag_2': 6, 'num_windows': 1, 'window_0': 7}. Best is trial 11 with value: 0.487163245677948.


Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag4', 'AWS_lag12', 'AWS_lag1', 'AWS_rollmean_3', 'AWS_rollstd_3']
 Data shapes - Train: torch.Size([13554, 18, 17]), Val: torch.Size([14413, 18, 17]), Test: torch.Size([12240, 18, 17])
Error in fold fold_1: Target size (torch.Size([128, 2])) must be the same as input size (torch.Size([128, 1]))
Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag4', 'AWS_lag12', 'AWS_lag1', 'AWS_rollmean_3', 'AWS_rollstd_3']
 Data shapes - Train: torch.Size([30114, 18, 17]), Val: torch.Size([14438, 18, 17]), Test: torch.Size([11997, 18, 17])
Error in fold fold_2: Target size (torch.Size([128, 2])) must be the same as input size (torch.Size([128, 1]))
Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag4', 'AWS_lag12', 'AWS_lag1', 'AWS_rollmean_3', 'AWS_

[I 2025-05-06 17:46:57,245] Trial 16 finished with value: inf and parameters: {'hidden_size': 106, 'num_layers': 2, 'dropout': 0.0889842834079008, 'lr': 0.0034550742488501786, 'time_step_in': 18, 'time_step_out': 2, 'stride': 2, 'binary_weight': 1.4610031579569769, 'intensity_weight': 1.5918565370938311, 'regression_weight': 1.0817084125323375, 'rain_threshold': 0.1288473035078038, 'num_lags': 3, 'lag_0': 4, 'lag_1': 12, 'lag_2': 1, 'num_windows': 1, 'window_0': 3}. Best is trial 11 with value: 0.487163245677948.


 Data shapes - Train: torch.Size([79794, 18, 17]), Val: torch.Size([13585, 18, 17]), Test: torch.Size([10915, 18, 17])
Error in fold fold_5: Target size (torch.Size([128, 2])) must be the same as input size (torch.Size([128, 1]))
Using 16 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag2', 'AWS_lag5', 'AWS_rollmean_16', 'AWS_rollstd_16']
 Data shapes - Train: torch.Size([22440, 27, 16]), Val: torch.Size([25918, 27, 16]), Test: torch.Size([22178, 27, 16])
Error in fold fold_1: Target size (torch.Size([128, 6])) must be the same as input size (torch.Size([128, 1]))




Using 16 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag2', 'AWS_lag5', 'AWS_rollmean_16', 'AWS_rollstd_16']
 Data shapes - Train: torch.Size([55552, 27, 16]), Val: torch.Size([26051, 27, 16]), Test: torch.Size([21315, 27, 16])
Error in fold fold_2: Target size (torch.Size([128, 6])) must be the same as input size (torch.Size([128, 1]))




Using 16 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag2', 'AWS_lag5', 'AWS_rollmean_16', 'AWS_rollstd_16']
 Data shapes - Train: torch.Size([88672, 27, 16]), Val: torch.Size([25714, 27, 16]), Test: torch.Size([20263, 27, 16])
Error in fold fold_3: Target size (torch.Size([128, 6])) must be the same as input size (torch.Size([128, 1]))




Using 16 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag2', 'AWS_lag5', 'AWS_rollmean_16', 'AWS_rollstd_16']
 Data shapes - Train: torch.Size([121792, 27, 16]), Val: torch.Size([24640, 27, 16]), Test: torch.Size([19255, 27, 16])
Error in fold fold_4: Target size (torch.Size([128, 6])) must be the same as input size (torch.Size([128, 1]))




Using 16 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag2', 'AWS_lag5', 'AWS_rollmean_16', 'AWS_rollstd_16']


[I 2025-05-06 17:48:04,712] Trial 17 finished with value: inf and parameters: {'hidden_size': 161, 'num_layers': 1, 'dropout': 0.22661409355552875, 'lr': 0.009180354240095523, 'time_step_in': 27, 'time_step_out': 6, 'stride': 1, 'binary_weight': 1.8252006134182985, 'intensity_weight': 1.9911803659200342, 'regression_weight': 1.5014498854391087, 'rain_threshold': 0.17531344764121315, 'num_lags': 2, 'lag_0': 2, 'lag_1': 5, 'num_windows': 1, 'window_0': 16}. Best is trial 11 with value: 0.487163245677948.


 Data shapes - Train: torch.Size([154912, 27, 16]), Val: torch.Size([23398, 27, 16]), Test: torch.Size([18064, 27, 16])
Error in fold fold_5: Target size (torch.Size([128, 6])) must be the same as input size (torch.Size([128, 1]))
Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag4', 'AWS_lag10', 'AWS_lag4', 'AWS_rollmean_6', 'AWS_rollstd_6']
 Data shapes - Train: torch.Size([14556, 12, 17]), Val: torch.Size([15200, 12, 17]), Test: torch.Size([12847, 12, 17])




   Epoch 1/5, Total Loss: 3.7291, Binary: 0.6944, Intensity: 0.4145, Regression: 1.2415
   Epoch 2/5, Total Loss: 2.9634, Binary: 0.6919, Intensity: 0.1262, Regression: 1.0885
   Epoch 3/5, Total Loss: 2.8518, Binary: 0.6891, Intensity: 0.1147, Regression: 1.0373
   Epoch 4/5, Total Loss: 2.7891, Binary: 0.6629, Intensity: 0.1083, Regression: 1.0265
   Epoch 5/5, Total Loss: 2.4559, Binary: 0.4778, Intensity: 0.0975, Regression: 0.9794
Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag4', 'AWS_lag10', 'AWS_lag4', 'AWS_rollmean_6', 'AWS_rollstd_6']
 Data shapes - Train: torch.Size([31116, 12, 17]), Val: torch.Size([15183, 12, 17]), Test: torch.Size([12680, 12, 17])




   Epoch 1/5, Total Loss: 3.4515, Binary: 0.6843, Intensity: 0.2610, Regression: 1.2422
   Epoch 2/5, Total Loss: 2.6759, Binary: 0.4809, Intensity: 0.1176, Regression: 1.0850
   Epoch 3/5, Total Loss: 2.4721, Binary: 0.4102, Intensity: 0.1085, Regression: 1.0275
   Epoch 4/5, Total Loss: 2.4010, Binary: 0.3943, Intensity: 0.1050, Regression: 1.0013
   Epoch 5/5, Total Loss: 2.3724, Binary: 0.3880, Intensity: 0.1028, Regression: 0.9915
Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag4', 'AWS_lag10', 'AWS_lag4', 'AWS_rollmean_6', 'AWS_rollstd_6']
 Data shapes - Train: torch.Size([47676, 12, 17]), Val: torch.Size([15018, 12, 17]), Test: torch.Size([12430, 12, 17])




   Epoch 1/5, Total Loss: 3.0551, Binary: 0.6177, Intensity: 0.2023, Regression: 1.1196
   Epoch 2/5, Total Loss: 2.4590, Binary: 0.4245, Intensity: 0.1085, Regression: 1.0094
   Epoch 3/5, Total Loss: 2.3369, Binary: 0.4022, Intensity: 0.1043, Regression: 0.9589
   Epoch 4/5, Total Loss: 2.2829, Binary: 0.3902, Intensity: 0.1010, Regression: 0.9397
   Epoch 5/5, Total Loss: 2.2449, Binary: 0.3795, Intensity: 0.1000, Regression: 0.9264
Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag4', 'AWS_lag10', 'AWS_lag4', 'AWS_rollmean_6', 'AWS_rollstd_6']
 Data shapes - Train: torch.Size([64236, 12, 17]), Val: torch.Size([14872, 12, 17]), Test: torch.Size([12264, 12, 17])




   Epoch 1/5, Total Loss: 3.1372, Binary: 0.6374, Intensity: 0.2038, Regression: 1.1514
   Epoch 2/5, Total Loss: 2.5628, Binary: 0.4533, Intensity: 0.1123, Regression: 1.0448
   Epoch 3/5, Total Loss: 2.4366, Binary: 0.4259, Intensity: 0.1070, Regression: 0.9968
   Epoch 4/5, Total Loss: 2.3917, Binary: 0.4134, Intensity: 0.1052, Regression: 0.9817
   Epoch 5/5, Total Loss: 2.3676, Binary: 0.4048, Intensity: 0.1037, Regression: 0.9756
Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag4', 'AWS_lag10', 'AWS_lag4', 'AWS_rollmean_6', 'AWS_rollstd_6']
 Data shapes - Train: torch.Size([80796, 12, 17]), Val: torch.Size([14623, 12, 17]), Test: torch.Size([12031, 12, 17])




   Epoch 1/5, Total Loss: 3.5924, Binary: 0.5687, Intensity: 0.2235, Regression: 1.4468
   Epoch 2/5, Total Loss: 3.0674, Binary: 0.4491, Intensity: 0.1444, Regression: 1.3091
   Epoch 3/5, Total Loss: 2.9476, Binary: 0.4242, Intensity: 0.1394, Regression: 1.2627
   Epoch 4/5, Total Loss: 2.8794, Binary: 0.4083, Intensity: 0.1369, Regression: 1.2373
   Epoch 5/5, Total Loss: 2.8566, Binary: 0.3961, Intensity: 0.1354, Regression: 1.2345


[I 2025-05-06 17:49:12,729] Trial 18 finished with value: 0.5551138103008271 and parameters: {'hidden_size': 123, 'num_layers': 1, 'dropout': 0.47757323981011207, 'lr': 0.00035529521440937626, 'time_step_in': 12, 'time_step_out': 1, 'stride': 2, 'binary_weight': 1.2627909479862138, 'intensity_weight': 1.7326954971493573, 'regression_weight': 1.7188498000251977, 'rain_threshold': 0.18165187276486935, 'num_lags': 3, 'lag_0': 4, 'lag_1': 10, 'lag_2': 4, 'num_windows': 1, 'window_0': 6}. Best is trial 11 with value: 0.487163245677948.


Using 19 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag1', 'AWS_lag5', 'AWS_lag11', 'AWS_rollmean_9', 'AWS_rollstd_9', 'AWS_rollmean_11', 'AWS_rollstd_11']
 Data shapes - Train: torch.Size([24771, 24, 19]), Val: torch.Size([27393, 24, 19]), Test: torch.Size([23344, 24, 19])
Error in fold fold_1: Target size (torch.Size([128, 2])) must be the same as input size (torch.Size([128, 1]))
Using 19 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag1', 'AWS_lag5', 'AWS_lag11', 'AWS_rollmean_9', 'AWS_rollstd_9', 'AWS_rollmean_11', 'AWS_rollstd_11']
 Data shapes - Train: torch.Size([57890, 24, 19]), Val: torch.Size([27413, 24, 19]), Test: torch.Size([22651, 24, 19])
Error in fold fold_2: Target size (torch.Size([128, 2])) must be the same as input size (torch.Size([128, 1]))
Using 19 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', '

[I 2025-05-06 17:50:25,087] Trial 19 finished with value: inf and parameters: {'hidden_size': 88, 'num_layers': 2, 'dropout': 0.2781154359374327, 'lr': 0.0051867347615023466, 'time_step_in': 24, 'time_step_out': 2, 'stride': 1, 'binary_weight': 1.5845857668101548, 'intensity_weight': 1.4070743210873489, 'regression_weight': 1.3972515564664847, 'rain_threshold': 0.1629277083506936, 'num_lags': 3, 'lag_0': 1, 'lag_1': 5, 'lag_2': 11, 'num_windows': 2, 'window_0': 9, 'window_1': 11}. Best is trial 11 with value: 0.487163245677948.


 Data shapes - Train: torch.Size([157250, 24, 19]), Val: torch.Size([25323, 24, 19]), Test: torch.Size([19953, 24, 19])
Error in fold fold_5: Target size (torch.Size([128, 2])) must be the same as input size (torch.Size([128, 1]))
 Best parameters for 2019-10:
  hidden_size: 137
  num_layers: 2
  dropout: 0.21899601855488443
  lr: 0.0053016964010182445
  time_step_in: 12
  time_step_out: 1
  stride: 1
  binary_weight: 1.9784717420650746
  intensity_weight: 1.8442010975316903
  regression_weight: 1.3388887814642736
  rain_threshold: 0.1982964237317818
  num_lags: 3
  lag_0: 3
  lag_1: 12
  lag_2: 1
  num_windows: 1
  window_0: 3
 Best parameters saved to hybrid_best_params_2019-10.json

 Final evaluation on 2019-10, fold_1 test set

 Training hybrid model for 2019-10, fold_1 with best parameters
Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag3', 'AWS_lag12', 'AWS_lag1', 'AWS_rollmean_3', 'AWS_rollstd_3']
 Data shap

[I 2025-05-06 17:58:38,103] A new study created in memory with name: hybrid_2020-04_study


  Test RMSE: 0.6913, Bias: -0.0408, R²: 0.6370, CSI: 0.7977
  Binary Accuracy: 0.9061, Intensity Accuracy: 0.9527
 Model saved to hybrid_model_2019-10_fold_5.pt

 Mean test performance for 2019-10:
  RMSE: 0.5329, Bias: -0.0214, R²: 0.7093, CSI: 0.7727
 Error during optimization for 2019-10: Invalid format specifier

###### Processing month: 2020-04
 Finding optimal hyperparameters for hybrid model on 2020-04 using cross-validation...
Using 18 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag3', 'AWS_lag2', 'AWS_rollmean_15', 'AWS_rollstd_15', 'AWS_rollmean_7', 'AWS_rollstd_7']
 Data shapes - Train: torch.Size([23380, 21, 18]), Val: torch.Size([23380, 21, 18]), Test: torch.Size([21094, 21, 18])
Error in fold fold_1: Target size (torch.Size([64, 6])) must be the same as input size (torch.Size([64, 1]))
Using 18 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag3', '

[I 2025-05-06 17:59:45,442] Trial 0 finished with value: inf and parameters: {'hidden_size': 142, 'num_layers': 3, 'dropout': 0.09474152576418698, 'lr': 0.0021629818762299953, 'time_step_in': 21, 'time_step_out': 6, 'stride': 1, 'binary_weight': 0.7214195554085417, 'intensity_weight': 1.9309091546091146, 'regression_weight': 1.1110015056372005, 'rain_threshold': 0.11603292516492292, 'num_lags': 2, 'lag_0': 3, 'lag_1': 2, 'num_windows': 2, 'window_0': 15, 'window_1': 7}. Best is trial 0 with value: inf.


 Data shapes - Train: torch.Size([151636, 21, 18]), Val: torch.Size([23380, 21, 18]), Test: torch.Size([21094, 21, 18])
Error in fold fold_5: Target size (torch.Size([64, 6])) must be the same as input size (torch.Size([64, 1]))
Using 21 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag12', 'AWS_lag8', 'AWS_lag6', 'AWS_rollmean_24', 'AWS_rollstd_24', 'AWS_rollmean_10', 'AWS_rollstd_10', 'AWS_rollmean_17', 'AWS_rollstd_17']
 Data shapes - Train: torch.Size([4008, 48, 21]), Val: torch.Size([4008, 48, 21]), Test: torch.Size([3741, 48, 21])
Error in fold fold_1: Target size (torch.Size([64, 4])) must be the same as input size (torch.Size([64, 1]))
Using 21 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag12', 'AWS_lag8', 'AWS_lag6', 'AWS_rollmean_24', 'AWS_rollstd_24', 'AWS_rollmean_10', 'AWS_rollstd_10', 'AWS_rollmean_17', 'AWS_rollstd_17']
 Data shapes - Train: torc

[I 2025-05-06 18:00:25,191] Trial 1 finished with value: inf and parameters: {'hidden_size': 250, 'num_layers': 3, 'dropout': 0.18729412281038343, 'lr': 0.002758193290202983, 'time_step_in': 48, 'time_step_out': 4, 'stride': 4, 'binary_weight': 1.3442742581209977, 'intensity_weight': 1.2526965505639434, 'regression_weight': 0.9014629939242251, 'rain_threshold': 0.17065691399751182, 'num_lags': 3, 'lag_0': 12, 'lag_1': 8, 'lag_2': 6, 'num_windows': 3, 'window_0': 24, 'window_1': 10, 'window_2': 17}. Best is trial 0 with value: inf.


 Data shapes - Train: torch.Size([36072, 48, 21]), Val: torch.Size([4008, 48, 21]), Test: torch.Size([3741, 48, 21])
Error in fold fold_5: Target size (torch.Size([64, 4])) must be the same as input size (torch.Size([64, 1]))
Using 15 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag2', 'AWS_rollmean_20', 'AWS_rollstd_20']
 Data shapes - Train: torch.Size([6012, 40, 15]), Val: torch.Size([6012, 40, 15]), Test: torch.Size([5766, 40, 15])
Error in fold fold_1: Target size (torch.Size([64, 3])) must be the same as input size (torch.Size([64, 1]))
Using 15 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag2', 'AWS_rollmean_20', 'AWS_rollstd_20']
 Data shapes - Train: torch.Size([16700, 40, 15]), Val: torch.Size([6012, 40, 15]), Test: torch.Size([5766, 40, 15])
Error in fold fold_2: Target size (torch.Size([64, 3])) must be the same as input size (torch.Size([64, 1]))
U

[I 2025-05-06 18:00:57,726] Trial 2 finished with value: inf and parameters: {'hidden_size': 90, 'num_layers': 3, 'dropout': 0.4070819784010629, 'lr': 0.007971724005704964, 'time_step_in': 40, 'time_step_out': 3, 'stride': 3, 'binary_weight': 1.524101000973977, 'intensity_weight': 0.5173673488265871, 'regression_weight': 1.114704295009609, 'rain_threshold': 0.05044592144982057, 'num_lags': 1, 'lag_0': 2, 'num_windows': 1, 'window_0': 20}. Best is trial 0 with value: inf.


 Data shapes - Train: torch.Size([48764, 40, 15]), Val: torch.Size([6012, 40, 15]), Test: torch.Size([5766, 40, 15])
Error in fold fold_5: Target size (torch.Size([64, 3])) must be the same as input size (torch.Size([64, 1]))
Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag11', 'AWS_lag6', 'AWS_lag8', 'AWS_rollmean_22', 'AWS_rollstd_22']
 Data shapes - Train: torch.Size([15364, 48, 17]), Val: torch.Size([15364, 48, 17]), Test: torch.Size([14860, 48, 17])
Error in fold fold_1: Target size (torch.Size([64, 3])) must be the same as input size (torch.Size([64, 1]))




Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag11', 'AWS_lag6', 'AWS_lag8', 'AWS_rollmean_22', 'AWS_rollstd_22']
 Data shapes - Train: torch.Size([47428, 48, 17]), Val: torch.Size([15364, 48, 17]), Test: torch.Size([14860, 48, 17])
Error in fold fold_2: Target size (torch.Size([64, 3])) must be the same as input size (torch.Size([64, 1]))




Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag11', 'AWS_lag6', 'AWS_lag8', 'AWS_rollmean_22', 'AWS_rollstd_22']
 Data shapes - Train: torch.Size([79492, 48, 17]), Val: torch.Size([15364, 48, 17]), Test: torch.Size([14860, 48, 17])
Error in fold fold_3: Target size (torch.Size([64, 3])) must be the same as input size (torch.Size([64, 1]))




Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag11', 'AWS_lag6', 'AWS_lag8', 'AWS_rollmean_22', 'AWS_rollstd_22']
 Data shapes - Train: torch.Size([111556, 48, 17]), Val: torch.Size([15364, 48, 17]), Test: torch.Size([14860, 48, 17])
Error in fold fold_4: Target size (torch.Size([64, 3])) must be the same as input size (torch.Size([64, 1]))




Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag11', 'AWS_lag6', 'AWS_lag8', 'AWS_rollmean_22', 'AWS_rollstd_22']


[I 2025-05-06 18:02:35,386] Trial 3 finished with value: inf and parameters: {'hidden_size': 129, 'num_layers': 1, 'dropout': 0.35196219548208524, 'lr': 0.007233271513823423, 'time_step_in': 48, 'time_step_out': 3, 'stride': 1, 'binary_weight': 0.8444908871538557, 'intensity_weight': 1.9986509198841547, 'regression_weight': 0.9128958096316205, 'rain_threshold': 0.07261255628380234, 'num_lags': 3, 'lag_0': 11, 'lag_1': 6, 'lag_2': 8, 'num_windows': 1, 'window_0': 22}. Best is trial 0 with value: inf.


 Data shapes - Train: torch.Size([143620, 48, 17]), Val: torch.Size([15364, 48, 17]), Test: torch.Size([14860, 48, 17])
Error in fold fold_5: Target size (torch.Size([64, 3])) must be the same as input size (torch.Size([64, 1]))
Using 21 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag12', 'AWS_lag4', 'AWS_lag8', 'AWS_rollmean_17', 'AWS_rollstd_17', 'AWS_rollmean_21', 'AWS_rollstd_21', 'AWS_rollmean_5', 'AWS_rollstd_5']
 Data shapes - Train: torch.Size([10354, 33, 21]), Val: torch.Size([10354, 33, 21]), Test: torch.Size([9549, 33, 21])
Error in fold fold_1: Target size (torch.Size([64, 2])) must be the same as input size (torch.Size([64, 1]))
Using 21 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag12', 'AWS_lag4', 'AWS_lag8', 'AWS_rollmean_17', 'AWS_rollstd_17', 'AWS_rollmean_21', 'AWS_rollstd_21', 'AWS_rollmean_5', 'AWS_rollstd_5']
 Data shapes - Train: torch.

[I 2025-05-06 18:03:29,270] Trial 4 finished with value: inf and parameters: {'hidden_size': 129, 'num_layers': 3, 'dropout': 0.07739352627702972, 'lr': 0.00023730275506821303, 'time_step_in': 33, 'time_step_out': 2, 'stride': 2, 'binary_weight': 1.4122803443191847, 'intensity_weight': 1.1655726375167894, 'regression_weight': 1.8170210810849146, 'rain_threshold': 0.11288565711121913, 'num_lags': 3, 'lag_0': 12, 'lag_1': 4, 'lag_2': 8, 'num_windows': 3, 'window_0': 17, 'window_1': 21, 'window_2': 5}. Best is trial 0 with value: inf.


 Data shapes - Train: torch.Size([74482, 33, 21]), Val: torch.Size([10354, 33, 21]), Test: torch.Size([9549, 33, 21])
Error in fold fold_5: Target size (torch.Size([64, 2])) must be the same as input size (torch.Size([64, 1]))
Using 16 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag12', 'AWS_lag8', 'AWS_rollmean_17', 'AWS_rollstd_17']
 Data shapes - Train: torch.Size([16700, 41, 16]), Val: torch.Size([16700, 41, 16]), Test: torch.Size([15894, 41, 16])
Error in fold fold_1: Target size (torch.Size([64, 6])) must be the same as input size (torch.Size([64, 1]))
Using 16 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag12', 'AWS_lag8', 'AWS_rollmean_17', 'AWS_rollstd_17']
 Data shapes - Train: torch.Size([48764, 41, 16]), Val: torch.Size([16700, 41, 16]), Test: torch.Size([15894, 41, 16])
Error in fold fold_2: Target size (torch.Size([64, 6])) must be the same as in

[I 2025-05-06 18:04:52,478] Trial 5 finished with value: inf and parameters: {'hidden_size': 199, 'num_layers': 3, 'dropout': 0.25930052499485756, 'lr': 0.0005873537710846105, 'time_step_in': 41, 'time_step_out': 6, 'stride': 1, 'binary_weight': 1.454562108438869, 'intensity_weight': 0.9564036276978434, 'regression_weight': 1.9709107720615946, 'rain_threshold': 0.11528591896881629, 'num_lags': 2, 'lag_0': 12, 'lag_1': 8, 'num_windows': 1, 'window_0': 17}. Best is trial 0 with value: inf.


 Data shapes - Train: torch.Size([144956, 41, 16]), Val: torch.Size([16700, 41, 16]), Test: torch.Size([15894, 41, 16])
Error in fold fold_5: Target size (torch.Size([64, 6])) must be the same as input size (torch.Size([64, 1]))
Using 20 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag5', 'AWS_lag9', 'AWS_rollmean_11', 'AWS_rollstd_11', 'AWS_rollmean_24', 'AWS_rollstd_24', 'AWS_rollmean_9', 'AWS_rollstd_9']
 Data shapes - Train: torch.Size([4676, 28, 20]), Val: torch.Size([4676, 28, 20]), Test: torch.Size([4137, 28, 20])
Error in fold fold_1: Target size (torch.Size([64, 3])) must be the same as input size (torch.Size([64, 1]))
Using 20 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag5', 'AWS_lag9', 'AWS_rollmean_11', 'AWS_rollstd_11', 'AWS_rollmean_24', 'AWS_rollstd_24', 'AWS_rollmean_9', 'AWS_rollstd_9']
 Data shapes - Train: torch.Size([11022, 28, 20]), Val: 

[I 2025-05-06 18:05:19,975] Trial 6 finished with value: inf and parameters: {'hidden_size': 147, 'num_layers': 3, 'dropout': 0.13981382533049963, 'lr': 0.00026962860113380663, 'time_step_in': 28, 'time_step_out': 3, 'stride': 5, 'binary_weight': 1.4099542841684247, 'intensity_weight': 1.9352316984518725, 'regression_weight': 1.8852537215445038, 'rain_threshold': 0.08868976262346759, 'num_lags': 2, 'lag_0': 5, 'lag_1': 9, 'num_windows': 3, 'window_0': 11, 'window_1': 24, 'window_2': 9}. Best is trial 0 with value: inf.


 Data shapes - Train: torch.Size([30060, 28, 20]), Val: torch.Size([4676, 28, 20]), Test: torch.Size([4137, 28, 20])
Error in fold fold_5: Target size (torch.Size([64, 3])) must be the same as input size (torch.Size([64, 1]))
Using 19 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag8', 'AWS_lag3', 'AWS_lag4', 'AWS_rollmean_17', 'AWS_rollstd_17', 'AWS_rollmean_22', 'AWS_rollstd_22']
 Data shapes - Train: torch.Size([14028, 12, 19]), Val: torch.Size([14028, 12, 19]), Test: torch.Size([12414, 12, 19])




   Epoch 1/5, Total Loss: 0.9225, Binary: 0.6601, Intensity: 0.0869, Regression: 0.3341
   Epoch 2/5, Total Loss: 0.7695, Binary: 0.5353, Intensity: 0.0454, Regression: 0.3285
   Epoch 3/5, Total Loss: 0.7162, Binary: 0.4927, Intensity: 0.0424, Regression: 0.3120
   Epoch 4/5, Total Loss: 0.6996, Binary: 0.4717, Intensity: 0.0419, Regression: 0.3153
   Epoch 5/5, Total Loss: 0.6700, Binary: 0.4584, Intensity: 0.0380, Regression: 0.2967
Using 19 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag8', 'AWS_lag3', 'AWS_lag4', 'AWS_rollmean_17', 'AWS_rollstd_17', 'AWS_rollmean_22', 'AWS_rollstd_22']
 Data shapes - Train: torch.Size([30060, 12, 19]), Val: torch.Size([14028, 12, 19]), Test: torch.Size([12414, 12, 19])




   Epoch 1/5, Total Loss: 1.1579, Binary: 0.6091, Intensity: 0.1210, Regression: 0.6589
   Epoch 2/5, Total Loss: 0.9575, Binary: 0.4846, Intensity: 0.0829, Regression: 0.5871
   Epoch 3/5, Total Loss: 0.8765, Binary: 0.4351, Intensity: 0.0750, Regression: 0.5485
   Epoch 4/5, Total Loss: 0.8187, Binary: 0.4098, Intensity: 0.0718, Regression: 0.5063
   Epoch 5/5, Total Loss: 0.7945, Binary: 0.3966, Intensity: 0.0685, Regression: 0.4939
Using 19 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag8', 'AWS_lag3', 'AWS_lag4', 'AWS_rollmean_17', 'AWS_rollstd_17', 'AWS_rollmean_22', 'AWS_rollstd_22']
 Data shapes - Train: torch.Size([46092, 12, 19]), Val: torch.Size([14028, 12, 19]), Test: torch.Size([12414, 12, 19])




Error in fold fold_3: Expected input batch_size (1) to match target batch_size (0).
Using 19 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag8', 'AWS_lag3', 'AWS_lag4', 'AWS_rollmean_17', 'AWS_rollstd_17', 'AWS_rollmean_22', 'AWS_rollstd_22']
 Data shapes - Train: torch.Size([62124, 12, 19]), Val: torch.Size([14028, 12, 19]), Test: torch.Size([12414, 12, 19])




   Epoch 1/5, Total Loss: 1.7005, Binary: 0.3840, Intensity: 0.2462, Regression: 1.4762
Error in fold fold_4: Expected input batch_size (1) to match target batch_size (0).
Using 19 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag8', 'AWS_lag3', 'AWS_lag4', 'AWS_rollmean_17', 'AWS_rollstd_17', 'AWS_rollmean_22', 'AWS_rollstd_22']
 Data shapes - Train: torch.Size([78156, 12, 19]), Val: torch.Size([14028, 12, 19]), Test: torch.Size([12414, 12, 19])




   Epoch 1/5, Total Loss: 1.3265, Binary: 0.3351, Intensity: 0.1639, Regression: 1.1437
   Epoch 2/5, Total Loss: 1.1704, Binary: 0.2622, Intensity: 0.1396, Regression: 1.0537
   Epoch 3/5, Total Loss: 1.1103, Binary: 0.2453, Intensity: 0.1334, Regression: 1.0025
   Epoch 4/5, Total Loss: 1.0478, Binary: 0.2327, Intensity: 0.1255, Regression: 0.9451
   Epoch 5/5, Total Loss: 1.0210, Binary: 0.2262, Intensity: 0.1218, Regression: 0.9222


[I 2025-05-06 18:06:33,177] Trial 7 finished with value: 0.7617144584655762 and parameters: {'hidden_size': 125, 'num_layers': 1, 'dropout': 0.05244014771359101, 'lr': 0.0017009128622177793, 'time_step_in': 12, 'time_step_out': 1, 'stride': 2, 'binary_weight': 0.8883307474461508, 'intensity_weight': 0.9106596154724818, 'regression_weight': 0.7689711121488019, 'rain_threshold': 0.07239539394997131, 'num_lags': 3, 'lag_0': 8, 'lag_1': 3, 'lag_2': 4, 'num_windows': 2, 'window_0': 17, 'window_1': 22}. Best is trial 7 with value: 0.7617144584655762.


Using 19 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag11', 'AWS_rollmean_16', 'AWS_rollstd_16', 'AWS_rollmean_15', 'AWS_rollstd_15', 'AWS_rollmean_20', 'AWS_rollstd_20']
 Data shapes - Train: torch.Size([2672, 48, 19]), Val: torch.Size([2672, 48, 19]), Test: torch.Size([2630, 48, 19])
Error in fold fold_1: Target size (torch.Size([64, 2])) must be the same as input size (torch.Size([64, 1]))




Using 19 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag11', 'AWS_rollmean_16', 'AWS_rollstd_16', 'AWS_rollmean_15', 'AWS_rollstd_15', 'AWS_rollmean_20', 'AWS_rollstd_20']
 Data shapes - Train: torch.Size([8016, 48, 19]), Val: torch.Size([2672, 48, 19]), Test: torch.Size([2630, 48, 19])
Error in fold fold_2: Target size (torch.Size([64, 2])) must be the same as input size (torch.Size([64, 1]))




Using 19 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag11', 'AWS_rollmean_16', 'AWS_rollstd_16', 'AWS_rollmean_15', 'AWS_rollstd_15', 'AWS_rollmean_20', 'AWS_rollstd_20']
 Data shapes - Train: torch.Size([13360, 48, 19]), Val: torch.Size([2672, 48, 19]), Test: torch.Size([2630, 48, 19])
Error in fold fold_3: Target size (torch.Size([64, 2])) must be the same as input size (torch.Size([64, 1]))




Using 19 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag11', 'AWS_rollmean_16', 'AWS_rollstd_16', 'AWS_rollmean_15', 'AWS_rollstd_15', 'AWS_rollmean_20', 'AWS_rollstd_20']
 Data shapes - Train: torch.Size([18704, 48, 19]), Val: torch.Size([2672, 48, 19]), Test: torch.Size([2630, 48, 19])
Error in fold fold_4: Target size (torch.Size([64, 2])) must be the same as input size (torch.Size([64, 1]))




Using 19 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag11', 'AWS_rollmean_16', 'AWS_rollstd_16', 'AWS_rollmean_15', 'AWS_rollstd_15', 'AWS_rollmean_20', 'AWS_rollstd_20']


[I 2025-05-06 18:07:03,365] Trial 8 finished with value: inf and parameters: {'hidden_size': 94, 'num_layers': 1, 'dropout': 0.10894102766737601, 'lr': 0.0002378749756029996, 'time_step_in': 48, 'time_step_out': 2, 'stride': 6, 'binary_weight': 1.1343122818620028, 'intensity_weight': 1.2783867242056575, 'regression_weight': 1.059853336365916, 'rain_threshold': 0.14315765058835656, 'num_lags': 1, 'lag_0': 11, 'num_windows': 3, 'window_0': 16, 'window_1': 15, 'window_2': 20}. Best is trial 7 with value: 0.7617144584655762.


 Data shapes - Train: torch.Size([24048, 48, 19]), Val: torch.Size([2672, 48, 19]), Test: torch.Size([2630, 48, 19])
Error in fold fold_5: Target size (torch.Size([64, 2])) must be the same as input size (torch.Size([64, 1]))
Using 20 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag1', 'AWS_lag1', 'AWS_rollmean_18', 'AWS_rollstd_18', 'AWS_rollmean_15', 'AWS_rollstd_15', 'AWS_rollmean_5', 'AWS_rollstd_5']
 Data shapes - Train: torch.Size([4008, 36, 20]), Val: torch.Size([4008, 36, 20]), Test: torch.Size([3678, 36, 20])
Error in fold fold_1: Target size (torch.Size([64, 3])) must be the same as input size (torch.Size([64, 1]))
Using 20 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag1', 'AWS_lag1', 'AWS_rollmean_18', 'AWS_rollstd_18', 'AWS_rollmean_15', 'AWS_rollstd_15', 'AWS_rollmean_5', 'AWS_rollstd_5']
 Data shapes - Train: torch.Size([10354, 36, 20]), Val: tor

[I 2025-05-06 18:07:34,233] Trial 9 finished with value: inf and parameters: {'hidden_size': 144, 'num_layers': 3, 'dropout': 0.11738169457463071, 'lr': 0.0021249846087596264, 'time_step_in': 36, 'time_step_out': 3, 'stride': 5, 'binary_weight': 1.1306067289330535, 'intensity_weight': 1.4369835321294635, 'regression_weight': 1.7061860985883863, 'rain_threshold': 0.19614394866051021, 'num_lags': 2, 'lag_0': 1, 'lag_1': 1, 'num_windows': 3, 'window_0': 18, 'window_1': 15, 'window_2': 5}. Best is trial 7 with value: 0.7617144584655762.


 Data shapes - Train: torch.Size([29726, 36, 20]), Val: torch.Size([4008, 36, 20]), Test: torch.Size([3678, 36, 20])
Error in fold fold_5: Target size (torch.Size([64, 3])) must be the same as input size (torch.Size([64, 1]))
Using 19 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag8', 'AWS_lag12', 'AWS_lag1', 'AWS_rollmean_4', 'AWS_rollstd_4', 'AWS_rollmean_19', 'AWS_rollstd_19']
 Data shapes - Train: torch.Size([9352, 13, 19]), Val: torch.Size([9352, 13, 19]), Test: torch.Size([8221, 13, 19])
   Epoch 1/5, Total Loss: 2.3461, Binary: 0.6929, Intensity: 0.9614, Regression: 0.7648
   Epoch 2/5, Total Loss: 1.9954, Binary: 0.6940, Intensity: 0.4749, Regression: 0.6442
   Epoch 3/5, Total Loss: 1.8117, Binary: 0.6928, Intensity: 0.1625, Regression: 0.6567
   Epoch 4/5, Total Loss: 1.7761, Binary: 0.6925, Intensity: 0.1033, Regression: 0.6579
   Epoch 5/5, Total Loss: 1.7510, Binary: 0.6925, Intensity: 0.0815, Regression: 0.63

[I 2025-05-06 18:08:25,654] Trial 10 finished with value: 0.5219628810882568 and parameters: {'hidden_size': 37, 'num_layers': 2, 'dropout': 0.002768626437645645, 'lr': 0.00010234175182783413, 'time_step_in': 13, 'time_step_out': 1, 'stride': 3, 'binary_weight': 1.9985549646187208, 'intensity_weight': 0.6007181011338533, 'regression_weight': 0.5017038592281455, 'rain_threshold': 0.05237908486540391, 'num_lags': 3, 'lag_0': 8, 'lag_1': 12, 'lag_2': 1, 'num_windows': 2, 'window_0': 4, 'window_1': 19}. Best is trial 10 with value: 0.5219628810882568.


Error in fold fold_5: Expected input batch_size (1) to match target batch_size (0).
Using 19 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag8', 'AWS_lag12', 'AWS_lag1', 'AWS_rollmean_3', 'AWS_rollstd_3', 'AWS_rollmean_19', 'AWS_rollstd_19']
 Data shapes - Train: torch.Size([9352, 12, 19]), Val: torch.Size([9352, 12, 19]), Test: torch.Size([8370, 12, 19])
   Epoch 1/5, Total Loss: 2.1157, Binary: 0.6925, Intensity: 0.9939, Regression: 0.2930
   Epoch 2/5, Total Loss: 1.7032, Binary: 0.6921, Intensity: 0.3675, Regression: 0.2493
   Epoch 3/5, Total Loss: 1.5462, Binary: 0.6924, Intensity: 0.1164, Regression: 0.2449
   Epoch 4/5, Total Loss: 1.5096, Binary: 0.6924, Intensity: 0.0681, Regression: 0.2329
   Epoch 5/5, Total Loss: 1.4990, Binary: 0.6924, Intensity: 0.0539, Regression: 0.2300
Using 19 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag8', 'AWS_lag12', 'A

[I 2025-05-06 18:09:23,294] Trial 11 finished with value: 0.5288707415262858 and parameters: {'hidden_size': 37, 'num_layers': 2, 'dropout': 0.0034084434556533386, 'lr': 0.00011273359934536066, 'time_step_in': 12, 'time_step_out': 1, 'stride': 3, 'binary_weight': 1.9266367340390549, 'intensity_weight': 0.6173424946589217, 'regression_weight': 0.5730391201583253, 'rain_threshold': 0.05498855929074676, 'num_lags': 3, 'lag_0': 8, 'lag_1': 12, 'lag_2': 1, 'num_windows': 2, 'window_0': 3, 'window_1': 19}. Best is trial 10 with value: 0.5219628810882568.


Error in fold fold_5: Expected input batch_size (1) to match target batch_size (0).
Using 19 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag7', 'AWS_lag12', 'AWS_lag1', 'AWS_rollmean_3', 'AWS_rollstd_3', 'AWS_rollmean_18', 'AWS_rollstd_18']
 Data shapes - Train: torch.Size([9352, 13, 19]), Val: torch.Size([9352, 13, 19]), Test: torch.Size([8221, 13, 19])
   Epoch 1/5, Total Loss: 2.3661, Binary: 0.6931, Intensity: 1.1221, Regression: 0.7972
   Epoch 2/5, Total Loss: 2.0396, Binary: 0.6967, Intensity: 0.6127, Regression: 0.6599
   Epoch 3/5, Total Loss: 1.8693, Binary: 0.6926, Intensity: 0.3015, Regression: 0.6543
   Epoch 4/5, Total Loss: 1.7951, Binary: 0.6817, Intensity: 0.1804, Regression: 0.6721
   Epoch 5/5, Total Loss: 1.6747, Binary: 0.6295, Intensity: 0.1308, Regression: 0.6873
Using 19 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag7', 'AWS_lag12', 'A

[I 2025-05-06 18:10:14,878] Trial 12 finished with value: 0.5165307025114695 and parameters: {'hidden_size': 38, 'num_layers': 2, 'dropout': 0.005332318582502013, 'lr': 0.00010161003023622163, 'time_step_in': 13, 'time_step_out': 1, 'stride': 3, 'binary_weight': 1.9751903160127513, 'intensity_weight': 0.5119489307855372, 'regression_weight': 0.5302031552205841, 'rain_threshold': 0.05402830140201603, 'num_lags': 3, 'lag_0': 7, 'lag_1': 12, 'lag_2': 1, 'num_windows': 2, 'window_0': 3, 'window_1': 18}. Best is trial 12 with value: 0.5165307025114695.


Error in fold fold_5: Expected input batch_size (1) to match target batch_size (0).
Using 19 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag6', 'AWS_lag12', 'AWS_lag1', 'AWS_rollmean_3', 'AWS_rollstd_3', 'AWS_rollmean_18', 'AWS_rollstd_18']
 Data shapes - Train: torch.Size([6680, 19, 19]), Val: torch.Size([6680, 19, 19]), Test: torch.Size([5821, 19, 19])
   Epoch 1/5, Total Loss: 2.1263, Binary: 0.6926, Intensity: 0.8920, Regression: 0.3184
   Epoch 2/5, Total Loss: 1.9109, Binary: 0.6915, Intensity: 0.6485, Regression: 0.2578
   Epoch 3/5, Total Loss: 1.6015, Binary: 0.6916, Intensity: 0.2368, Regression: 0.2539
   Epoch 4/5, Total Loss: 1.4865, Binary: 0.6785, Intensity: 0.1243, Regression: 0.2411
   Epoch 5/5, Total Loss: 1.4315, Binary: 0.6588, Intensity: 0.0865, Regression: 0.2609
Using 19 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag6', 'AWS_lag12', 'A

[I 2025-05-06 18:10:58,434] Trial 13 finished with value: 0.6769302487373352 and parameters: {'hidden_size': 32, 'num_layers': 2, 'dropout': 0.008443468060745923, 'lr': 0.00011281070754970741, 'time_step_in': 19, 'time_step_out': 1, 'stride': 4, 'binary_weight': 1.8707291663063446, 'intensity_weight': 0.7472020920472586, 'regression_weight': 0.5153166214764762, 'rain_threshold': 0.08801366382658779, 'num_lags': 3, 'lag_0': 6, 'lag_1': 12, 'lag_2': 1, 'num_windows': 2, 'window_0': 3, 'window_1': 18}. Best is trial 12 with value: 0.5165307025114695.


Error in fold fold_5: Expected input batch_size (1) to match target batch_size (0).
Using 18 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag8', 'AWS_lag10', 'AWS_rollmean_8', 'AWS_rollstd_8', 'AWS_rollmean_13', 'AWS_rollstd_13']
 Data shapes - Train: torch.Size([8350, 20, 18]), Val: torch.Size([8350, 20, 18]), Test: torch.Size([7586, 20, 18])
Error in fold fold_1: Target size (torch.Size([64, 2])) must be the same as input size (torch.Size([64, 1]))
Using 18 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag8', 'AWS_lag10', 'AWS_rollmean_8', 'AWS_rollstd_8', 'AWS_rollmean_13', 'AWS_rollstd_13']
 Data shapes - Train: torch.Size([19038, 20, 18]), Val: torch.Size([8350, 20, 18]), Test: torch.Size([7586, 20, 18])
Error in fold fold_2: Target size (torch.Size([64, 2])) must be the same as input size (torch.Size([64, 1]))
Using 18 features: ['TCW', 'TCLW', 'R250', 'R50

[I 2025-05-06 18:11:26,922] Trial 14 finished with value: inf and parameters: {'hidden_size': 65, 'num_layers': 2, 'dropout': 0.23386341446734943, 'lr': 0.0004947708492308451, 'time_step_in': 20, 'time_step_out': 2, 'stride': 3, 'binary_weight': 1.730750076735032, 'intensity_weight': 0.7785143081437897, 'regression_weight': 1.4575401442837561, 'rain_threshold': 0.08296780283534444, 'num_lags': 2, 'lag_0': 8, 'lag_1': 10, 'num_windows': 2, 'window_0': 8, 'window_1': 13}. Best is trial 12 with value: 0.5165307025114695.


 Data shapes - Train: torch.Size([51102, 20, 18]), Val: torch.Size([8350, 20, 18]), Test: torch.Size([7586, 20, 18])
Error in fold fold_5: Target size (torch.Size([64, 2])) must be the same as input size (torch.Size([64, 1]))
Using 19 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag9', 'AWS_lag11', 'AWS_lag11', 'AWS_rollmean_7', 'AWS_rollstd_7', 'AWS_rollmean_18', 'AWS_rollstd_18']
 Data shapes - Train: torch.Size([12692, 16, 19]), Val: torch.Size([12692, 16, 19]), Test: torch.Size([11369, 16, 19])
Error in fold fold_1: Target size (torch.Size([64, 5])) must be the same as input size (torch.Size([64, 1]))
Using 19 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag9', 'AWS_lag11', 'AWS_lag11', 'AWS_rollmean_7', 'AWS_rollstd_7', 'AWS_rollmean_18', 'AWS_rollstd_18']
 Data shapes - Train: torch.Size([28724, 16, 19]), Val: torch.Size([12692, 16, 19]), Test: torch.Size(

[I 2025-05-06 18:12:00,126] Trial 15 finished with value: inf and parameters: {'hidden_size': 72, 'num_layers': 2, 'dropout': 0.31641296033185307, 'lr': 0.00011826509018611375, 'time_step_in': 16, 'time_step_out': 5, 'stride': 2, 'binary_weight': 1.7161584695816101, 'intensity_weight': 1.6102859795038598, 'regression_weight': 0.7464920529102115, 'rain_threshold': 0.0506402050410921, 'num_lags': 3, 'lag_0': 9, 'lag_1': 11, 'lag_2': 11, 'num_windows': 2, 'window_0': 7, 'window_1': 18}. Best is trial 12 with value: 0.5165307025114695.


 Data shapes - Train: torch.Size([76820, 16, 19]), Val: torch.Size([12692, 16, 19]), Test: torch.Size([11369, 16, 19])
Error in fold fold_5: Target size (torch.Size([64, 5])) must be the same as input size (torch.Size([64, 1]))
Using 19 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag5', 'AWS_lag6', 'AWS_lag3', 'AWS_rollmean_6', 'AWS_rollstd_6', 'AWS_rollmean_4', 'AWS_rollstd_4']
 Data shapes - Train: torch.Size([6012, 26, 19]), Val: torch.Size([6012, 26, 19]), Test: torch.Size([5330, 26, 19])
   Epoch 1/5, Total Loss: 2.1912, Binary: 0.6937, Intensity: 0.5669, Regression: 0.5216
   Epoch 2/5, Total Loss: 1.9222, Binary: 0.6936, Intensity: 0.0715, Regression: 0.5121
   Epoch 3/5, Total Loss: 1.9082, Binary: 0.6935, Intensity: 0.0627, Regression: 0.5056
   Epoch 4/5, Total Loss: 1.8886, Binary: 0.6924, Intensity: 0.0618, Regression: 0.4936
   Epoch 5/5, Total Loss: 1.9141, Binary: 0.6886, Intensity: 0.0618, Regression: 0.515

[I 2025-05-06 18:12:38,341] Trial 16 finished with value: 0.5537008047103882 and parameters: {'hidden_size': 64, 'num_layers': 2, 'dropout': 0.44259942096273946, 'lr': 0.00044864672056819216, 'time_step_in': 26, 'time_step_out': 1, 'stride': 4, 'binary_weight': 1.6577851159499568, 'intensity_weight': 0.5150677224545994, 'regression_weight': 1.4361829828596822, 'rain_threshold': 0.1446764890529073, 'num_lags': 3, 'lag_0': 5, 'lag_1': 6, 'lag_2': 3, 'num_windows': 2, 'window_0': 6, 'window_1': 4}. Best is trial 12 with value: 0.5165307025114695.


Error in fold fold_5: Expected input batch_size (1) to match target batch_size (0).
Using 16 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag6', 'AWS_lag10', 'AWS_rollmean_11', 'AWS_rollstd_11']
 Data shapes - Train: torch.Size([8016, 24, 16]), Val: torch.Size([8016, 24, 16]), Test: torch.Size([7177, 24, 16])
Error in fold fold_1: Target size (torch.Size([64, 2])) must be the same as input size (torch.Size([64, 1]))
Using 16 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag6', 'AWS_lag10', 'AWS_rollmean_11', 'AWS_rollstd_11']
 Data shapes - Train: torch.Size([18704, 24, 16]), Val: torch.Size([8016, 24, 16]), Test: torch.Size([7177, 24, 16])
Error in fold fold_2: Target size (torch.Size([64, 2])) must be the same as input size (torch.Size([64, 1]))
Using 16 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_

[I 2025-05-06 18:13:04,837] Trial 17 finished with value: inf and parameters: {'hidden_size': 46, 'num_layers': 2, 'dropout': 0.04235852920906097, 'lr': 0.0010294980200228245, 'time_step_in': 24, 'time_step_out': 2, 'stride': 3, 'binary_weight': 1.9584912395591436, 'intensity_weight': 1.003647202519955, 'regression_weight': 0.6734386592878449, 'rain_threshold': 0.09674355596912072, 'num_lags': 2, 'lag_0': 6, 'lag_1': 10, 'num_windows': 1, 'window_0': 11}. Best is trial 12 with value: 0.5165307025114695.


 Data shapes - Train: torch.Size([50768, 24, 16]), Val: torch.Size([8016, 24, 16]), Test: torch.Size([7177, 24, 16])
Error in fold fold_5: Target size (torch.Size([64, 2])) must be the same as input size (torch.Size([64, 1]))
Using 15 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag9', 'AWS_rollmean_5', 'AWS_rollstd_5']
 Data shapes - Train: torch.Size([5344, 16, 15]), Val: torch.Size([5344, 16, 15]), Test: torch.Size([4693, 16, 15])
Error in fold fold_1: Target size (torch.Size([64, 4])) must be the same as input size (torch.Size([64, 1]))




Using 15 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag9', 'AWS_rollmean_5', 'AWS_rollstd_5']
 Data shapes - Train: torch.Size([11690, 16, 15]), Val: torch.Size([5344, 16, 15]), Test: torch.Size([4693, 16, 15])
Error in fold fold_2: Target size (torch.Size([64, 4])) must be the same as input size (torch.Size([64, 1]))




Using 15 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag9', 'AWS_rollmean_5', 'AWS_rollstd_5']
 Data shapes - Train: torch.Size([18036, 16, 15]), Val: torch.Size([5344, 16, 15]), Test: torch.Size([4693, 16, 15])
Error in fold fold_3: Target size (torch.Size([64, 4])) must be the same as input size (torch.Size([64, 1]))




Using 15 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag9', 'AWS_rollmean_5', 'AWS_rollstd_5']
 Data shapes - Train: torch.Size([24382, 16, 15]), Val: torch.Size([5344, 16, 15]), Test: torch.Size([4693, 16, 15])
Error in fold fold_4: Target size (torch.Size([64, 4])) must be the same as input size (torch.Size([64, 1]))




Using 15 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag9', 'AWS_rollmean_5', 'AWS_rollstd_5']


[I 2025-05-06 18:13:20,698] Trial 18 finished with value: inf and parameters: {'hidden_size': 186, 'num_layers': 1, 'dropout': 0.1605091102156686, 'lr': 0.00018063827557800912, 'time_step_in': 16, 'time_step_out': 4, 'stride': 5, 'binary_weight': 1.7790303259752918, 'intensity_weight': 0.7291116301486227, 'regression_weight': 0.5057955644290186, 'rain_threshold': 0.065131313793832, 'num_lags': 1, 'lag_0': 9, 'num_windows': 1, 'window_0': 5}. Best is trial 12 with value: 0.5165307025114695.


 Data shapes - Train: torch.Size([31062, 16, 15]), Val: torch.Size([5344, 16, 15]), Test: torch.Size([4693, 16, 15])
Error in fold fold_5: Target size (torch.Size([64, 4])) must be the same as input size (torch.Size([64, 1]))
Using 19 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag4', 'AWS_lag12', 'AWS_lag3', 'AWS_rollmean_10', 'AWS_rollstd_10', 'AWS_rollmean_12', 'AWS_rollstd_12']
 Data shapes - Train: torch.Size([13694, 15, 19]), Val: torch.Size([13694, 15, 19]), Test: torch.Size([12068, 15, 19])
   Epoch 1/5, Total Loss: 2.2343, Binary: 0.6684, Intensity: 0.2083, Regression: 0.5592
   Epoch 2/5, Total Loss: 1.8820, Binary: 0.5440, Intensity: 0.0568, Regression: 0.5563
   Epoch 3/5, Total Loss: 1.8222, Binary: 0.5183, Intensity: 0.0550, Regression: 0.5510
   Epoch 4/5, Total Loss: 1.8093, Binary: 0.5072, Intensity: 0.0557, Regression: 0.5573
   Epoch 5/5, Total Loss: 1.7753, Binary: 0.4910, Intensity: 0.0565, Regression:

[I 2025-05-06 18:14:14,841] Trial 19 finished with value: 0.6813070774078369 and parameters: {'hidden_size': 98, 'num_layers': 2, 'dropout': 0.21916287698800763, 'lr': 0.0003624720632731937, 'time_step_in': 15, 'time_step_out': 1, 'stride': 2, 'binary_weight': 1.988888179868371, 'intensity_weight': 0.6674530368985832, 'regression_weight': 1.3697061285294487, 'rain_threshold': 0.13833727087146763, 'num_lags': 3, 'lag_0': 4, 'lag_1': 12, 'lag_2': 3, 'num_windows': 2, 'window_0': 10, 'window_1': 12}. Best is trial 12 with value: 0.5165307025114695.


Error in fold fold_5: Expected input batch_size (1) to match target batch_size (0).
 Best parameters for 2020-04:
  hidden_size: 38
  num_layers: 2
  dropout: 0.005332318582502013
  lr: 0.00010161003023622163
  time_step_in: 13
  time_step_out: 1
  stride: 3
  binary_weight: 1.9751903160127513
  intensity_weight: 0.5119489307855372
  regression_weight: 0.5302031552205841
  rain_threshold: 0.05402830140201603
  num_lags: 3
  lag_0: 7
  lag_1: 12
  lag_2: 1
  num_windows: 2
  window_0: 3
  window_1: 18
 Best parameters saved to hybrid_best_params_2020-04.json

 Final evaluation on 2020-04, fold_1 test set

 Training hybrid model for 2020-04, fold_1 with best parameters
Using 19 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag7', 'AWS_lag12', 'AWS_lag1', 'AWS_rollmean_3', 'AWS_rollstd_3', 'AWS_rollmean_18', 'AWS_rollstd_18']
 Data shapes - Train: torch.Size([9352, 13, 19]), Val: torch.Size([9352, 13, 19]), Test: torch.Size([82

[I 2025-05-06 18:14:19,318] A new study created in memory with name: hybrid_2020-10_study


 Error during optimization for 2020-04: Expected input batch_size (1) to match target batch_size (0).

###### Processing month: 2020-10
 Finding optimal hyperparameters for hybrid model on 2020-10 using cross-validation...
Using 15 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag10', 'AWS_rollmean_3', 'AWS_rollstd_3']
 Data shapes - Train: torch.Size([4564, 31, 15]), Val: torch.Size([5507, 31, 15]), Test: torch.Size([7374, 31, 15])




   Epoch 1/5, Total Loss: 8.4038, Binary: 0.6688, Intensity: 0.9668, Regression: 7.2319
   Epoch 2/5, Total Loss: 5.1103, Binary: 0.6053, Intensity: 0.6893, Regression: 4.0585
   Epoch 3/5, Total Loss: 4.2694, Binary: 0.5332, Intensity: 0.5079, Regression: 3.4461
   Epoch 4/5, Total Loss: 3.9929, Binary: 0.4965, Intensity: 0.4538, Regression: 3.2516
   Epoch 5/5, Total Loss: 3.8321, Binary: 0.4606, Intensity: 0.4347, Regression: 3.1388
Using 15 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag10', 'AWS_rollmean_3', 'AWS_rollstd_3']
 Data shapes - Train: torch.Size([11371, 31, 15]), Val: torch.Size([5513, 31, 15]), Test: torch.Size([7230, 31, 15])




   Epoch 1/5, Total Loss: 6.1148, Binary: 0.6578, Intensity: 0.8388, Regression: 4.9100
   Epoch 2/5, Total Loss: 4.0770, Binary: 0.5137, Intensity: 0.4730, Regression: 3.3008
   Epoch 3/5, Total Loss: 3.7539, Binary: 0.4300, Intensity: 0.4304, Regression: 3.0917
   Epoch 4/5, Total Loss: 3.5576, Binary: 0.3692, Intensity: 0.4124, Regression: 2.9654
   Epoch 5/5, Total Loss: 3.4073, Binary: 0.3458, Intensity: 0.3984, Regression: 2.8443
Using 15 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag10', 'AWS_rollmean_3', 'AWS_rollstd_3']
 Data shapes - Train: torch.Size([18028, 31, 15]), Val: torch.Size([5388, 31, 15]), Test: torch.Size([7008, 31, 15])




   Epoch 1/5, Total Loss: 5.3166, Binary: 0.6001, Intensity: 0.6447, Regression: 4.3444
   Epoch 2/5, Total Loss: 3.7646, Binary: 0.4647, Intensity: 0.4267, Regression: 3.0709
   Epoch 3/5, Total Loss: 3.5225, Binary: 0.4125, Intensity: 0.4128, Regression: 2.8804
   Epoch 4/5, Total Loss: 3.3650, Binary: 0.3633, Intensity: 0.4039, Regression: 2.7727
   Epoch 5/5, Total Loss: 3.2710, Binary: 0.3217, Intensity: 0.3944, Regression: 2.7265
Using 15 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag10', 'AWS_rollmean_3', 'AWS_rollstd_3']
 Data shapes - Train: torch.Size([24595, 31, 15]), Val: torch.Size([5120, 31, 15]), Test: torch.Size([6797, 31, 15])




   Epoch 1/5, Total Loss: 4.9321, Binary: 0.5591, Intensity: 0.5804, Regression: 4.0498
   Epoch 2/5, Total Loss: 3.4586, Binary: 0.4053, Intensity: 0.4087, Regression: 2.8236
   Epoch 3/5, Total Loss: 3.2000, Binary: 0.3541, Intensity: 0.3881, Regression: 2.6224
   Epoch 4/5, Total Loss: 3.0736, Binary: 0.3313, Intensity: 0.3749, Regression: 2.5256
   Epoch 5/5, Total Loss: 2.9682, Binary: 0.3121, Intensity: 0.3659, Regression: 2.4425
Using 15 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag10', 'AWS_rollmean_3', 'AWS_rollstd_3']
 Data shapes - Train: torch.Size([31161, 31, 15]), Val: torch.Size([4899, 31, 15]), Test: torch.Size([6632, 31, 15])




   Epoch 1/5, Total Loss: 4.6780, Binary: 0.5509, Intensity: 0.5636, Regression: 3.8027
   Epoch 2/5, Total Loss: 3.5390, Binary: 0.4166, Intensity: 0.4126, Regression: 2.8942
   Epoch 3/5, Total Loss: 3.3138, Binary: 0.3690, Intensity: 0.3956, Regression: 2.7211
   Epoch 4/5, Total Loss: 3.1679, Binary: 0.3257, Intensity: 0.3816, Regression: 2.6260


[I 2025-05-06 18:15:11,028] Trial 0 finished with value: 0.9751351237297058 and parameters: {'hidden_size': 194, 'num_layers': 1, 'dropout': 0.2718704579992738, 'lr': 0.00044891313482111033, 'time_step_in': 31, 'time_step_out': 1, 'stride': 5, 'binary_weight': 0.9829917900820302, 'intensity_weight': 1.1452625794652946, 'regression_weight': 0.9180316632117681, 'rain_threshold': 0.19499582555291312, 'num_lags': 1, 'lag_0': 10, 'num_windows': 1, 'window_0': 3}. Best is trial 0 with value: 0.9751351237297058.


   Epoch 5/5, Total Loss: 3.0712, Binary: 0.2991, Intensity: 0.3690, Regression: 2.5647
Using 16 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag11', 'AWS_lag1', 'AWS_rollmean_14', 'AWS_rollstd_14']
 Data shapes - Train: torch.Size([4278, 47, 16]), Val: torch.Size([6056, 47, 16]), Test: torch.Size([8448, 47, 16])
Error in fold fold_1: Target size (torch.Size([128, 2])) must be the same as input size (torch.Size([128, 1]))
Using 16 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag11', 'AWS_lag1', 'AWS_rollmean_14', 'AWS_rollstd_14']
 Data shapes - Train: torch.Size([12552, 47, 16]), Val: torch.Size([6086, 47, 16]), Test: torch.Size([8151, 47, 16])
Error in fold fold_2: Target size (torch.Size([128, 2])) must be the same as input size (torch.Size([128, 1]))
Using 16 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV

[I 2025-05-06 18:15:45,592] Trial 1 finished with value: inf and parameters: {'hidden_size': 55, 'num_layers': 2, 'dropout': 0.4336171127833834, 'lr': 0.00016753170534725348, 'time_step_in': 47, 'time_step_out': 2, 'stride': 4, 'binary_weight': 1.9206525338027012, 'intensity_weight': 1.4235504281459936, 'regression_weight': 1.063059036417636, 'rain_threshold': 0.10574213895424228, 'num_lags': 2, 'lag_0': 11, 'lag_1': 1, 'num_windows': 1, 'window_0': 14}. Best is trial 0 with value: 0.9751351237297058.


 Data shapes - Train: torch.Size([37392, 47, 16]), Val: torch.Size([4928, 47, 16]), Test: torch.Size([6999, 47, 16])
Error in fold fold_5: Target size (torch.Size([128, 2])) must be the same as input size (torch.Size([128, 1]))
Using 18 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag2', 'AWS_lag11', 'AWS_rollmean_10', 'AWS_rollstd_10', 'AWS_rollmean_4', 'AWS_rollstd_4']
 Data shapes - Train: torch.Size([26774, 16, 18]), Val: torch.Size([29414, 16, 18]), Test: torch.Size([38708, 16, 18])
Error in fold fold_1: Target size (torch.Size([128, 4])) must be the same as input size (torch.Size([128, 1]))
Using 18 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag2', 'AWS_lag11', 'AWS_rollmean_10', 'AWS_rollstd_10', 'AWS_rollmean_4', 'AWS_rollstd_4']
 Data shapes - Train: torch.Size([59894, 16, 18]), Val: torch.Size([29449, 16, 18]), Test: torch.Size([38289, 16, 18])
Error

[I 2025-05-06 18:16:42,991] Trial 2 finished with value: inf and parameters: {'hidden_size': 132, 'num_layers': 3, 'dropout': 0.29700936073577994, 'lr': 0.0001553535002187332, 'time_step_in': 16, 'time_step_out': 4, 'stride': 1, 'binary_weight': 0.5254854745469152, 'intensity_weight': 1.7071590698465757, 'regression_weight': 1.7033660345643424, 'rain_threshold': 0.10930822115677283, 'num_lags': 2, 'lag_0': 2, 'lag_1': 11, 'num_windows': 2, 'window_0': 10, 'window_1': 4}. Best is trial 0 with value: 0.9751351237297058.


 Data shapes - Train: torch.Size([159254, 16, 18]), Val: torch.Size([27629, 16, 18]), Test: torch.Size([36225, 16, 18])
Error in fold fold_5: Target size (torch.Size([128, 4])) must be the same as input size (torch.Size([128, 1]))
Using 19 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag8', 'AWS_rollmean_21', 'AWS_rollstd_21', 'AWS_rollmean_12', 'AWS_rollstd_12', 'AWS_rollmean_17', 'AWS_rollstd_17']
 Data shapes - Train: torch.Size([8889, 42, 19]), Val: torch.Size([12233, 42, 19]), Test: torch.Size([16983, 42, 19])
Error in fold fold_1: Target size (torch.Size([128, 5])) must be the same as input size (torch.Size([128, 1]))
Using 19 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag8', 'AWS_rollmean_21', 'AWS_rollstd_21', 'AWS_rollmean_12', 'AWS_rollstd_12', 'AWS_rollmean_17', 'AWS_rollstd_17']
 Data shapes - Train: torch.Size([25438, 42, 19]), Val: torch.Size([12

[I 2025-05-06 18:17:49,571] Trial 3 finished with value: inf and parameters: {'hidden_size': 249, 'num_layers': 3, 'dropout': 0.33791356717779897, 'lr': 0.002507078055418594, 'time_step_in': 42, 'time_step_out': 5, 'stride': 2, 'binary_weight': 1.1529574194892842, 'intensity_weight': 1.4361334293768317, 'regression_weight': 1.1949050881903043, 'rain_threshold': 0.12317820344893969, 'num_lags': 1, 'lag_0': 8, 'num_windows': 3, 'window_0': 21, 'window_1': 12, 'window_2': 17}. Best is trial 0 with value: 0.9751351237297058.


 Data shapes - Train: torch.Size([75118, 42, 19]), Val: torch.Size([10089, 42, 19]), Test: torch.Size([14127, 42, 19])
Error in fold fold_5: Target size (torch.Size([128, 5])) must be the same as input size (torch.Size([128, 1]))
Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag11', 'AWS_lag3', 'AWS_lag1', 'AWS_rollmean_12', 'AWS_rollstd_12']
 Data shapes - Train: torch.Size([9888, 36, 17]), Val: torch.Size([12733, 36, 17]), Test: torch.Size([17426, 36, 17])
Error in fold fold_1: Target size (torch.Size([128, 6])) must be the same as input size (torch.Size([128, 1]))




Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag11', 'AWS_lag3', 'AWS_lag1', 'AWS_rollmean_12', 'AWS_rollstd_12']
 Data shapes - Train: torch.Size([26440, 36, 17]), Val: torch.Size([12768, 36, 17]), Test: torch.Size([16949, 36, 17])
Error in fold fold_2: Target size (torch.Size([128, 6])) must be the same as input size (torch.Size([128, 1]))




Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag11', 'AWS_lag3', 'AWS_lag1', 'AWS_rollmean_12', 'AWS_rollstd_12']
 Data shapes - Train: torch.Size([43000, 36, 17]), Val: torch.Size([12260, 36, 17]), Test: torch.Size([16285, 36, 17])
Error in fold fold_3: Target size (torch.Size([128, 6])) must be the same as input size (torch.Size([128, 1]))




Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag11', 'AWS_lag3', 'AWS_lag1', 'AWS_rollmean_12', 'AWS_rollstd_12']
 Data shapes - Train: torch.Size([59560, 36, 17]), Val: torch.Size([11582, 36, 17]), Test: torch.Size([15427, 36, 17])
Error in fold fold_4: Target size (torch.Size([128, 6])) must be the same as input size (torch.Size([128, 1]))




Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag11', 'AWS_lag3', 'AWS_lag1', 'AWS_rollmean_12', 'AWS_rollstd_12']


[I 2025-05-06 18:18:41,604] Trial 4 finished with value: inf and parameters: {'hidden_size': 89, 'num_layers': 1, 'dropout': 0.07802411256076464, 'lr': 0.00011885365177484947, 'time_step_in': 36, 'time_step_out': 6, 'stride': 2, 'binary_weight': 0.922236585303557, 'intensity_weight': 0.6945336232757245, 'regression_weight': 0.6593757356650781, 'rain_threshold': 0.12044069154960703, 'num_lags': 3, 'lag_0': 11, 'lag_1': 3, 'lag_2': 1, 'num_windows': 1, 'window_0': 12}. Best is trial 0 with value: 0.9751351237297058.


 Data shapes - Train: torch.Size([76120, 36, 17]), Val: torch.Size([10847, 36, 17]), Test: torch.Size([14819, 36, 17])
Error in fold fold_5: Target size (torch.Size([128, 6])) must be the same as input size (torch.Size([128, 1]))
Using 16 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag9', 'AWS_lag1', 'AWS_rollmean_10', 'AWS_rollstd_10']
 Data shapes - Train: torch.Size([7278, 12, 16]), Val: torch.Size([7731, 12, 16]), Test: torch.Size([10083, 12, 16])




   Epoch 1/5, Total Loss: 16.6429, Binary: 0.6753, Intensity: 0.9660, Regression: 7.4616
   Epoch 2/5, Total Loss: 10.6518, Binary: 0.6525, Intensity: 0.7604, Regression: 4.5058
   Epoch 3/5, Total Loss: 8.9127, Binary: 0.6081, Intensity: 0.6064, Regression: 3.7566
   Epoch 4/5, Total Loss: 8.3230, Binary: 0.5677, Intensity: 0.4980, Regression: 3.5691
   Epoch 5/5, Total Loss: 8.1150, Binary: 0.5417, Intensity: 0.4645, Regression: 3.5064
Using 16 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag9', 'AWS_lag1', 'AWS_rollmean_10', 'AWS_rollstd_10']
 Data shapes - Train: torch.Size([15558, 12, 16]), Val: torch.Size([7752, 12, 16]), Test: torch.Size([10009, 12, 16])




   Epoch 1/5, Total Loss: 13.9769, Binary: 0.6928, Intensity: 0.8989, Regression: 6.1065
   Epoch 2/5, Total Loss: 8.3520, Binary: 0.5814, Intensity: 0.5226, Regression: 3.5535
   Epoch 3/5, Total Loss: 7.6783, Binary: 0.5174, Intensity: 0.4422, Regression: 3.3121
   Epoch 4/5, Total Loss: 7.5143, Binary: 0.4755, Intensity: 0.4355, Regression: 3.2589
   Epoch 5/5, Total Loss: 7.3458, Binary: 0.4258, Intensity: 0.4295, Regression: 3.2078
Using 16 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag9', 'AWS_lag1', 'AWS_rollmean_10', 'AWS_rollstd_10']
 Data shapes - Train: torch.Size([23838, 12, 16]), Val: torch.Size([7717, 12, 16]), Test: torch.Size([9854, 12, 16])




   Epoch 1/5, Total Loss: 12.1748, Binary: 0.6620, Intensity: 0.7912, Regression: 5.2740
   Epoch 2/5, Total Loss: 7.8620, Binary: 0.5412, Intensity: 0.4701, Regression: 3.3685
   Epoch 3/5, Total Loss: 7.4546, Binary: 0.4592, Intensity: 0.4404, Regression: 3.2336
   Epoch 4/5, Total Loss: 7.3672, Binary: 0.4196, Intensity: 0.4359, Regression: 3.2173
   Epoch 5/5, Total Loss: 7.2394, Binary: 0.4047, Intensity: 0.4272, Regression: 3.1674
Using 16 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag9', 'AWS_lag1', 'AWS_rollmean_10', 'AWS_rollstd_10']
 Data shapes - Train: torch.Size([32118, 12, 16]), Val: torch.Size([7617, 12, 16]), Test: torch.Size([9860, 12, 16])




   Epoch 1/5, Total Loss: 10.6479, Binary: 0.6231, Intensity: 0.6604, Regression: 4.6122
   Epoch 2/5, Total Loss: 7.5245, Binary: 0.4831, Intensity: 0.4389, Regression: 3.2564
   Epoch 3/5, Total Loss: 7.2055, Binary: 0.3937, Intensity: 0.4216, Regression: 3.1618
   Epoch 4/5, Total Loss: 7.1032, Binary: 0.3742, Intensity: 0.4126, Regression: 3.1286
   Epoch 5/5, Total Loss: 6.9759, Binary: 0.3536, Intensity: 0.4083, Regression: 3.0788
Using 16 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag9', 'AWS_lag1', 'AWS_rollmean_10', 'AWS_rollstd_10']
 Data shapes - Train: torch.Size([40398, 12, 16]), Val: torch.Size([7451, 12, 16]), Test: torch.Size([9752, 12, 16])




   Epoch 1/5, Total Loss: 10.7429, Binary: 0.6189, Intensity: 0.6342, Regression: 4.6884
   Epoch 2/5, Total Loss: 7.5237, Binary: 0.4946, Intensity: 0.4321, Regression: 3.2546
   Epoch 3/5, Total Loss: 7.3217, Binary: 0.4668, Intensity: 0.4223, Regression: 3.1750
   Epoch 4/5, Total Loss: 7.1527, Binary: 0.4449, Intensity: 0.4164, Regression: 3.1055


[I 2025-05-06 18:19:20,499] Trial 5 finished with value: 1.2292535781860352 and parameters: {'hidden_size': 121, 'num_layers': 1, 'dropout': 0.2976796326008047, 'lr': 0.00027336759577519063, 'time_step_in': 12, 'time_step_out': 1, 'stride': 4, 'binary_weight': 1.230996253968206, 'intensity_weight': 1.696655256529891, 'regression_weight': 1.8994142497218727, 'rain_threshold': 0.08838353373898837, 'num_lags': 2, 'lag_0': 9, 'lag_1': 1, 'num_windows': 1, 'window_0': 10}. Best is trial 0 with value: 0.9751351237297058.


   Epoch 5/5, Total Loss: 7.0221, Binary: 0.4152, Intensity: 0.4095, Regression: 3.0621
Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag9', 'AWS_rollmean_12', 'AWS_rollstd_12', 'AWS_rollmean_9', 'AWS_rollstd_9']
 Data shapes - Train: torch.Size([3518, 45, 17]), Val: torch.Size([4821, 45, 17]), Test: torch.Size([6710, 45, 17])
Error in fold fold_1: Target size (torch.Size([128, 6])) must be the same as input size (torch.Size([128, 1]))
Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag9', 'AWS_rollmean_12', 'AWS_rollstd_12', 'AWS_rollmean_9', 'AWS_rollstd_9']
 Data shapes - Train: torch.Size([10126, 45, 17]), Val: torch.Size([4830, 45, 17]), Test: torch.Size([6486, 45, 17])
Error in fold fold_2: Target size (torch.Size([128, 6])) must be the same as input size (torch.Size([128, 1]))
Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850'

[I 2025-05-06 18:19:52,257] Trial 6 finished with value: inf and parameters: {'hidden_size': 109, 'num_layers': 3, 'dropout': 0.30221753118590716, 'lr': 0.0031388930739835864, 'time_step_in': 45, 'time_step_out': 6, 'stride': 5, 'binary_weight': 1.0962498854582758, 'intensity_weight': 0.7084959002971971, 'regression_weight': 1.4847293966572253, 'rain_threshold': 0.13466827573592105, 'num_lags': 1, 'lag_0': 9, 'num_windows': 2, 'window_0': 12, 'window_1': 9}. Best is trial 0 with value: 0.9751351237297058.


 Data shapes - Train: torch.Size([29825, 45, 17]), Val: torch.Size([3952, 45, 17]), Test: torch.Size([5494, 45, 17])
Error in fold fold_5: Target size (torch.Size([128, 6])) must be the same as input size (torch.Size([128, 1]))
Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag10', 'AWS_lag4', 'AWS_lag2', 'AWS_rollmean_12', 'AWS_rollstd_12']
 Data shapes - Train: torch.Size([4852, 14, 17]), Val: torch.Size([5137, 14, 17]), Test: torch.Size([6683, 14, 17])
   Epoch 1/5, Total Loss: 12.0610, Binary: 0.6613, Intensity: 0.7111, Regression: 5.0548
   Epoch 2/5, Total Loss: 9.5505, Binary: 0.5597, Intensity: 0.4763, Regression: 4.0708
   Epoch 3/5, Total Loss: 8.9434, Binary: 0.5389, Intensity: 0.4597, Regression: 3.7904
   Epoch 4/5, Total Loss: 8.7250, Binary: 0.5104, Intensity: 0.4517, Regression: 3.7028
   Epoch 5/5, Total Loss: 8.6960, Binary: 0.4311, Intensity: 0.4523, Regression: 3.7295
Using 17 features: ['TCW', 'T

[I 2025-05-06 18:20:27,308] Trial 7 finished with value: 1.2409850358963013 and parameters: {'hidden_size': 224, 'num_layers': 2, 'dropout': 0.2884302581604448, 'lr': 0.0022771156556749606, 'time_step_in': 14, 'time_step_out': 1, 'stride': 6, 'binary_weight': 1.0453900835037444, 'intensity_weight': 1.9811173843097414, 'regression_weight': 1.970591644169272, 'rain_threshold': 0.07879883401042768, 'num_lags': 3, 'lag_0': 10, 'lag_1': 4, 'lag_2': 2, 'num_windows': 1, 'window_0': 12}. Best is trial 0 with value: 0.9751351237297058.


   Epoch 5/5, Total Loss: 6.9733, Binary: 0.3300, Intensity: 0.3902, Regression: 2.9714
Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag11', 'AWS_rollmean_16', 'AWS_rollstd_16', 'AWS_rollmean_12', 'AWS_rollstd_12']
 Data shapes - Train: torch.Size([5183, 25, 17]), Val: torch.Size([5738, 25, 17]), Test: torch.Size([7591, 25, 17])
   Epoch 1/5, Total Loss: 8.8467, Binary: 0.6584, Intensity: 0.7073, Regression: 6.4329
   Epoch 2/5, Total Loss: 7.1641, Binary: 0.6215, Intensity: 0.5958, Regression: 5.0110
   Epoch 3/5, Total Loss: 6.2266, Binary: 0.5752, Intensity: 0.5114, Regression: 4.3076
   Epoch 4/5, Total Loss: 5.7354, Binary: 0.5496, Intensity: 0.4722, Regression: 3.9314
   Epoch 5/5, Total Loss: 5.6064, Binary: 0.5598, Intensity: 0.4653, Regression: 3.7963
Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag11', 'AWS_rollmean_16', 'AWS_rollstd_

[I 2025-05-06 18:22:11,868] Trial 8 finished with value: 1.1738453954458237 and parameters: {'hidden_size': 189, 'num_layers': 3, 'dropout': 0.300658904399111, 'lr': 0.005995452167759004, 'time_step_in': 25, 'time_step_out': 1, 'stride': 5, 'binary_weight': 1.7043697933237087, 'intensity_weight': 1.9534125923661887, 'regression_weight': 0.9860063236310186, 'rain_threshold': 0.07533561214271926, 'num_lags': 1, 'lag_0': 11, 'num_windows': 2, 'window_0': 16, 'window_1': 12}. Best is trial 0 with value: 0.9751351237297058.


Using 15 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag12', 'AWS_rollmean_23', 'AWS_rollstd_23']
 Data shapes - Train: torch.Size([5704, 45, 15]), Val: torch.Size([8033, 45, 15]), Test: torch.Size([11175, 45, 15])
Error in fold fold_1: Target size (torch.Size([128, 5])) must be the same as input size (torch.Size([128, 1]))




Using 15 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag12', 'AWS_rollmean_23', 'AWS_rollstd_23']
 Data shapes - Train: torch.Size([16736, 45, 15]), Val: torch.Size([8043, 45, 15]), Test: torch.Size([10798, 45, 15])
Error in fold fold_2: Target size (torch.Size([128, 5])) must be the same as input size (torch.Size([128, 1]))




Using 15 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag12', 'AWS_rollmean_23', 'AWS_rollstd_23']
 Data shapes - Train: torch.Size([27776, 45, 15]), Val: torch.Size([7638, 45, 15]), Test: torch.Size([10298, 45, 15])
Error in fold fold_3: Target size (torch.Size([128, 5])) must be the same as input size (torch.Size([128, 1]))




Using 15 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag12', 'AWS_rollmean_23', 'AWS_rollstd_23']
 Data shapes - Train: torch.Size([38816, 45, 15]), Val: torch.Size([7097, 45, 15]), Test: torch.Size([9695, 45, 15])
Error in fold fold_4: Target size (torch.Size([128, 5])) must be the same as input size (torch.Size([128, 1]))




Using 15 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag12', 'AWS_rollmean_23', 'AWS_rollstd_23']


[I 2025-05-06 18:22:52,003] Trial 9 finished with value: inf and parameters: {'hidden_size': 185, 'num_layers': 1, 'dropout': 0.0932146113081298, 'lr': 0.0006008215329215217, 'time_step_in': 45, 'time_step_out': 5, 'stride': 3, 'binary_weight': 1.9727089446878263, 'intensity_weight': 1.4594203082479327, 'regression_weight': 1.523481736309964, 'rain_threshold': 0.16268539128214057, 'num_lags': 1, 'lag_0': 12, 'num_windows': 1, 'window_0': 23}. Best is trial 0 with value: 0.9751351237297058.


 Data shapes - Train: torch.Size([49856, 45, 15]), Val: torch.Size([6501, 45, 15]), Test: torch.Size([9172, 45, 15])
Error in fold fold_5: Target size (torch.Size([128, 5])) must be the same as input size (torch.Size([128, 1]))
Using 20 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag5', 'AWS_lag10', 'AWS_rollmean_3', 'AWS_rollstd_3', 'AWS_rollmean_24', 'AWS_rollstd_24', 'AWS_rollmean_3', 'AWS_rollstd_3']
 Data shapes - Train: torch.Size([3851, 28, 20]), Val: torch.Size([4576, 28, 20]), Test: torch.Size([6186, 28, 20])
Error in fold fold_1: Target size (torch.Size([128, 3])) must be the same as input size (torch.Size([128, 1]))
Using 20 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag5', 'AWS_lag10', 'AWS_rollmean_3', 'AWS_rollstd_3', 'AWS_rollmean_24', 'AWS_rollstd_24', 'AWS_rollmean_3', 'AWS_rollstd_3']
 Data shapes - Train: torch.Size([9370, 28, 20]), Val: to

[I 2025-05-06 18:23:19,552] Trial 10 finished with value: inf and parameters: {'hidden_size': 179, 'num_layers': 2, 'dropout': 0.17093357421759126, 'lr': 0.0006783428545688714, 'time_step_in': 28, 'time_step_out': 3, 'stride': 6, 'binary_weight': 0.6507556265840806, 'intensity_weight': 1.064263976263821, 'regression_weight': 0.5727714151335965, 'rain_threshold': 0.18901055260350016, 'num_lags': 2, 'lag_0': 5, 'lag_1': 10, 'num_windows': 3, 'window_0': 3, 'window_1': 24, 'window_2': 3}. Best is trial 0 with value: 0.9751351237297058.


 Data shapes - Train: torch.Size([25930, 28, 20]), Val: torch.Size([4201, 28, 20]), Test: torch.Size([5614, 28, 20])
Error in fold fold_5: Target size (torch.Size([128, 3])) must be the same as input size (torch.Size([128, 1]))
Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag6', 'AWS_rollmean_3', 'AWS_rollstd_3', 'AWS_rollmean_20', 'AWS_rollstd_20']
 Data shapes - Train: torch.Size([4897, 25, 17]), Val: torch.Size([5697, 25, 17]), Test: torch.Size([7555, 25, 17])
Error in fold fold_1: Target size (torch.Size([128, 2])) must be the same as input size (torch.Size([128, 1]))
Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag6', 'AWS_rollmean_3', 'AWS_rollstd_3', 'AWS_rollmean_20', 'AWS_rollstd_20']
 Data shapes - Train: torch.Size([11705, 25, 17]), Val: torch.Size([5703, 25, 17]), Test: torch.Size([7433, 25, 17])
Error in fold fold_2: Target size (t

[I 2025-05-06 18:23:44,470] Trial 11 finished with value: inf and parameters: {'hidden_size': 191, 'num_layers': 2, 'dropout': 0.43227042763023366, 'lr': 0.00948170242509931, 'time_step_in': 25, 'time_step_out': 2, 'stride': 5, 'binary_weight': 1.5975198119997063, 'intensity_weight': 1.0352986509174362, 'regression_weight': 0.911185733583928, 'rain_threshold': 0.05441861917050089, 'num_lags': 1, 'lag_0': 6, 'num_windows': 2, 'window_0': 3, 'window_1': 20}. Best is trial 0 with value: 0.9751351237297058.


 Data shapes - Train: torch.Size([31495, 25, 17]), Val: torch.Size([5177, 25, 17]), Test: torch.Size([6954, 25, 17])
Error in fold fold_5: Target size (torch.Size([128, 2])) must be the same as input size (torch.Size([128, 1]))
Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag7', 'AWS_rollmean_19', 'AWS_rollstd_19', 'AWS_rollmean_17', 'AWS_rollstd_17']
 Data shapes - Train: torch.Size([5230, 23, 17]), Val: torch.Size([5757, 23, 17]), Test: torch.Size([7627, 23, 17])
Error in fold fold_1: Target size (torch.Size([128, 2])) must be the same as input size (torch.Size([128, 1]))
Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag7', 'AWS_rollmean_19', 'AWS_rollstd_19', 'AWS_rollmean_17', 'AWS_rollstd_17']
 Data shapes - Train: torch.Size([11797, 23, 17]), Val: torch.Size([5763, 23, 17]), Test: torch.Size([7547, 23, 17])
Error in fold fold_2: Target siz

[I 2025-05-06 18:24:08,131] Trial 12 finished with value: inf and parameters: {'hidden_size': 163, 'num_layers': 3, 'dropout': 0.1896250453034398, 'lr': 0.0012575431885031134, 'time_step_in': 23, 'time_step_out': 2, 'stride': 5, 'binary_weight': 1.5936421445868996, 'intensity_weight': 1.0889417789729956, 'regression_weight': 0.8666343757245492, 'rain_threshold': 0.1997265053339149, 'num_lags': 1, 'lag_0': 7, 'num_windows': 2, 'window_0': 19, 'window_1': 17}. Best is trial 0 with value: 0.9751351237297058.


 Data shapes - Train: torch.Size([31605, 23, 17]), Val: torch.Size([5394, 23, 17]), Test: torch.Size([6993, 23, 17])
Error in fold fold_5: Target size (torch.Size([128, 2])) must be the same as input size (torch.Size([128, 1]))
Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag12', 'AWS_rollmean_17', 'AWS_rollstd_17', 'AWS_rollmean_7', 'AWS_rollstd_7']
 Data shapes - Train: torch.Size([4564, 33, 17]), Val: torch.Size([5412, 33, 17]), Test: torch.Size([7301, 33, 17])




   Epoch 1/5, Total Loss: 7.1009, Binary: 0.6204, Intensity: 0.6296, Regression: 6.1521
   Epoch 2/5, Total Loss: 4.2795, Binary: 0.4570, Intensity: 0.4887, Regression: 3.3021
   Epoch 3/5, Total Loss: 3.9565, Binary: 0.4109, Intensity: 0.4293, Regression: 3.1261
   Epoch 4/5, Total Loss: 3.8949, Binary: 0.3807, Intensity: 0.4360, Regression: 3.0904
   Epoch 5/5, Total Loss: 3.6655, Binary: 0.3496, Intensity: 0.4241, Regression: 2.8927
Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag12', 'AWS_rollmean_17', 'AWS_rollstd_17', 'AWS_rollmean_7', 'AWS_rollstd_7']
 Data shapes - Train: torch.Size([11129, 33, 17]), Val: torch.Size([5432, 33, 17]), Test: torch.Size([7167, 33, 17])




   Epoch 1/5, Total Loss: 4.5913, Binary: 0.4928, Intensity: 0.4924, Regression: 3.6119
   Epoch 2/5, Total Loss: 3.6128, Binary: 0.3377, Intensity: 0.3996, Regression: 2.9061
   Epoch 3/5, Total Loss: 3.3225, Binary: 0.2886, Intensity: 0.3778, Regression: 2.6893
   Epoch 4/5, Total Loss: 3.3012, Binary: 0.2793, Intensity: 0.3730, Regression: 2.6912
   Epoch 5/5, Total Loss: 3.2705, Binary: 0.2779, Intensity: 0.3709, Regression: 2.6609
Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag12', 'AWS_rollmean_17', 'AWS_rollstd_17', 'AWS_rollmean_7', 'AWS_rollstd_7']
 Data shapes - Train: torch.Size([17695, 33, 17]), Val: torch.Size([5275, 33, 17]), Test: torch.Size([6977, 33, 17])




   Epoch 1/5, Total Loss: 4.8461, Binary: 0.4793, Intensity: 0.4823, Regression: 3.9739
   Epoch 2/5, Total Loss: 3.7050, Binary: 0.3505, Intensity: 0.4123, Regression: 2.9669
   Epoch 3/5, Total Loss: 3.6287, Binary: 0.3127, Intensity: 0.3964, Regression: 2.9793
   Epoch 4/5, Total Loss: 3.3746, Binary: 0.2887, Intensity: 0.3761, Regression: 2.7572
   Epoch 5/5, Total Loss: 3.2224, Binary: 0.2794, Intensity: 0.3675, Regression: 2.6068
Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag12', 'AWS_rollmean_17', 'AWS_rollstd_17', 'AWS_rollmean_7', 'AWS_rollstd_7']
 Data shapes - Train: torch.Size([24415, 33, 17]), Val: torch.Size([5084, 33, 17]), Test: torch.Size([6730, 33, 17])




   Epoch 1/5, Total Loss: 4.8833, Binary: 0.4790, Intensity: 0.5169, Regression: 3.9404
   Epoch 2/5, Total Loss: 3.6779, Binary: 0.3304, Intensity: 0.4102, Regression: 2.9754
   Epoch 3/5, Total Loss: 3.4507, Binary: 0.2900, Intensity: 0.3906, Regression: 2.8151
   Epoch 4/5, Total Loss: 3.3602, Binary: 0.2812, Intensity: 0.3796, Regression: 2.7452
   Epoch 5/5, Total Loss: 3.3808, Binary: 0.2773, Intensity: 0.3799, Regression: 2.7772
Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag12', 'AWS_rollmean_17', 'AWS_rollstd_17', 'AWS_rollmean_7', 'AWS_rollstd_7']
 Data shapes - Train: torch.Size([31160, 33, 17]), Val: torch.Size([4862, 33, 17]), Test: torch.Size([6467, 33, 17])




   Epoch 1/5, Total Loss: 4.3380, Binary: 0.4182, Intensity: 0.4579, Regression: 3.5166
   Epoch 2/5, Total Loss: 3.7157, Binary: 0.3011, Intensity: 0.4129, Regression: 3.0695
   Epoch 3/5, Total Loss: 3.5319, Binary: 0.2816, Intensity: 0.3947, Regression: 2.9212
   Epoch 4/5, Total Loss: 3.4714, Binary: 0.2704, Intensity: 0.3927, Regression: 2.8720


[I 2025-05-06 18:24:58,371] Trial 13 finished with value: 1.0444024443626403 and parameters: {'hidden_size': 222, 'num_layers': 1, 'dropout': 0.18822986462146468, 'lr': 0.009928863888880066, 'time_step_in': 33, 'time_step_out': 1, 'stride': 5, 'binary_weight': 1.4955197430853386, 'intensity_weight': 1.874470727892307, 'regression_weight': 0.8116090451931186, 'rain_threshold': 0.15581700413898755, 'num_lags': 1, 'lag_0': 12, 'num_windows': 2, 'window_0': 17, 'window_1': 7}. Best is trial 0 with value: 0.9751351237297058.


   Epoch 5/5, Total Loss: 3.4240, Binary: 0.2674, Intensity: 0.3907, Regression: 2.8238
Using 20 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag4', 'AWS_lag8', 'AWS_rollmean_6', 'AWS_rollstd_6', 'AWS_rollmean_4', 'AWS_rollstd_4', 'AWS_rollmean_23', 'AWS_rollstd_23']
 Data shapes - Train: torch.Size([3518, 35, 20]), Val: torch.Size([4386, 35, 20]), Test: torch.Size([5951, 35, 20])
Error in fold fold_1: Target size (torch.Size([128, 3])) must be the same as input size (torch.Size([128, 1]))




Using 20 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag4', 'AWS_lag8', 'AWS_rollmean_6', 'AWS_rollstd_6', 'AWS_rollmean_4', 'AWS_rollstd_4', 'AWS_rollmean_23', 'AWS_rollstd_23']
 Data shapes - Train: torch.Size([9036, 35, 20]), Val: torch.Size([4408, 35, 20]), Test: torch.Size([5867, 35, 20])
Error in fold fold_2: Target size (torch.Size([128, 3])) must be the same as input size (torch.Size([128, 1]))




Using 20 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag4', 'AWS_lag8', 'AWS_rollmean_6', 'AWS_rollstd_6', 'AWS_rollmean_4', 'AWS_rollstd_4', 'AWS_rollmean_23', 'AWS_rollstd_23']
 Data shapes - Train: torch.Size([14556, 35, 20]), Val: torch.Size([4298, 35, 20]), Test: torch.Size([5720, 35, 20])
Error in fold fold_3: Target size (torch.Size([128, 3])) must be the same as input size (torch.Size([128, 1]))




Using 20 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag4', 'AWS_lag8', 'AWS_rollmean_6', 'AWS_rollstd_6', 'AWS_rollmean_4', 'AWS_rollstd_4', 'AWS_rollmean_23', 'AWS_rollstd_23']
 Data shapes - Train: torch.Size([20076, 35, 20]), Val: torch.Size([4100, 35, 20]), Test: torch.Size([5368, 35, 20])
Error in fold fold_4: Target size (torch.Size([128, 3])) must be the same as input size (torch.Size([128, 1]))




Using 20 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag4', 'AWS_lag8', 'AWS_rollmean_6', 'AWS_rollstd_6', 'AWS_rollmean_4', 'AWS_rollstd_4', 'AWS_rollmean_23', 'AWS_rollstd_23']


[I 2025-05-06 18:25:28,619] Trial 14 finished with value: inf and parameters: {'hidden_size': 225, 'num_layers': 1, 'dropout': 0.189272154019044, 'lr': 0.0003674777871416969, 'time_step_in': 35, 'time_step_out': 3, 'stride': 6, 'binary_weight': 1.3921018516459982, 'intensity_weight': 0.9047285627477635, 'regression_weight': 0.746534991685039, 'rain_threshold': 0.1647638418549816, 'num_lags': 2, 'lag_0': 4, 'lag_1': 8, 'num_windows': 3, 'window_0': 6, 'window_1': 4, 'window_2': 23}. Best is trial 0 with value: 0.9751351237297058.


 Data shapes - Train: torch.Size([25596, 35, 20]), Val: torch.Size([3923, 35, 20]), Test: torch.Size([5166, 35, 20])
Error in fold fold_5: Target size (torch.Size([128, 3])) must be the same as input size (torch.Size([128, 1]))
Using 15 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag12', 'AWS_rollmean_18', 'AWS_rollstd_18']
 Data shapes - Train: torch.Size([5610, 34, 15]), Val: torch.Size([6714, 34, 15]), Test: torch.Size([9055, 34, 15])




   Epoch 1/5, Total Loss: 9.0399, Binary: 0.6684, Intensity: 0.8765, Regression: 5.6119
   Epoch 2/5, Total Loss: 6.7235, Binary: 0.5560, Intensity: 0.5410, Regression: 4.2140
   Epoch 3/5, Total Loss: 6.0623, Binary: 0.5124, Intensity: 0.4702, Regression: 3.8040
   Epoch 4/5, Total Loss: 5.6841, Binary: 0.4751, Intensity: 0.4547, Regression: 3.5594
   Epoch 5/5, Total Loss: 5.5077, Binary: 0.4549, Intensity: 0.4409, Regression: 3.4548
Using 15 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag12', 'AWS_rollmean_18', 'AWS_rollstd_18']
 Data shapes - Train: torch.Size([13888, 34, 15]), Val: torch.Size([6723, 34, 15]), Test: torch.Size([8898, 34, 15])




   Epoch 1/5, Total Loss: 7.5307, Binary: 0.5874, Intensity: 0.6147, Regression: 4.7513
   Epoch 2/5, Total Loss: 5.7316, Binary: 0.4950, Intensity: 0.4437, Regression: 3.5855
   Epoch 3/5, Total Loss: 5.3294, Binary: 0.4274, Intensity: 0.4329, Regression: 3.3514
   Epoch 4/5, Total Loss: 4.9703, Binary: 0.3626, Intensity: 0.4076, Regression: 3.1622
   Epoch 5/5, Total Loss: 4.7603, Binary: 0.3298, Intensity: 0.3857, Regression: 3.0528
Using 15 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag12', 'AWS_rollmean_18', 'AWS_rollstd_18']
 Data shapes - Train: torch.Size([22168, 34, 15]), Val: torch.Size([6494, 34, 15]), Test: torch.Size([8605, 34, 15])




   Epoch 1/5, Total Loss: 6.8612, Binary: 0.5587, Intensity: 0.5706, Regression: 4.2923
   Epoch 2/5, Total Loss: 5.4471, Binary: 0.4391, Intensity: 0.4355, Regression: 3.4295
   Epoch 3/5, Total Loss: 5.0768, Binary: 0.3650, Intensity: 0.4136, Regression: 3.2385
   Epoch 4/5, Total Loss: 4.7330, Binary: 0.3326, Intensity: 0.3889, Regression: 3.0247
   Epoch 5/5, Total Loss: 4.6590, Binary: 0.3190, Intensity: 0.3823, Regression: 2.9874
Using 15 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag12', 'AWS_rollmean_18', 'AWS_rollstd_18']
 Data shapes - Train: torch.Size([30448, 34, 15]), Val: torch.Size([6216, 34, 15]), Test: torch.Size([8253, 34, 15])




   Epoch 1/5, Total Loss: 6.3967, Binary: 0.5335, Intensity: 0.5337, Regression: 3.9860
   Epoch 2/5, Total Loss: 5.1944, Binary: 0.3845, Intensity: 0.4317, Regression: 3.2931
   Epoch 3/5, Total Loss: 4.7052, Binary: 0.3254, Intensity: 0.3958, Regression: 3.0040
   Epoch 4/5, Total Loss: 4.5598, Binary: 0.3114, Intensity: 0.3868, Regression: 2.9125
   Epoch 5/5, Total Loss: 4.3182, Binary: 0.3064, Intensity: 0.3734, Regression: 2.7385
Using 15 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag12', 'AWS_rollmean_18', 'AWS_rollstd_18']
 Data shapes - Train: torch.Size([38728, 34, 15]), Val: torch.Size([5995, 34, 15]), Test: torch.Size([7959, 34, 15])




   Epoch 1/5, Total Loss: 6.4465, Binary: 0.5087, Intensity: 0.5366, Regression: 4.0507
   Epoch 2/5, Total Loss: 5.1349, Binary: 0.3969, Intensity: 0.4231, Regression: 3.2400
   Epoch 3/5, Total Loss: 4.6513, Binary: 0.3323, Intensity: 0.3937, Regression: 2.9553
   Epoch 4/5, Total Loss: 4.4656, Binary: 0.3107, Intensity: 0.3817, Regression: 2.8431


[I 2025-05-06 18:26:26,322] Trial 15 finished with value: 1.2559985637664794 and parameters: {'hidden_size': 252, 'num_layers': 1, 'dropout': 0.013690156939023479, 'lr': 0.0011238680108284324, 'time_step_in': 34, 'time_step_out': 1, 'stride': 4, 'binary_weight': 1.4081878130061996, 'intensity_weight': 1.2020056690487921, 'regression_weight': 1.255386324781085, 'rain_threshold': 0.16756058169368032, 'num_lags': 1, 'lag_0': 12, 'num_windows': 1, 'window_0': 18}. Best is trial 0 with value: 0.9751351237297058.


   Epoch 5/5, Total Loss: 4.3291, Binary: 0.3066, Intensity: 0.3758, Regression: 2.7447
Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag9', 'AWS_rollmean_6', 'AWS_rollstd_6', 'AWS_rollmean_8', 'AWS_rollstd_8']
 Data shapes - Train: torch.Size([6703, 39, 17]), Val: torch.Size([8570, 39, 17]), Test: torch.Size([11675, 39, 17])
Error in fold fold_1: Target size (torch.Size([128, 2])) must be the same as input size (torch.Size([128, 1]))




Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag9', 'AWS_rollmean_6', 'AWS_rollstd_6', 'AWS_rollmean_8', 'AWS_rollstd_8']
 Data shapes - Train: torch.Size([17738, 39, 17]), Val: torch.Size([8580, 39, 17]), Test: torch.Size([11395, 39, 17])
Error in fold fold_2: Target size (torch.Size([128, 2])) must be the same as input size (torch.Size([128, 1]))




Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag9', 'AWS_rollmean_6', 'AWS_rollstd_6', 'AWS_rollmean_8', 'AWS_rollstd_8']
 Data shapes - Train: torch.Size([28778, 39, 17]), Val: torch.Size([8241, 39, 17]), Test: torch.Size([10991, 39, 17])
Error in fold fold_3: Target size (torch.Size([128, 2])) must be the same as input size (torch.Size([128, 1]))




Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag9', 'AWS_rollmean_6', 'AWS_rollstd_6', 'AWS_rollmean_8', 'AWS_rollstd_8']
 Data shapes - Train: torch.Size([39818, 39, 17]), Val: torch.Size([7787, 39, 17]), Test: torch.Size([10390, 39, 17])
Error in fold fold_4: Target size (torch.Size([128, 2])) must be the same as input size (torch.Size([128, 1]))




Using 17 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag9', 'AWS_rollmean_6', 'AWS_rollstd_6', 'AWS_rollmean_8', 'AWS_rollstd_8']


[I 2025-05-06 18:27:08,821] Trial 16 finished with value: inf and parameters: {'hidden_size': 217, 'num_layers': 1, 'dropout': 0.22184437632710852, 'lr': 0.00035302342567352366, 'time_step_in': 39, 'time_step_out': 2, 'stride': 3, 'binary_weight': 0.8329936280161174, 'intensity_weight': 1.727434599286704, 'regression_weight': 0.5011198192556792, 'rain_threshold': 0.1464158869855329, 'num_lags': 1, 'lag_0': 9, 'num_windows': 2, 'window_0': 6, 'window_1': 8}. Best is trial 0 with value: 0.9751351237297058.


 Data shapes - Train: torch.Size([50858, 39, 17]), Val: torch.Size([7312, 39, 17]), Test: torch.Size([9993, 39, 17])
Error in fold fold_5: Target size (torch.Size([128, 2])) must be the same as input size (torch.Size([128, 1]))
Using 21 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag1', 'AWS_lag7', 'AWS_lag12', 'AWS_rollmean_24', 'AWS_rollstd_24', 'AWS_rollmean_8', 'AWS_rollstd_8', 'AWS_rollmean_3', 'AWS_rollstd_3']
 Data shapes - Train: torch.Size([4564, 31, 21]), Val: torch.Size([5412, 31, 21]), Test: torch.Size([7301, 31, 21])
Error in fold fold_1: Target size (torch.Size([128, 3])) must be the same as input size (torch.Size([128, 1]))




Using 21 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag1', 'AWS_lag7', 'AWS_lag12', 'AWS_rollmean_24', 'AWS_rollstd_24', 'AWS_rollmean_8', 'AWS_rollstd_8', 'AWS_rollmean_3', 'AWS_rollstd_3']
 Data shapes - Train: torch.Size([11129, 31, 21]), Val: torch.Size([5432, 31, 21]), Test: torch.Size([7167, 31, 21])
Error in fold fold_2: Target size (torch.Size([128, 3])) must be the same as input size (torch.Size([128, 1]))




Using 21 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag1', 'AWS_lag7', 'AWS_lag12', 'AWS_rollmean_24', 'AWS_rollstd_24', 'AWS_rollmean_8', 'AWS_rollstd_8', 'AWS_rollmean_3', 'AWS_rollstd_3']
 Data shapes - Train: torch.Size([17695, 31, 21]), Val: torch.Size([5275, 31, 21]), Test: torch.Size([6977, 31, 21])
Error in fold fold_3: Target size (torch.Size([128, 3])) must be the same as input size (torch.Size([128, 1]))




Using 21 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag1', 'AWS_lag7', 'AWS_lag12', 'AWS_rollmean_24', 'AWS_rollstd_24', 'AWS_rollmean_8', 'AWS_rollstd_8', 'AWS_rollmean_3', 'AWS_rollstd_3']
 Data shapes - Train: torch.Size([24415, 31, 21]), Val: torch.Size([5084, 31, 21]), Test: torch.Size([6730, 31, 21])
Error in fold fold_4: Target size (torch.Size([128, 3])) must be the same as input size (torch.Size([128, 1]))




Using 21 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag1', 'AWS_lag7', 'AWS_lag12', 'AWS_rollmean_24', 'AWS_rollstd_24', 'AWS_rollmean_8', 'AWS_rollstd_8', 'AWS_rollmean_3', 'AWS_rollstd_3']


[I 2025-05-06 18:27:41,470] Trial 17 finished with value: inf and parameters: {'hidden_size': 159, 'num_layers': 1, 'dropout': 0.36169911748051625, 'lr': 0.0047890209595500095, 'time_step_in': 31, 'time_step_out': 3, 'stride': 5, 'binary_weight': 1.3412878037051505, 'intensity_weight': 0.5441135794439322, 'regression_weight': 0.7506875568523435, 'rain_threshold': 0.18201324857293175, 'num_lags': 3, 'lag_0': 1, 'lag_1': 7, 'lag_2': 12, 'num_windows': 3, 'window_0': 24, 'window_1': 8, 'window_2': 3}. Best is trial 0 with value: 0.9751351237297058.


 Data shapes - Train: torch.Size([31160, 31, 21]), Val: torch.Size([4862, 31, 21]), Test: torch.Size([6467, 31, 21])
Error in fold fold_5: Target size (torch.Size([128, 3])) must be the same as input size (torch.Size([128, 1]))
Using 15 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag12', 'AWS_rollmean_16', 'AWS_rollstd_16']
 Data shapes - Train: torch.Size([4518, 19, 15]), Val: torch.Size([4922, 19, 15]), Test: torch.Size([6438, 19, 15])
Error in fold fold_1: Target size (torch.Size([128, 4])) must be the same as input size (torch.Size([128, 1]))
Using 15 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag12', 'AWS_rollmean_16', 'AWS_rollstd_16']
 Data shapes - Train: torch.Size([10038, 19, 15]), Val: torch.Size([4910, 19, 15]), Test: torch.Size([6341, 19, 15])
Error in fold fold_2: Target size (torch.Size([128, 4])) must be the same as input size (torch.Size([128

[I 2025-05-06 18:27:58,281] Trial 18 finished with value: inf and parameters: {'hidden_size': 216, 'num_layers': 2, 'dropout': 0.49751344096015226, 'lr': 0.0015666934839876836, 'time_step_in': 19, 'time_step_out': 4, 'stride': 6, 'binary_weight': 0.7210414018365665, 'intensity_weight': 1.3128312006367817, 'regression_weight': 1.1699617295079312, 'rain_threshold': 0.14959982415866094, 'num_lags': 1, 'lag_0': 12, 'num_windows': 1, 'window_0': 16}. Best is trial 0 with value: 0.9751351237297058.


 Data shapes - Train: torch.Size([26598, 19, 15]), Val: torch.Size([4501, 19, 15]), Test: torch.Size([6026, 19, 15])
Error in fold fold_5: Target size (torch.Size([128, 4])) must be the same as input size (torch.Size([128, 1]))
Using 18 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag10', 'AWS_lag12', 'AWS_rollmean_7', 'AWS_rollstd_7', 'AWS_rollmean_16', 'AWS_rollstd_16']
 Data shapes - Train: torch.Size([5943, 29, 18]), Val: torch.Size([6925, 29, 18]), Test: torch.Size([9294, 29, 18])




   Epoch 1/5, Total Loss: 14.4432, Binary: 0.6774, Intensity: 1.2251, Regression: 9.2472
   Epoch 2/5, Total Loss: 9.9780, Binary: 0.6518, Intensity: 0.9256, Regression: 6.2157
   Epoch 3/5, Total Loss: 7.7702, Binary: 0.6418, Intensity: 0.8492, Regression: 4.6702
   Epoch 4/5, Total Loss: 6.9307, Binary: 0.6245, Intensity: 0.8771, Regression: 4.0558
   Epoch 5/5, Total Loss: 6.4074, Binary: 0.6103, Intensity: 0.8387, Regression: 3.7101
Using 18 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag10', 'AWS_lag12', 'AWS_rollmean_7', 'AWS_rollstd_7', 'AWS_rollmean_16', 'AWS_rollstd_16']
 Data shapes - Train: torch.Size([14222, 29, 18]), Val: torch.Size([6974, 29, 18]), Test: torch.Size([9140, 29, 18])




   Epoch 1/5, Total Loss: 11.2783, Binary: 0.6772, Intensity: 0.9744, Regression: 7.1107
   Epoch 2/5, Total Loss: 7.0714, Binary: 0.6252, Intensity: 0.7900, Regression: 4.2119
   Epoch 3/5, Total Loss: 5.8938, Binary: 0.5569, Intensity: 0.6026, Regression: 3.5215
   Epoch 4/5, Total Loss: 5.3583, Binary: 0.4947, Intensity: 0.4608, Regression: 3.2638
   Epoch 5/5, Total Loss: 5.1236, Binary: 0.4448, Intensity: 0.4305, Regression: 3.1462
Using 18 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag10', 'AWS_lag12', 'AWS_rollmean_7', 'AWS_rollstd_7', 'AWS_rollmean_16', 'AWS_rollstd_16']
 Data shapes - Train: torch.Size([22502, 29, 18]), Val: torch.Size([6793, 29, 18]), Test: torch.Size([8874, 29, 18])




   Epoch 1/5, Total Loss: 9.2589, Binary: 0.6471, Intensity: 0.8114, Regression: 5.7693
   Epoch 2/5, Total Loss: 5.9908, Binary: 0.5836, Intensity: 0.5794, Regression: 3.5885
   Epoch 3/5, Total Loss: 5.5021, Binary: 0.5410, Intensity: 0.4536, Regression: 3.3417
   Epoch 4/5, Total Loss: 5.1587, Binary: 0.5074, Intensity: 0.4354, Regression: 3.1266
   Epoch 5/5, Total Loss: 4.9719, Binary: 0.4520, Intensity: 0.4275, Regression: 3.0333
Using 18 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag10', 'AWS_lag12', 'AWS_rollmean_7', 'AWS_rollstd_7', 'AWS_rollmean_16', 'AWS_rollstd_16']
 Data shapes - Train: torch.Size([30782, 29, 18]), Val: torch.Size([6583, 29, 18]), Test: torch.Size([8578, 29, 18])




   Epoch 1/5, Total Loss: 9.0132, Binary: 0.6634, Intensity: 0.7943, Regression: 5.5910
   Epoch 2/5, Total Loss: 5.4254, Binary: 0.5520, Intensity: 0.4651, Regression: 3.2715
   Epoch 3/5, Total Loss: 4.9622, Binary: 0.4940, Intensity: 0.4175, Regression: 3.0044
   Epoch 4/5, Total Loss: 4.6967, Binary: 0.4405, Intensity: 0.4069, Regression: 2.8544
   Epoch 5/5, Total Loss: 4.4887, Binary: 0.3842, Intensity: 0.4002, Regression: 2.7455
Using 18 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag10', 'AWS_lag12', 'AWS_rollmean_7', 'AWS_rollstd_7', 'AWS_rollmean_16', 'AWS_rollstd_16']
 Data shapes - Train: torch.Size([39062, 29, 18]), Val: torch.Size([6299, 29, 18]), Test: torch.Size([8333, 29, 18])




   Epoch 1/5, Total Loss: 8.4578, Binary: 0.6579, Intensity: 0.7035, Regression: 5.2491
   Epoch 2/5, Total Loss: 5.4230, Binary: 0.5151, Intensity: 0.4525, Regression: 3.3024
   Epoch 3/5, Total Loss: 4.9094, Binary: 0.4414, Intensity: 0.4189, Regression: 3.0004
   Epoch 4/5, Total Loss: 4.6711, Binary: 0.4057, Intensity: 0.4069, Regression: 2.8591


[I 2025-05-06 18:28:50,111] Trial 19 finished with value: 1.041409397125244 and parameters: {'hidden_size': 40, 'num_layers': 1, 'dropout': 0.13070759677342186, 'lr': 0.0005584213541169145, 'time_step_in': 29, 'time_step_out': 1, 'stride': 4, 'binary_weight': 0.9227891583776255, 'intensity_weight': 0.8649385268460922, 'regression_weight': 1.3797099515818585, 'rain_threshold': 0.1767590897952402, 'num_lags': 2, 'lag_0': 10, 'lag_1': 12, 'num_windows': 2, 'window_0': 7, 'window_1': 16}. Best is trial 0 with value: 0.9751351237297058.


   Epoch 5/5, Total Loss: 4.5136, Binary: 0.3834, Intensity: 0.4053, Regression: 2.7609
 Best parameters for 2020-10:
  hidden_size: 194
  num_layers: 1
  dropout: 0.2718704579992738
  lr: 0.00044891313482111033
  time_step_in: 31
  time_step_out: 1
  stride: 5
  binary_weight: 0.9829917900820302
  intensity_weight: 1.1452625794652946
  regression_weight: 0.9180316632117681
  rain_threshold: 0.19499582555291312
  num_lags: 1
  lag_0: 10
  num_windows: 1
  window_0: 3
 Best parameters saved to hybrid_best_params_2020-10.json

 Final evaluation on 2020-10, fold_1 test set

 Training hybrid model for 2020-10, fold_1 with best parameters
Using 15 features: ['TCW', 'TCLW', 'R250', 'R500', 'R850', 'U850', 'V850', 'EWSS', 'KX', 'CAPE', 'SSHF', 'PEV', 'AWS_lag10', 'AWS_rollmean_3', 'AWS_rollstd_3']
 Data shapes - Train: torch.Size([4564, 31, 15]), Val: torch.Size([5507, 31, 15]), Test: torch.Size([7374, 31, 15])




   Epoch 1/15, Total Loss: 8.3712, Binary: 0.6820, Intensity: 0.9303, Regression: 7.2277
   Epoch 2/15, Total Loss: 5.0978, Binary: 0.6333, Intensity: 0.8111, Regression: 3.8629
   Epoch 3/15, Total Loss: 4.3663, Binary: 0.5569, Intensity: 0.5477, Regression: 3.4766
   Epoch 4/15, Total Loss: 3.9628, Binary: 0.5169, Intensity: 0.4536, Regression: 3.1972
   Epoch 5/15, Total Loss: 3.8014, Binary: 0.4698, Intensity: 0.4335, Regression: 3.0970
   Epoch 6/15, Total Loss: 3.6819, Binary: 0.4348, Intensity: 0.4245, Regression: 3.0155
   Epoch 7/15, Total Loss: 3.5387, Binary: 0.4094, Intensity: 0.4156, Regression: 2.8979
   Epoch 8/15, Total Loss: 3.4981, Binary: 0.4012, Intensity: 0.4125, Regression: 2.8663
   Epoch 9/15, Total Loss: 3.3831, Binary: 0.3701, Intensity: 0.4045, Regression: 2.7843
   Epoch 10/15, Total Loss: 3.2152, Binary: 0.3625, Intensity: 0.3923, Regression: 2.6247
   Epoch 11/15, Total Loss: 3.1779, Binary: 0.3547, Intensity: 0.3892, Regression: 2.5963
   Epoch 12/15, Tot



   Epoch 1/15, Total Loss: 6.8089, Binary: 0.6468, Intensity: 0.8396, Regression: 5.6768
   Epoch 2/15, Total Loss: 4.1406, Binary: 0.5243, Intensity: 0.4642, Regression: 3.3698
   Epoch 3/15, Total Loss: 3.9099, Binary: 0.4884, Intensity: 0.4281, Regression: 3.2020
   Epoch 4/15, Total Loss: 3.7131, Binary: 0.4525, Intensity: 0.4146, Regression: 3.0429
   Epoch 5/15, Total Loss: 3.5534, Binary: 0.4103, Intensity: 0.4067, Regression: 2.9240
   Epoch 6/15, Total Loss: 3.4466, Binary: 0.3659, Intensity: 0.4046, Regression: 2.8578
   Epoch 7/15, Total Loss: 3.4001, Binary: 0.3494, Intensity: 0.3998, Regression: 2.8308
   Epoch 8/15, Total Loss: 3.3312, Binary: 0.3300, Intensity: 0.3917, Regression: 2.7867
   Epoch 9/15, Total Loss: 3.2063, Binary: 0.3285, Intensity: 0.3820, Regression: 2.6643
   Epoch 10/15, Total Loss: 3.1857, Binary: 0.3184, Intensity: 0.3776, Regression: 2.6582
   Epoch 11/15, Total Loss: 3.1133, Binary: 0.3141, Intensity: 0.3683, Regression: 2.5955
   Epoch 12/15, Tot



   Epoch 1/15, Total Loss: 5.4796, Binary: 0.6086, Intensity: 0.6738, Regression: 4.4766
   Epoch 2/15, Total Loss: 3.7620, Binary: 0.4882, Intensity: 0.4331, Regression: 3.0349
   Epoch 3/15, Total Loss: 3.4954, Binary: 0.4007, Intensity: 0.4187, Regression: 2.8561
   Epoch 4/15, Total Loss: 3.3214, Binary: 0.3468, Intensity: 0.4009, Regression: 2.7465
   Epoch 5/15, Total Loss: 3.2300, Binary: 0.3126, Intensity: 0.3913, Regression: 2.6955
   Epoch 6/15, Total Loss: 3.1317, Binary: 0.2956, Intensity: 0.3808, Regression: 2.6198
   Epoch 7/15, Total Loss: 3.0988, Binary: 0.2908, Intensity: 0.3717, Regression: 2.6005
   Epoch 8/15, Total Loss: 3.0898, Binary: 0.2894, Intensity: 0.3777, Regression: 2.5847
   Epoch 9/15, Total Loss: 3.0045, Binary: 0.2822, Intensity: 0.3663, Regression: 2.5136
   Epoch 10/15, Total Loss: 2.9771, Binary: 0.2787, Intensity: 0.3637, Regression: 2.4907
   Epoch 11/15, Total Loss: 2.9407, Binary: 0.2769, Intensity: 0.3599, Regression: 2.4579
   Epoch 12/15, Tot



   Epoch 1/15, Total Loss: 4.9236, Binary: 0.5552, Intensity: 0.6569, Regression: 3.9492
   Epoch 2/15, Total Loss: 3.3668, Binary: 0.3868, Intensity: 0.4040, Regression: 2.7492
   Epoch 3/15, Total Loss: 3.1533, Binary: 0.3421, Intensity: 0.3842, Regression: 2.5892
   Epoch 4/15, Total Loss: 3.0457, Binary: 0.3057, Intensity: 0.3736, Regression: 2.5242
   Epoch 5/15, Total Loss: 2.9655, Binary: 0.2870, Intensity: 0.3659, Regression: 2.4665
   Epoch 6/15, Total Loss: 2.9160, Binary: 0.2760, Intensity: 0.3596, Regression: 2.4322
   Epoch 7/15, Total Loss: 2.8914, Binary: 0.2713, Intensity: 0.3581, Regression: 2.4123
   Epoch 8/15, Total Loss: 2.8322, Binary: 0.2710, Intensity: 0.3504, Regression: 2.3578
   Epoch 9/15, Total Loss: 2.8271, Binary: 0.2713, Intensity: 0.3496, Regression: 2.3529
   Epoch 10/15, Total Loss: 2.7268, Binary: 0.2621, Intensity: 0.3408, Regression: 2.2646
   Epoch 11/15, Total Loss: 2.7262, Binary: 0.2614, Intensity: 0.3399, Regression: 2.2657
   Epoch 12/15, Tot



   Epoch 1/15, Total Loss: 4.5742, Binary: 0.5318, Intensity: 0.5526, Regression: 3.7237
   Epoch 2/15, Total Loss: 3.5106, Binary: 0.4174, Intensity: 0.4083, Regression: 2.8677
   Epoch 3/15, Total Loss: 3.2985, Binary: 0.3911, Intensity: 0.3909, Regression: 2.6867
   Epoch 4/15, Total Loss: 3.1600, Binary: 0.3581, Intensity: 0.3795, Regression: 2.5853
   Epoch 5/15, Total Loss: 3.0780, Binary: 0.3248, Intensity: 0.3737, Regression: 2.5388
   Epoch 6/15, Total Loss: 3.0045, Binary: 0.3021, Intensity: 0.3633, Regression: 2.4961
   Epoch 7/15, Total Loss: 2.9437, Binary: 0.2825, Intensity: 0.3570, Regression: 2.4585
   Epoch 8/15, Total Loss: 2.8819, Binary: 0.2743, Intensity: 0.3502, Regression: 2.4086
   Epoch 9/15, Total Loss: 2.8524, Binary: 0.2631, Intensity: 0.3468, Regression: 2.3927
   Epoch 10/15, Total Loss: 2.8149, Binary: 0.2623, Intensity: 0.3479, Regression: 2.3514
   Epoch 11/15, Total Loss: 2.7691, Binary: 0.2590, Intensity: 0.3405, Regression: 2.3141
   Epoch 12/15, Tot