## Working code for single-stream forecasting using univariate and multivariate approaches

- Added the ability to save results
- Added multivariate XGB seq2seq model (bidirectional)
- Use flags (e.g., in a dictionary or list) to control which models to add
- Added multivariate UniSeq2Seq XGB model (unidirectional)
- Added multivariate bidirectional RF Seq2Seq model
- V. 5
- Added multivariate unidirectional autoregressive XGB model (uniautoreg_xgb)
- Added multivariate unidirectional UniSeq2Seq CNN Multi model
- V. 6
- Added multivariate unidirectional UniSeq2Seq LSTM Multi model
- Implemented a flag-based approach for adding models to the combiner
- Warning: there may be data leaks that make the models look unrealistically accurate.
- V. 7
- Data leaks have been fixed
- V. 8
- Added multivariate bidirectional XGB Autoreg Multi model
- Added index-based model addition to the combiner

In [None]:
import tensorflow as tf
# Environment sanity check: print the installed TensorFlow version and how many
# GPU devices TensorFlow can see.
print("TensorFlow Version:", tf.__version__)
print("Num GPUs Available:", len(tf.config.list_physical_devices('GPU')))

In [None]:
# import torch
# import subprocess
# import sys
# import platform
# import psutil
# import GPUtil

# # CPU information
# def get_cpu_info():
#     cpu_info = {}
#     cpu_info['processor'] = platform.processor()
#     cpu_info['physical_cores'] = psutil.cpu_count(logical=False)
#     cpu_info['total_cores'] = psutil.cpu_count(logical=True)
    
#     # For Linux, more detailed information can be obtained
#     if platform.system() == "Linux":
#         try:
#             cpu_model = subprocess.check_output("cat /proc/cpuinfo | grep 'model name' | uniq", shell=True).decode().strip().split(':')[1]
#             cpu_info['model'] = cpu_model
#         except:
#             pass
    
#     return cpu_info

# # GPU information via TensorFlow
# def get_tf_gpu_info():
#     gpus = tf.config.list_physical_devices('GPU')
#     gpu_info = []
#     for gpu in gpus:
#         gpu_details = tf.config.experimental.get_device_details(gpu)
#         gpu_info.append(gpu_details)
#     return gpu_info

# # GPU information via PyTorch
# def get_torch_gpu_info():
#     if torch.cuda.is_available():
#         gpu_info = {
#             'cuda_available': True,
#             'device_count': torch.cuda.device_count(),
#             'current_device': torch.cuda.current_device(),
#             'device_name': torch.cuda.get_device_name(0),
#             'cuda_version': torch.version.cuda,
#         }
#     else:
#         gpu_info = {'cuda_available': False}
#     return gpu_info

# # GPU information via GPUtil
# def get_gputil_info():
#     gpus = GPUtil.getGPUs()
#     gpu_info = []
#     for gpu in gpus:
#         info = {
#             'id': gpu.id,
#             'name': gpu.name,
#             'memory_total': gpu.memoryTotal,
#             'driver': gpu.driver,
#         }
#         gpu_info.append(info)
#     return gpu_info

# # NVIDIA information via subprocess
# def get_nvidia_smi_info():
#     try:
#         nvidia_smi = subprocess.check_output('nvidia-smi --query-gpu=name,memory.total,driver_version --format=csv,noheader', shell=True).decode()
#         return nvidia_smi
#     except:
#         return "NVIDIA-SMI failed or NVIDIA driver is not installed"

# # RAM information
# def get_ram_info():
#     ram = psutil.virtual_memory()
#     ram_info = {
#         'total': ram.total / (1024**3),  # GB
#         'available': ram.available / (1024**3),  # GB
#         'percent_used': ram.percent
#     }
#     return ram_info

# # Collecting all information
# system_info = {
#     'platform': platform.platform(),
#     'python_version': sys.version,
#     'cpu': get_cpu_info(),
#     'ram': get_ram_info(),
#     'tf_gpu': get_tf_gpu_info(),
#     'torch_gpu': get_torch_gpu_info(),
#     'gputil': get_gputil_info(),
#     'nvidia_smi': get_nvidia_smi_info()
# }

# print(system_info)

In [None]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.impute import SimpleImputer
from statsmodels.tsa.arima.model import ARIMA
import tensorflow as tf
from tcn import TCN
from tensorflow.keras.layers import LSTM, GRU, Conv1D, SimpleRNN, Dense, Flatten, Input, Concatenate, Bidirectional, RepeatVector, TimeDistributed, Reshape
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
from xgboost import XGBRegressor
from tabulate import tabulate
import time
import matplotlib.pyplot as plt
from scipy import interpolate
import seaborn as sns
import gc
import tensorflow.keras.backend as K
import re

# Hyperparameters shared by the code that builds and trains the models
EPOCHS, BATCH_SIZE, PATIENCE = 200, 32, 20
N_ESTIMATORS = 50

# Switch every visible GPU to on-demand memory allocation. With no GPU present
# the loop body never runs, so no separate emptiness check is needed.
gpus = tf.config.list_physical_devices('GPU')
try:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as e:
    # Best effort: report the problem and continue with TensorFlow's defaults.
    print(e)


In [None]:
def load_and_preprocess_data(
    file_path="df_data_prepared.csv",
    column="pm2_5",
    multivariate=False,
    train_size_ratio=0.8
):
    """
    Load the CSV, order it chronologically and standard-scale it without leakage.

    The scaler is fitted on the chronologically first ``train_size_ratio`` share
    of the rows only and then applied to the remaining rows, so statistics of
    the test part never influence the scaling.

    Parameters:
      - file_path: path of the CSV file; it must contain a "date" column and `column`.
      - column: name of the target column; rows where it is NaN are dropped.
      - multivariate: if False only `column` is scaled (1D result); if True a fixed
        list of numeric columns (those present in the file) plus the derived
        'hour' and 'season' features is scaled (2D result).
      - train_size_ratio: share of rows, from the start, used to fit the scaler.

    Returns:
      - df: the cleaned DataFrame sorted by "date" with a fresh 0..n-1 index and an
        extra f"{column}_original" column holding the unscaled target. The derived
        'hour'/'season' columns are NOT added to this frame.
      - data_scaled: numpy array with one row per row of df; shape (n,) in
        univariate mode and (n, n_features) in multivariate mode.
      - scaler: the fitted StandardScaler.
    """
    # 1) Load, drop rows without a target value, sort by time and only THEN
    #    rebuild the index. Resetting the index before sorting would leave
    #    shuffled labels for an unsorted file, and label-based lookups
    #    (df.loc[i:j]) would no longer match row positions in data_scaled.
    df = pd.read_csv(file_path, encoding="utf-8", parse_dates=["date"])
    df = df.dropna(subset=[column])
    df = df.sort_values("date").reset_index(drop=True)

    # 2) Keep an untouched copy of the target to have the "true" values later
    df[f"{column}_original"] = df[column]

    # 3) Split chronologically: first train_size_ratio share -> train, rest -> test.
    #    Explicit copies so that adding derived columns below never writes
    #    through (or warns about) a view of df.
    n = len(df)
    train_size = int(n * train_size_ratio)
    df_train = df.iloc[:train_size].copy()
    df_test = df.iloc[train_size:].copy()

    # 4) Train scaler on the train part, then transform the test part
    scaler = StandardScaler()

    if not multivariate:
        # --- UNIVARIATE ---
        train_data = df_train[column].values.reshape(-1, 1)
        test_data = df_test[column].values.reshape(-1, 1)

        scaler.fit(train_data)

        train_data_scaled = scaler.transform(train_data).flatten()
        test_data_scaled = scaler.transform(test_data).flatten()

        # Concatenate back into one series aligned with df
        data_scaled = np.concatenate([train_data_scaled, test_data_scaled], axis=0)

    else:
        # --- MULTIVARIATE ---
        # Calendar features; season maps Dec-Feb -> 0, Mar-May -> 1,
        # Jun-Aug -> 2, Sep-Nov -> 3.
        df_train['hour'] = df_train['date'].dt.hour
        df_train['season'] = (df_train['date'].dt.month % 12 // 3).astype(int)
        df_test['hour'] = df_test['date'].dt.hour
        df_test['season'] = (df_test['date'].dt.month % 12 // 3).astype(int)

        # Approximate list of features + original value of pm2_5_original
        numeric_cols = [
            'pm2_5', 'air_temperature', 'air_humidity', 'T', 'P0',
            'P', 'U', 'DD', 'Ff', 'VV', 'pm2_5_original', 'hour', 'season'
        ]
        # Remove those that are not in df
        numeric_cols = [c for c in numeric_cols if c in df_train.columns and c in df_test.columns]

        train_data = df_train[numeric_cols].values
        test_data = df_test[numeric_cols].values

        scaler.fit(train_data)
        train_data_scaled = scaler.transform(train_data)
        test_data_scaled = scaler.transform(test_data)

        data_scaled = np.vstack([train_data_scaled, test_data_scaled])

    # Sanity check: one scaled row per DataFrame row
    assert data_scaled.shape[0] == len(df), "data_scaled and df have different lengths!"

    return df, data_scaled, scaler

def time_based_split(X, y, train_size_ratio=0.8):
    """
    Chronological train/test split (no shuffling).

    The first ``int(len(X) * train_size_ratio)`` items of X and y become the
    training part; everything from that position on becomes the test part.

    Returns:
      (X_train, X_test, y_train, y_test)
    """
    cut = int(len(X) * train_size_ratio)
    return X[:cut], X[cut:], y[:cut], y[cut:]

def introduce_synthetic_gaps(df, column="pm2_5", missing_fraction=0.05, gap_length=12, random_state=42):
    """
    Return a copy of df in which random runs of `column` are replaced by NaN.

    The number of gaps is ``int(len(df) * missing_fraction / gap_length)``. Gap
    start positions are drawn without replacement from
    ``[gap_length, len(df) - 2 * gap_length)``; the starts are distinct, but the
    gaps themselves may still overlap.

    Note: seeds NumPy's global random generator with `random_state`.

    Returns:
      - df_missing: the copy with gaps (the input frame is left untouched).
      - gap_indices: array with the start label of every gap.
    """
    np.random.seed(random_state)
    df_missing = df.copy()
    total_rows = len(df_missing)
    n_gaps = int(total_rows * missing_fraction / gap_length)
    candidate_starts = np.arange(gap_length, total_rows - gap_length * 2)
    gap_indices = np.random.choice(candidate_starts, n_gaps, replace=False)

    for start in gap_indices:
        # .loc slicing is label based and includes the end label
        last = start + gap_length - 1
        df_missing.loc[start:last, column] = np.nan

    return df_missing, gap_indices

def evaluate_model(y_true, y_pred):
    """
    Compute MAE, RMSE, R2 and MAPE between true and predicted values.

    Both inputs are flattened once up front so that every metric, including
    MAPE, compares element i with element i. Without this, an (n, 1) truth
    against an (n,) prediction would broadcast to an (n, n) matrix in the MAPE
    term and silently produce a wrong value.

    Parameters:
      - y_true, y_pred: array-likes with the same number of elements.

    Returns:
      (mae, rmse, r2, mape), each rounded to 3 decimals; mape is in percent.
    """
    # Small offset in the MAPE denominator to avoid division by exactly zero
    epsilon = 1e-8
    true_flat = np.asarray(y_true).ravel()
    pred_flat = np.asarray(y_pred).ravel()
    mae = round(mean_absolute_error(true_flat, pred_flat), 3)
    rmse = round(np.sqrt(mean_squared_error(true_flat, pred_flat)), 3)
    r2 = round(r2_score(true_flat, pred_flat), 3)
    mape = round(np.mean(np.abs((true_flat - pred_flat) / (true_flat + epsilon))) * 100, 3)
    return mae, rmse, r2, mape

def combine_forecasts(forward_forecast, backward_forecast):
    """
    Blend a forward and a backward forecast of the same gap.

    The weight of the forward forecast falls linearly from 1 at the first step
    to 0 at the last step; the backward forecast gets the complementary weight.
    A gap of length 1 is the plain average of both forecasts.

    Returns an array with the shape and dtype of `forward_forecast`.
    """
    n_steps = len(forward_forecast)
    blended = np.zeros_like(forward_forecast)
    if n_steps > 1:
        last_step = n_steps - 1
        for step in range(n_steps):
            fwd_weight = 1 - step / last_step
            blended[step] = fwd_weight * forward_forecast[step] + (1 - fwd_weight) * backward_forecast[step]
    else:
        # Zero or one step: no slope to interpolate along, use equal weights
        for step in range(n_steps):
            blended[step] = 0.5 * forward_forecast[step] + 0.5 * backward_forecast[step]
    return blended


# Data preparation functions 
def create_forward_data_seq2seq(data, pre_context_length, gap_length):
    """
    Build (context, target) pairs for forward seq2seq training on a 1D series.

    For every position s in [pre_context_length, len(data) - gap_length) the
    context is the `pre_context_length` values before s and the target is the
    `gap_length` values starting at s. Both are stored as column vectors.

    Returns:
      (X_forward, y_forward) as numpy arrays.
    """
    starts = range(pre_context_length, len(data) - gap_length)
    contexts = [data[s - pre_context_length:s].reshape(-1, 1) for s in starts]
    targets = [data[s:s + gap_length].reshape(-1, 1) for s in starts]
    return np.array(contexts), np.array(targets)

def create_backward_data_seq2seq(data, post_context_length, gap_length):
    """
    Build (context, target) pairs for backward seq2seq training on a 1D series.

    For every anchor a in [gap_length, len(data) - post_context_length] the
    context is the `post_context_length` values starting at a and the target is
    the `gap_length` values just before a. Both are stored as column vectors
    reversed in time, so the model "reads" the series right to left.

    Returns:
      (X_backward, y_backward) as numpy arrays.
    """
    anchors = range(gap_length, len(data) - post_context_length + 1)
    contexts = [data[a:a + post_context_length].reshape(-1, 1)[::-1] for a in anchors]
    targets = [data[a - gap_length:a].reshape(-1, 1)[::-1] for a in anchors]
    return np.array(contexts), np.array(targets)

def create_forward_data_autoreg(data, pre_context_length):
    """
    Build one-step-ahead training pairs for forward autoregression (1D series).

    For every position p in [pre_context_length, len(data) - 1) the input is the
    column vector of the `pre_context_length` values before p and the target is
    the single value data[p].

    Returns:
      (X_forward, y_forward) as numpy arrays.
    """
    positions = range(pre_context_length, len(data) - 1)
    windows = [data[p - pre_context_length:p].reshape(-1, 1) for p in positions]
    next_values = [data[p] for p in positions]
    return np.array(windows), np.array(next_values)

def create_backward_data_autoreg(data, post_context_length):
    """
    Build one-step-back training pairs for backward autoregression (1D series).

    For every position p in [1, len(data) - post_context_length) the input is
    the time-reversed column vector of the `post_context_length` values starting
    at p and the target is the single value just before the window, data[p - 1].

    Returns:
      (X_backward, y_backward) as numpy arrays.
    """
    positions = range(1, len(data) - post_context_length)
    windows = [data[p:p + post_context_length].reshape(-1, 1)[::-1] for p in positions]
    previous_values = [data[p - 1] for p in positions]
    return np.array(windows), np.array(previous_values)

def create_uniseq2seq_data(data, pre_context_length, gap_length, post_context_length):
    """
    Build (left context, right context, gap) triples from a 1D series.

    For every gap start s in
    [pre_context_length, len(data) - gap_length - post_context_length):
      - left context: the `pre_context_length` values before s,
      - gap (target): the `gap_length` values starting at s,
      - right context: the `post_context_length` values right after the gap.
    All three are stored as column vectors in natural time order.

    Returns:
      (X_left, X_right, y) as numpy arrays.
    """
    starts = range(pre_context_length, len(data) - gap_length - post_context_length)
    lefts = [data[s - pre_context_length:s].reshape(-1, 1) for s in starts]
    rights = [
        data[s + gap_length:s + gap_length + post_context_length].reshape(-1, 1)
        for s in starts
    ]
    gaps = [data[s:s + gap_length].reshape(-1, 1) for s in starts]
    return np.array(lefts), np.array(rights), np.array(gaps)

def create_uniseq2seq_data_multi(data, pre_context_length, gap_length, post_context_length, feature_cols=None):
    """
    Build (left context, right context, gap) triples from a 2D feature matrix.

    Parameters:
      - data: 2D numpy array (rows = time steps, columns = features).
      - pre_context_length / post_context_length: context lengths around the gap.
      - gap_length: number of steps to predict.
      - feature_cols: column indices used for both contexts; defaults to [0].

    Returns:
      (X_left, X_right, y) as numpy arrays. The target y is always taken from
      column 0 of `data`, independent of `feature_cols`.

    Raises:
      - TypeError if `data` is not a numpy array.
      - ValueError if `data` is not 2D or a feature index is out of range.
    """
    # Validate the input container before touching its contents
    if not isinstance(data, np.ndarray):
        raise TypeError(f"Input data must be a numpy array, got {type(data)}")
    if len(data.shape) != 2:
        raise ValueError(f"Input data must be 2D, got shape {data.shape}")

    # Debug output (emitted before the default for feature_cols is applied)
    print(f"Debug: Input data shape in create_uniseq2seq_data_multi: {data.shape}")
    print(f"Debug: Input data type in create_uniseq2seq_data_multi: {type(data)}")
    print(f"Debug: feature_cols: {feature_cols}")

    if feature_cols is None:
        feature_cols = [0]  # Default to only pm2_5

    n_columns = data.shape[1]
    if any(idx < 0 or idx >= n_columns for idx in feature_cols):
        raise ValueError(f"Feature indices {feature_cols} are out of bounds for data with {n_columns} columns")

    starts = range(pre_context_length, len(data) - gap_length - post_context_length)
    lefts = [
        data[s - pre_context_length:s, feature_cols].reshape(pre_context_length, -1)
        for s in starts
    ]
    rights = [
        data[s + gap_length:s + gap_length + post_context_length, feature_cols].reshape(post_context_length, -1)
        for s in starts
    ]
    # Target: column 0 (pm2_5) over the gap, as a column vector
    gaps = [data[s:s + gap_length, 0].reshape(gap_length, 1) for s in starts]
    return np.array(lefts), np.array(rights), np.array(gaps)

def create_forward_data_seq2seq_multi(data, pre_context_length, gap_length, feature_cols):
    """
    Build forward seq2seq training pairs from a 2D feature matrix.

    For every position s in [pre_context_length, len(data) - gap_length):
      - context: rows [s - pre_context_length, s) restricted to `feature_cols`,
        shape (pre_context_length, len(feature_cols));
      - target: column 0 of `data` over rows [s, s + gap_length), as a column vector.

    Returns:
      (X_forward, y_forward) as numpy arrays.
    """
    starts = range(pre_context_length, len(data) - gap_length)
    contexts = [data[s - pre_context_length:s, feature_cols] for s in starts]
    targets = [data[s:s + gap_length, 0].reshape(-1, 1) for s in starts]
    return np.array(contexts), np.array(targets)

def create_backward_data_seq2seq_multi(data, post_context_length, gap_length, feature_cols):
    """
    Build backward seq2seq training pairs from a 2D feature matrix.

    For every anchor a in [gap_length, len(data) - post_context_length]:
      - context: rows [a, a + post_context_length) restricted to `feature_cols`,
        reversed along the time axis;
      - target: column 0 of `data` over rows [a - gap_length, a), as a column
        vector, also reversed in time.

    Returns:
      (X_backward, y_backward) as numpy arrays.
    """
    anchors = range(gap_length, len(data) - post_context_length + 1)
    contexts = [data[a:a + post_context_length, feature_cols][::-1] for a in anchors]
    targets = [data[a - gap_length:a, 0].reshape(-1, 1)[::-1] for a in anchors]
    return np.array(contexts), np.array(targets)

def create_forward_data_autoreg_multi(data, pre_context_length, feature_cols):
    """
    Build one-step-ahead training pairs for multivariate forward autoregression.

    For every position p in [pre_context_length, len(data) - 1):
      - input: rows [p - pre_context_length, p) restricted to `feature_cols`,
        shape (pre_context_length, len(feature_cols));
      - target: the value of column feature_cols[0] at row p.

    Returns:
      (X_forward, y_forward) as numpy arrays.
    """
    target_col = feature_cols[0]
    positions = range(pre_context_length, len(data) - 1)
    # Slice the time window first, then pick the selected feature columns
    windows = [data[p - pre_context_length:p][:, feature_cols] for p in positions]
    targets = [data[p, target_col] for p in positions]
    return np.array(windows), np.array(targets)

def create_backward_data_autoreg_multi(data, post_context_length, feature_cols):
    """
    Build one-step-back training pairs for multivariate backward autoregression.

    Parameters:
      - data: numpy array of shape (n_samples, n_total_features).
      - post_context_length: number of steps in each context window.
      - feature_cols: list of column indices to keep (e.g., [0, 1, 2]);
        feature_cols[0] is used as the target column.

    Returns:
      - X_backward: array of shape
        (n_windows, post_context_length, len(feature_cols)); each window covers
        rows [p, p + post_context_length) and is reversed along the time axis.
      - y_backward: array of shape (n_windows,) holding data[p - 1, feature_cols[0]],
        i.e. the target value one step before the window.

    Windows are produced for every p in [1, len(data) - post_context_length).
    """
    target_col = feature_cols[0]
    positions = range(1, len(data) - post_context_length)
    # [::-1] flips the window along time so the latest row comes first
    windows = [data[p:p + post_context_length, feature_cols][::-1] for p in positions]
    targets = [data[p - 1, target_col] for p in positions]
    return np.array(windows), np.array(targets)


def create_uniseq2seq_data_multi_forward(data, pre_context_length, gap_length, feature_cols=None):
    """
    Build (left context, gap) pairs for a left-context-only multivariate seq2seq model.

    For every gap start s in [pre_context_length, len(data) - gap_length]:
      - left context: rows [s - pre_context_length, s) restricted to
        `feature_cols` (defaults to [0]), shape (pre_context_length, n_selected);
      - gap (target): column 0 of `data` over rows [s, s + gap_length), shape
        (gap_length, 1). The target column is always 0, independent of
        `feature_cols`.

    Returns:
      (X_left, y) as numpy arrays.
    """
    cols = [0] if feature_cols is None else feature_cols
    starts = range(pre_context_length, len(data) - gap_length + 1)
    lefts = [data[s - pre_context_length:s, cols] for s in starts]
    gaps = [data[s:s + gap_length, 0].reshape(gap_length, 1) for s in starts]
    return np.array(lefts), np.array(gaps)







def test_synthetic_gaps_imputation_efficiency(df, data_scaled, gap_indices, forecast_func, 
                                              forward_model, backward_model, 
                                              pre_context_length, gap_length, post_context_length, 
                                              scaler, model_name, model_info):
    """
    Collect true and predicted gap values for a bidirectional (forward + backward) model.

    For every gap start in `gap_indices` that leaves room for both contexts, the
    forward model forecasts the gap from the left context, the backward model
    forecasts it from the time-reversed right context, and both forecasts are
    blended with combine_forecasts. The blend is converted back to the original
    scale and paired with df["pm2_5_original"] over the gap.

    Parameters:
      - df: DataFrame with a "pm2_5_original" column; gap rows are read by label
        (df.loc), so its index is expected to match positions in data_scaled.
      - data_scaled: scaled series (1D) or feature matrix (2D).
      - gap_indices: iterable of gap start positions.
      - forecast_func: callable (model, context, gap_length) -> scaled forecast.
      - forward_model / backward_model: models passed through to forecast_func.
      - pre_context_length / gap_length / post_context_length: window sizes.
      - scaler: fitted scaler. If it was fitted on more than one feature, only
        the statistics of feature 0 are used for the inverse transform.
      - model_name: not used by this function.
      - model_info: dict (or None); its "feature_cols" entry, if present and
        data_scaled is a 2D array, selects the context columns.

    Returns:
      (all_true, all_pred): concatenated column arrays of true and predicted values.

    Raises:
      ValueError if no gap could be processed.
    """
    all_true_gaps = []
    all_pred_gaps = []
    # If feature_cols are set for the model, extract them from model_info
    fc = model_info.get("feature_cols", None) if model_info is not None else None
    # Loop invariants: whether to select columns and how to undo the scaling
    select_columns = fc is not None and isinstance(data_scaled, np.ndarray) and data_scaled.ndim == 2
    multi_feature_scaler = hasattr(scaler, 'mean_') and scaler.mean_.shape[0] > 1

    for gap_idx in gap_indices:
        # Skip gaps whose left or right context would fall outside the data
        if gap_idx - pre_context_length < 0 or gap_idx + gap_length + post_context_length > len(data_scaled):
            continue

        initial_context_forward = data_scaled[gap_idx - pre_context_length : gap_idx]
        initial_context_backward = data_scaled[gap_idx + gap_length : gap_idx + gap_length + post_context_length]
        if select_columns:
            initial_context_forward = initial_context_forward[:, fc]
            initial_context_backward = initial_context_backward[:, fc]

        # Forward forecast from the left context; backward forecast from the
        # time-reversed right context, flipped back into natural time order
        forward_forecast_scaled = forecast_func(forward_model, initial_context_forward, gap_length)
        initial_context_backward = np.flipud(initial_context_backward)
        backward_forecast_scaled = forecast_func(backward_model, initial_context_backward, gap_length)
        backward_forecast_scaled = np.flipud(backward_forecast_scaled)
        combined_forecast_scaled = combine_forecasts(forward_forecast_scaled, backward_forecast_scaled)

        # Inverse scaling: if scaler is trained on multivariate data, apply it only to the target feature (index 0)
        if multi_feature_scaler:
            mean_pm25 = scaler.mean_[0]
            scale_pm25 = scaler.scale_[0]
            combined_forecast = combined_forecast_scaled * scale_pm25 + mean_pm25
            # Force a column so predictions always line up with true_gap
            # (which is (gap_length, 1)), as the sibling evaluators do.
            combined_forecast = np.asarray(combined_forecast).reshape(-1, 1)
        else:
            combined_forecast = scaler.inverse_transform(combined_forecast_scaled)

        true_gap = df.loc[gap_idx : gap_idx + gap_length - 1, "pm2_5_original"].values.reshape(-1, 1)
        if np.any(np.isnan(combined_forecast)) or np.any(np.isnan(true_gap)):
            continue
        all_pred_gaps.append(combined_forecast)
        all_true_gaps.append(true_gap)

    if not all_true_gaps or not all_pred_gaps:
        raise ValueError("No processed gaps to calculate metrics.")
    all_true = np.concatenate(all_true_gaps)
    all_pred = np.concatenate(all_pred_gaps)
    return all_true, all_pred




def test_synthetic_gaps_imputation_efficiency_uniseq2seq(df, data_scaled, gap_indices, forecast_func, model, 
                                                          pre_context_length, gap_length, post_context_length, 
                                                          scaler, model_name):
    all_true = []
    all_pred = []
    # Get the list of features from the model if it's saved
    if hasattr(model, 'model_info') and 'feature_cols' in model.model_info:
        feature_cols = model.model_info['feature_cols']
    else:
        feature_cols = [0]  # default is only pm2_5

    for gap_idx in gap_indices:
        # Check window boundaries
        if gap_idx - pre_context_length < 0 or gap_idx + gap_length + post_context_length > len(data_scaled):
            continue
        
        # If the method is unidirectional (e.g., "UniSeq2Seq LSTM Multi" or "UniSeq2Seq CNN Multi"),
        # use only the left context.
        if model_name in ["UniSeq2Seq LSTM Multi", "UniSeq2Seq CNN Multi"]:
            initial_context = data_scaled[gap_idx - pre_context_length:gap_idx][:, feature_cols]
        else:
            # For the bidirectional variant, extract and combine the left and right contexts
            if len(feature_cols) == 1 and feature_cols[0] == 0:
                left_context = data_scaled[gap_idx - pre_context_length:gap_idx].reshape(-1, 1)
                right_context = data_scaled[gap_idx + gap_length:gap_idx + gap_length + post_context_length].reshape(-1, 1)
            else:
                left_context = data_scaled[gap_idx - pre_context_length:gap_idx, feature_cols]
                right_context = data_scaled[gap_idx + gap_length:gap_idx + gap_length + post_context_length, feature_cols]
            initial_context = np.concatenate([left_context, right_context])
        
        # Get the forecast
        forecast_scaled = forecast_func(model, initial_context, gap_length)
        
        # Inverse scaling: if the scaler is trained on multivariate data,
        # apply it only to the target feature (assuming it's scaler.mean_[0])
        if hasattr(scaler, 'mean_') and scaler.mean_.shape[0] > 1:
            forecast = forecast_scaled * float(scaler.scale_[0]) + float(scaler.mean_[0])
            forecast = forecast.reshape(-1, 1)
        else:
            forecast = scaler.inverse_transform(forecast_scaled)
        
        true_gap = df.loc[gap_idx:gap_idx + gap_length - 1, "pm2_5_original"].values.reshape(-1, 1)
        if not np.any(np.isnan(true_gap)) and not np.any(np.isnan(forecast)):
            all_true.append(true_gap.ravel())
            all_pred.append(forecast.ravel())
        else:
            print(f"Skipping gap_idx={gap_idx} due to NaN")
    
    if not all_true or not all_pred:
        raise ValueError("No processed gaps to calculate metrics.")
    all_true = np.concatenate(all_true)
    all_pred = np.concatenate(all_pred)
    return all_true, all_pred



def test_synthetic_gaps_imputation_efficiency_uniautoreg(df, data_scaled, gap_indices, forecast_func, model,
                                                         pre_context_length, gap_length, scaler, method_name):
    all_true_gaps = []
    all_pred_gaps = []
    for gap_idx in gap_indices:
        if gap_idx - pre_context_length < 0 or gap_idx + gap_length > len(data_scaled):
            continue
        initial_context = data_scaled[gap_idx - pre_context_length:gap_idx].reshape(-1, 1)
        forecast_scaled = forecast_func(model, initial_context, gap_length)
        forecast = scaler.inverse_transform(forecast_scaled)
        true_gap = df.loc[gap_idx:gap_idx + gap_length - 1, "pm2_5_original"].values.reshape(-1, 1)
        if np.any(np.isnan(forecast)) or np.any(np.isnan(true_gap)):
            continue
        all_pred_gaps.append(forecast)
        all_true_gaps.append(true_gap)
    if len(all_true_gaps) == 0 or len(all_pred_gaps) == 0:
        raise ValueError("No processed gaps to calculate metrics.")
    all_true = np.concatenate(all_true_gaps)
    all_pred = np.concatenate(all_pred_gaps)
    return all_true, all_pred

def test_synthetic_gaps_imputation_efficiency_uniautoreg_multi(df, data_scaled, gap_indices, forecast_func, model,
                                                               pre_context_length, gap_length, scaler, method_name, feature_cols):
    """
    Function to calculate metrics for unidirectional autoregression (UniAR) for multivariate data.
    
    If feature_cols are passed, only these columns are selected from data_scaled.
    During inverse scaling, a manual transformation is performed only for the target feature
    (it is assumed that it corresponds to feature_cols[0] and was trained in the scaler as the first feature).
    
    Parameters:
      - df: original DataFrame.
      - data_scaled: scaled data (2D array, shape=(n_samples, total_features)).
      - gap_indices: indices for synthetic gaps.
      - forecast_func: forecasting function (e.g., direct_uniautoreg_forecast_xgb_multi).
      - model: trained model.
      - pre_context_length: window length.
      - gap_length: number of forecasting steps.
      - scaler: StandardScaler object, trained on all features.
      - method_name: method name (for debugging).
      - feature_cols: list of feature indices used during training.
    
    Returns:
      - all_true, all_pred: concatenated arrays of true and predicted values.
    """
    all_true_gaps = []
    all_pred_gaps = []
    for gap_idx in gap_indices:
        if gap_idx - pre_context_length < 0 or gap_idx + gap_length > len(data_scaled):
            continue
        # Extract the window from data_scaled and select only the necessary features
        window = data_scaled[gap_idx - pre_context_length:gap_idx]  # shape: (pre_context_length, total_features)
        window = window[:, feature_cols]  # shape: (pre_context_length, n_selected)
        
        forecast_scaled = forecast_func(model, window, gap_length)  # (gap_length, 1)
        
        # Manual inverse scaling for the target feature:
        forecast = forecast_scaled * float(scaler.scale_[0]) + float(scaler.mean_[0])
        forecast = forecast.reshape(-1, 1)
        
        true_gap = df.loc[gap_idx:gap_idx + gap_length - 1, "pm2_5_original"].values.reshape(-1, 1)
        if np.any(np.isnan(forecast)) or np.any(np.isnan(true_gap)):
            continue
        all_pred_gaps.append(forecast)
        all_true_gaps.append(true_gap)
    if len(all_true_gaps) == 0 or len(all_pred_gaps) == 0:
        raise ValueError("No processed gaps to calculate metrics.")
    all_true = np.concatenate(all_true_gaps)
    all_pred = np.concatenate(all_pred_gaps)
    return all_true, all_pred




# Main program block
class ImputationCombiner:
    def __init__(self, df, data_scaled, scaler, pre_context_length=32, post_context_length=32):
        """
        Initializes the combiner for testing models and imputation methods.
        
        Parameters:
        - df: original DataFrame with data
        - data_scaled: scaled data
        - scaler: StandardScaler object for inverse transformation
        - pre_context_length: length of the context before the gap (default 32)
        - post_context_length: length of the context after the gap (default 32)
        """
        self.df = df
        print(f"Initializing ImputationCombiner with data_scaled shape: {data_scaled.shape if hasattr(data_scaled, 'shape') else 'No shape, type: ' + str(type(data_scaled))}")
        print(f"Type of data_scaled: {type(data_scaled)}")
        self.data_scaled = data_scaled
        self.scaler = scaler
        self.pre_context_length = pre_context_length
        self.post_context_length = post_context_length
        self.models = {}  # Dictionary to store methods and their parameters
        self.results = {}  # Dictionary to store metrics
        self.predictions = {}  # Dictionary to store true and predicted values

    def add_model(self, name, model_func=None, forecast_func=None, forecast_type="seq2seq", window_size=None, poly_degree=None, feature_cols=[0]):
        self.models[name] = {
            "func": model_func,
            "forecast": forecast_func,
            "forecast_type": forecast_type,
            "window_size": window_size,
            "poly_degree": poly_degree,
            "feature_cols": feature_cols,
            "model_info": {"feature_cols": feature_cols}  # Save feature information
        }

    def run_tests(self, gap_lengths=[3, 5, 9, 17], n_runs=5, missing_fraction=0.05):
        self.n_runs = n_runs
        for gap_length in gap_lengths:
            self.results[gap_length] = {}
            self.predictions[gap_length] = {}
            gaps_per_run = {}
            for run in range(n_runs):
                df_missing, gap_indices = introduce_synthetic_gaps(
                    self.df, "pm2_5", missing_fraction, gap_length, random_state=run
                )
                gaps_per_run[run] = (df_missing, gap_indices)

            for model_name, model_info in self.models.items():
                print(f"Testing {model_name} on gap length {gap_length}...")
                start_time = time.time()
                metrics_runs = []
                all_true_runs = []
                all_pred_runs = []
                run_times = []  # List to store the time of each run

                for run in range(n_runs):
                    run_start_time = time.time()  # Start of the run
                    print(f"  Run {run+1}/{n_runs} started at {time.time() - start_time:.2f} seconds")
                    df_missing, gap_indices = gaps_per_run[run]

                    if model_info["forecast_type"] in ["seq2seq", "autoreg"]:
                        if model_info["forecast_type"] == "seq2seq":
                            # For seq2seq - form multi-output data:
                            if "feature_cols" in model_info and isinstance(self.data_scaled, np.ndarray) and self.data_scaled.ndim == 2 and len(model_info["feature_cols"]) > 1:
                                # Multivariate variant
                                X_forward, y_forward = create_forward_data_seq2seq_multi(
                                    self.data_scaled, self.pre_context_length, gap_length, model_info["feature_cols"]
                                )
                                X_backward, y_backward = create_backward_data_seq2seq_multi(
                                    self.data_scaled, self.post_context_length, gap_length, model_info["feature_cols"]
                                )
                            else:
                                # Univariate variant
                                X_forward, y_forward = create_forward_data_seq2seq(
                                    self.data_scaled, self.pre_context_length, gap_length
                                )
                                X_backward, y_backward = create_backward_data_seq2seq(
                                    self.data_scaled, self.post_context_length, gap_length
                                )
                        elif model_info["forecast_type"] == "autoreg":
                            if (
                                "feature_cols" in model_info
                                and isinstance(self.data_scaled, np.ndarray)
                                and self.data_scaled.ndim == 2
                                and len(model_info["feature_cols"]) > 1
                            ):
                                # Multivariate variant
                                X_forward, y_forward = create_forward_data_autoreg_multi(
                                    self.data_scaled, self.pre_context_length, model_info["feature_cols"]
                                )
                                # If you have a separate function for backward, call it too:
                                # (often there's not much point for a 1D backward, but it can be implemented if needed)
                                X_backward, y_backward = create_backward_data_autoreg_multi(
                                    self.data_scaled, self.post_context_length, model_info["feature_cols"]
                                )
                            else:
                                # Univariate (standard) variant
                                X_forward, y_forward = create_forward_data_autoreg(
                                    self.data_scaled, self.pre_context_length
                                )
                                X_backward, y_backward = create_backward_data_autoreg(
                                    self.data_scaled, self.post_context_length
                                )
                        else:
                            raise ValueError("Unsupported forecast_type. Use 'seq2seq' or 'autoreg'.")

                        # Split data into train/test by time
                        X_f_train, X_f_test, y_f_train, y_f_test = time_based_split(X_forward, y_forward, train_size_ratio=0.8)
                        X_b_train, X_b_test, y_b_train, y_b_test = time_based_split(X_backward, y_backward, train_size_ratio=0.8)
                        print(f"  Data prepared at {time.time() - start_time:.2f} seconds")

                        # Train the model (the function must match the selected forecast type)
                        forward_model, backward_model = model_info["func"](
                            X_f_train, y_f_train, X_f_test, y_f_test,
                            X_b_train, y_b_train, X_b_test, y_b_test,
                            self.pre_context_length, self.post_context_length, gap_length
                        )
                        print(f"  Models trained at {time.time() - start_time:.2f} seconds")

                        all_true, all_pred = test_synthetic_gaps_imputation_efficiency(
                            self.df, self.data_scaled, gap_indices, model_info["forecast"],
                            forward_model, backward_model, self.pre_context_length, gap_length,
                            self.post_context_length, self.scaler, model_name, model_info
                        )
                        print(f"  Prediction completed at {time.time() - start_time:.2f} seconds")

                    elif model_info["forecast_type"] == "uniseq2seq":
                        # Get the list of features (feature_cols)
                        feature_cols = model_info["feature_cols"]
                        if not isinstance(feature_cols, list):
                            raise TypeError(f"feature_cols must be a list, got {type(feature_cols)}")

                        # Bridging approach: form left context (X_left) and right context (X_right).
                        # Split into univariate / multivariate depending on feature_cols.
                        if len(feature_cols) == 1 and feature_cols[0] == 0:
                            # Univariate bridging
                            # Check the shape of self.data_scaled
                            if len(self.data_scaled.shape) == 1:
                                # If data_scaled is a 1D array
                                X_left, X_right, y = create_uniseq2seq_data(
                                    self.data_scaled,
                                    self.pre_context_length,
                                    gap_length,
                                    self.post_context_length
                                )
                            elif len(self.data_scaled.shape) == 2:
                                # data_scaled.shape[1] == 1, i.e., [:,0]
                                X_left, X_right, y = create_uniseq2seq_data(
                                    self.data_scaled[:, 0],
                                    self.pre_context_length,
                                    gap_length,
                                    self.post_context_length
                                )
                            else:
                                raise ValueError(
                                    f"Unexpected shape for data_scaled in univariate mode: {self.data_scaled.shape}"
                                )
                        else:
                            # Multivariate bridging
                            if not isinstance(self.data_scaled, np.ndarray):
                                raise TypeError(f"data_scaled must be a numpy array for multivariate models, got {type(self.data_scaled)}")
                            if len(self.data_scaled.shape) != 2:
                                raise ValueError(
                                    f"Multivariate models require a 2D data_scaled array, got shape {self.data_scaled.shape}"
                                )

                            # Debugging information
                            print(f"Debug: self.data_scaled shape before create_uniseq2seq_data_multi: {self.data_scaled.shape}")
                            print(f"Debug: feature_cols: {feature_cols}")

                            X_left, X_right, y = create_uniseq2seq_data_multi(
                                self.data_scaled,
                                self.pre_context_length,
                                gap_length,
                                self.post_context_length,
                                feature_cols
                            )
                        def time_based_split_3(X1, X2, y, train_size_ratio=0.8):
                            n = len(X1)
                            train_size = int(n * train_size_ratio)
                            return (
                                X1[:train_size], X1[train_size:], 
                                X2[:train_size], X2[train_size:], 
                                y[:train_size],  y[train_size:]
                            )
                        # Split into training and test sets (bridging: 2 inputs -> 6 outputs)
                        X_left_train, X_left_test, X_right_train, X_right_test, y_train, y_test = time_based_split_3(X_left, X_right, y, train_size_ratio=0.8)
                        print(f"  Data prepared at {time.time() - start_time:.2f} seconds")

                        # Model training (two-input call)
                        forward_model, _ = model_info["func"](
                            X_left_train, y_train, X_left_test, y_test,
                            X_right_train, y_train, X_right_test, y_test,
                            self.pre_context_length, self.post_context_length, gap_length,
                            model_info
                        )
                        print(f"  Model trained at {time.time() - start_time:.2f} seconds")

                        # Prediction and evaluation - general function
                        all_true, all_pred = test_synthetic_gaps_imputation_efficiency_uniseq2seq(
                            self.df, self.data_scaled, gap_indices, model_info["forecast"],
                            forward_model, self.pre_context_length, gap_length,
                            self.post_context_length, self.scaler, model_name
                        )
                        print(f"  Prediction completed at {time.time() - start_time:.2f} seconds")

    
                    elif model_info["forecast_type"] == "uniautoreg":
                        # For multivariate mode, use a special function to form autoregressive data
                        if "feature_cols" in model_info and isinstance(self.data_scaled, np.ndarray) and self.data_scaled.ndim == 2 and len(model_info["feature_cols"]) > 1:
                            X_forward, y_forward = create_forward_data_autoreg_multi(self.data_scaled, self.pre_context_length, model_info["feature_cols"])
                        else:
                            X_forward, y_forward = create_forward_data_autoreg(self.data_scaled, self.pre_context_length)
                        X_f_train, X_f_test, y_f_train, y_f_test = time_based_split(X_forward, y_forward, train_size_ratio=0.8)

                        print(f"  Data prepared at {time.time() - start_time:.2f} seconds")
                        forward_model, _ = model_info["func"](
                            X_f_train, y_f_train, X_f_test, y_f_test,
                            None, None, None, None,
                            self.pre_context_length, self.post_context_length, gap_length
                        )
                        print(f"  Model trained at {time.time() - start_time:.2f} seconds")
                        if "feature_cols" in model_info and isinstance(self.data_scaled, np.ndarray) and self.data_scaled.ndim == 2 and len(model_info["feature_cols"]) > 1:
                            all_true, all_pred = test_synthetic_gaps_imputation_efficiency_uniautoreg_multi(
                                self.df, self.data_scaled, gap_indices, model_info["forecast"],
                                forward_model, self.pre_context_length, gap_length, self.scaler, model_name,
                                model_info["feature_cols"]
                            )
                        else:
                            all_true, all_pred = test_synthetic_gaps_imputation_efficiency_uniautoreg(
                                self.df, self.data_scaled, gap_indices, model_info["forecast"],
                                forward_model, self.pre_context_length, gap_length, self.scaler, model_name
                            )
                        print(f"  Prediction completed at {time.time() - start_time:.2f} seconds")

                    elif model_info["forecast_type"] in ["simple", "local", "window"]:
                        window_size = model_info["window_size"] or (15 + gap_length + 15)
                        all_true = []
                        all_pred = []
                        for gap_idx in gap_indices:
                            true_gap = df_missing.loc[gap_idx:gap_idx + gap_length - 1, "pm2_5_original"].values.reshape(-1, 1)
                            # Use window_size from model_info
                            half_window = window_size // 2  # Half of the window before and after the gap
                            start_idx = max(0, gap_idx - half_window)
                            end_idx = min(len(df_missing), gap_idx + gap_length + half_window)
                            if model_info["forecast_type"] == "simple":
                                if model_name == "Simple Imputer Mean":
                                    imputer = SimpleImputer(strategy="mean")
                                elif model_name == "Simple Imputer Median":
                                    imputer = SimpleImputer(strategy="median")
                                data_with_gaps = df_missing["pm2_5"].values.reshape(-1, 1)
                                pred_gap = imputer.fit_transform(data_with_gaps)[gap_idx:gap_idx + gap_length].reshape(-1, 1)
                            elif model_info["forecast_type"] == "local":
                                if model_name in ["Local Imputation Mean", "Local Imputation Median"]:
                                    window_data = df_missing.loc[start_idx:end_idx, "pm2_5"].dropna()
                                    pred_value = np.mean(window_data) if model_name == "Local Imputation Mean" else np.median(window_data)
                                    pred_gap = np.full(gap_length, pred_value if not window_data.empty else np.nan).reshape(-1, 1)
                            elif model_info["forecast_type"] == "window":
                                window_data = df_missing.loc[start_idx:end_idx, "pm2_5"]
                                print(f"Window NaN count: {window_data.isna().sum()}")
                                if model_name == "ARIMA Imputation":
                                    pre_gap_data = window_data[:gap_idx - start_idx].dropna()
                                    print(f"ARIMA: gap_idx={gap_idx}, pre_gap_data length={len(pre_gap_data)}")
                                    if len(pre_gap_data) >= 10:  # Minimum length for fitting
                                        try:
                                            model = ARIMA(pre_gap_data.to_numpy(), 
                                                        order=(1, 0, 0),  # Best baseline
                                                        enforce_stationarity=True, enforce_invertibility=True)
                                            fitted_model = model.fit()
                                            pred_gap = fitted_model.forecast(steps=gap_length)
                                            pred_gap = pred_gap.reshape(-1, 1)
                                            print(f"ARIMA: Prediction successful, pred_gap={pred_gap[:5].ravel()}")
                                        except Exception as e:
                                            print(f"ARIMA: Error during fit - {str(e)}, falling back to linear interpolation")
                                            if len(pre_gap_data) >= 2:
                                                x = np.arange(len(pre_gap_data))
                                                interp_func = interpolate.interp1d(x, pre_gap_data.to_numpy(), kind='linear', fill_value="extrapolate")
                                                pred_x = np.arange(len(pre_gap_data), len(pre_gap_data) + gap_length)
                                                pred_gap = interp_func(pred_x).reshape(-1, 1)
                                            else:
                                                pred_gap = np.full(gap_length, np.nan).reshape(-1, 1)
                                    else:
                                        print(f"ARIMA: Not enough data for fit (len={len(pre_gap_data)})")
                                        pred_gap = np.full(gap_length, np.nan).reshape(-1, 1)
                                elif model_name == "Linear Window Interpolation" and len(window_data.dropna()) >= 2:
                                    x = np.arange(len(window_data))
                                    gap_x = np.arange(gap_idx - start_idx, gap_idx - start_idx + gap_length)
                                    y = window_data.values
                                    mask = ~np.isnan(y)
                                    interp_func = interpolate.interp1d(x[mask], y[mask], kind='linear', bounds_error=False, fill_value=np.nan)
                                    pred_gap = interp_func(gap_x).reshape(-1, 1)
                                elif model_name == "B-spline Window Interpolation" and len(window_data.dropna()) >= 3:
                                    window_data = window_data.dropna()
                                    x = np.arange(len(window_data))
                                    y = window_data.values
                                    gap_x = np.arange(gap_idx - start_idx, gap_idx - start_idx + gap_length)
                                    spline = interpolate.UnivariateSpline(x, y, k=2, ext=0)
                                    pred_gap = spline(gap_x).reshape(-1, 1)
                                elif model_name == "Polynomial Window Interpolation" and len(window_data.dropna()) >= (model_info["poly_degree"] or 3) + 1:
                                    window_data = window_data.dropna()
                                    poly_degree = model_info["poly_degree"] or 3
                                    x = np.arange(len(window_data))
                                    y = window_data.values
                                    coeffs = np.polyfit(x, y, poly_degree)
                                    poly_func = np.poly1d(coeffs)
                                    gap_x = np.arange(gap_idx - start_idx, gap_idx - start_idx + gap_length)
                                    pred_gap = poly_func(gap_x).reshape(-1, 1)
                                else:
                                    pred_gap = np.full(gap_length, np.nan).reshape(-1, 1)
                            if not np.any(np.isnan(true_gap)) and not np.any(np.isnan(pred_gap)):
                                all_true.append(true_gap.ravel())
                                all_pred.append(pred_gap.ravel())
                            else:
                                print(f"Skipping gap_idx={gap_idx} due to NaN in true_gap or pred_gap")
                        if not all_true or not all_pred:
                            raise ValueError("No processed gaps to calculate metrics.")
                        all_true = np.concatenate(all_true)
                        all_pred = np.concatenate(all_pred)

                    else:
                        raise ValueError("Unsupported forecast_type. Use 'seq2seq', 'autoreg', 'uniseq2seq', 'uniautoreg', 'simple', 'local', or 'window'.")

                    metrics = evaluate_model(all_true, all_pred)
                    metrics_runs.append(metrics)
                    all_true_runs.append(all_true.ravel())
                    all_pred_runs.append(all_pred.ravel())
                    run_time = time.time() - run_start_time  # Run execution time
                    run_times.append(run_time)
                    print(f"  Run {run+1}/{n_runs} completed in {run_time:.2f} seconds")

                total_time = time.time() - start_time  # Total time for the model
                metrics_array = np.array(metrics_runs)
                self.results[gap_length][model_name] = {
                    "MAE": {"mean": np.mean(metrics_array[:, 0]), "std": np.std(metrics_array[:, 0])},
                    "RMSE": {"mean": np.mean(metrics_array[:, 1]), "std": np.std(metrics_array[:, 1])},
                    "R2": {"mean": np.mean(metrics_array[:, 2]), "std": np.std(metrics_array[:, 2])},
                    "MAPE": {"mean": np.mean(metrics_array[:, 3]), "std": np.std(metrics_array[:, 3])},
                    "run_times": run_times,  # Time for each run
                    "total_time": total_time  # Total time
                }
                self.predictions[gap_length][model_name] = {
                    "all_true": np.concatenate(all_true_runs),
                    "all_pred": np.concatenate(all_pred_runs)
                }
                print(f"Finished {model_name} on gap length {gap_length} in {total_time:.2f} seconds")
                time.sleep(0.1)
                K.clear_session()
                gc.collect()
                time.sleep(0.1)
    

    # Visualization functions
    def plot_violin_errors(self, output_dir="plots"):
        """
        Save a violin plot of prediction errors (true - predicted) per model.

        Errors of each registered model are pooled over all tested gap lengths.

        Parameters:
        - output_dir: directory for "violin_plot_errors.png" (created if missing)
        """
        # Gather the data first so a missing prediction does not leave an
        # orphaned open figure behind.
        gap_lengths = list(self.results.keys())
        model_names = []
        error_data = []
        for model_name in self.models:
            all_true_agg = np.concatenate([self.predictions[gl][model_name]["all_true"] for gl in gap_lengths])
            all_pred_agg = np.concatenate([self.predictions[gl][model_name]["all_pred"] for gl in gap_lengths])
            error_data.append(all_true_agg - all_pred_agg)
            model_names.append(model_name)

        # The method is public, so do not rely on summarize_results having
        # created the directory.
        os.makedirs(output_dir, exist_ok=True)

        plt.figure(figsize=(12, 6))
        plt.violinplot(error_data, showmedians=True)
        plt.xticks(range(1, len(model_names) + 1), model_names, rotation=45)
        plt.xlabel("Model")
        plt.ylabel("Prediction Error (True - Predicted)")
        plt.title("Distribution of Prediction Errors Across Models")
        plt.grid(True)
        plt.tight_layout()
        plt.savefig(os.path.join(output_dir, "violin_plot_errors.png"), dpi=600)
        plt.close()

    def plot_scatter_by_model(self, gap_length, model_name, output_dir="plots"):
        """
        Save a true-vs-predicted scatter plot for one model and one gap length.

        Parameters:
        - gap_length: gap length whose stored predictions are plotted
        - model_name: name of the registered model
        - output_dir: directory for the PNG file (created if missing)
        """
        all_true = self.predictions[gap_length][model_name]["all_true"]
        all_pred = self.predictions[gap_length][model_name]["all_pred"]
        # Vectorised extrema instead of iterating the array with builtin min/max.
        lowest, highest = np.min(all_true), np.max(all_true)

        # The method is public, so do not rely on the directory already existing.
        os.makedirs(output_dir, exist_ok=True)

        plt.figure(figsize=(8, 6))
        plt.scatter(all_true, all_pred, alpha=0.5, label=f"{model_name} (gap={gap_length})")
        # Identity line: a perfect prediction lies on it.
        plt.plot([lowest, highest], [lowest, highest], 'r--', lw=2)
        plt.xlabel("True Values")
        plt.ylabel("Predicted Values")
        plt.title(f"Scatter Plot: {model_name} (Gap Length: {gap_length})")
        plt.legend()
        plt.grid(True)
        plt.savefig(os.path.join(output_dir, f"scatter_{model_name}_gap_{gap_length}.png"), dpi=600)
        plt.close()

    def plot_scatter_aggregated(self, model_name, output_dir="plots"):
        """
        Save a true-vs-predicted scatter plot for one model, pooled over all gap lengths.

        Parameters:
        - model_name: name of the registered model
        - output_dir: directory for the PNG file (created if missing)
        """
        gap_lengths = list(self.results.keys())
        all_true_agg = np.concatenate([self.predictions[gl][model_name]["all_true"] for gl in gap_lengths])
        all_pred_agg = np.concatenate([self.predictions[gl][model_name]["all_pred"] for gl in gap_lengths])
        # Vectorised extrema instead of iterating the array with builtin min/max.
        lowest, highest = np.min(all_true_agg), np.max(all_true_agg)

        # The method is public, so do not rely on the directory already existing.
        os.makedirs(output_dir, exist_ok=True)

        plt.figure(figsize=(8, 6))
        plt.scatter(all_true_agg, all_pred_agg, alpha=0.5, label=f"{model_name} (All Gaps)")
        # Identity line: a perfect prediction lies on it.
        plt.plot([lowest, highest], [lowest, highest], 'r--', lw=2)
        plt.xlabel("True Values")
        plt.ylabel("Predicted Values")
        plt.title(f"Aggregated Scatter Plot: {model_name}")
        plt.legend()
        plt.grid(True)
        plt.savefig(os.path.join(output_dir, f"scatter_{model_name}_aggregated.png"), dpi=600)
        plt.close()

    def plot_time_series(self, gap_length, model_name, idx=0, output_dir="plots"):
        """
        Save a line plot of true vs. predicted values for one gap-sized slice.

        The slice is elements [idx * gap_length, (idx + 1) * gap_length) of the
        stored predictions for the given model and gap length.

        Parameters:
        - gap_length: gap length whose stored predictions are used
        - model_name: name of the registered model
        - idx: index of the gap-sized slice to plot (default 0)
        - output_dir: directory for the PNG file (created if missing)

        Raises:
        - ValueError: if the requested slice does not hold gap_length values
        """
        true_vals = self.predictions[gap_length][model_name]["all_true"]
        pred_vals = self.predictions[gap_length][model_name]["all_pred"]
        start_idx = idx * gap_length
        true_segment = true_vals[start_idx:start_idx + gap_length]
        pred_segment = pred_vals[start_idx:start_idx + gap_length]
        # Fail with a clear message instead of matplotlib's shape-mismatch error.
        if len(true_segment) != gap_length or len(pred_segment) != gap_length:
            raise ValueError(
                f"idx={idx} is out of range: no complete segment of length {gap_length} "
                f"for {model_name} (only {len(true_vals)} stored values)."
            )

        # The method is public, so do not rely on the directory already existing.
        os.makedirs(output_dir, exist_ok=True)

        steps = range(gap_length)
        plt.figure(figsize=(10, 5))
        plt.plot(steps, true_segment, label="True", marker="o")
        plt.plot(steps, pred_segment, label="Predicted", marker="x")
        plt.xlabel("Time Step")
        plt.ylabel("pm2_5")
        plt.title(f"{model_name} - Time Series Example (Gap Length: {gap_length})")
        plt.legend()
        plt.grid(True)
        plt.savefig(os.path.join(output_dir, f"time_series_{model_name}_gap_{gap_length}_idx_{idx}.png"), dpi=600)
        plt.close()

    def plot_metrics_heatmap(self, output_dir="plots"):
        """
        Save one heatmap per metric (MAE, RMSE, R2, MAPE) of the mean values.

        Rows are gap lengths and columns are models.

        Parameters:
        - output_dir: directory for the "heatmap_<metric>.png" files (created if missing)
        """
        # The method is public, so do not rely on the directory already existing.
        os.makedirs(output_dir, exist_ok=True)

        for metric in ("MAE", "RMSE", "R2", "MAPE"):
            mean_by_gap = {
                gl: {name: stats[metric]["mean"] for name, stats in per_model.items()}
                for gl, per_model in self.results.items()
            }
            # Outer keys become columns, so transpose to get gap lengths as rows.
            heatmap_df = pd.DataFrame(mean_by_gap).T
            plt.figure(figsize=(12, 8))
            sns.heatmap(heatmap_df, annot=True, cmap="YlGnBu", fmt=".3f")
            plt.title(f"{metric} Heatmap Across Models and Gap Lengths")
            plt.xlabel("Model")
            plt.ylabel("Gap Length")
            plt.savefig(os.path.join(output_dir, f"heatmap_{metric}.png"), dpi=600)
            plt.close()

    def plot_metric_trend(self, metric="MAE", output_dir="plots"):
        """
        Save a line plot of the mean of `metric` against gap length, one line per model.

        Parameters:
        - metric: key of the metric in self.results (default "MAE")
        - output_dir: directory for "trend_<metric>.png" (created if missing)
        """
        # Sort once instead of twice per model.
        gap_lengths = sorted(self.results.keys())

        # The method is public, so do not rely on the directory already existing.
        os.makedirs(output_dir, exist_ok=True)

        plt.figure(figsize=(12, 6))
        for model_name in self.models:
            means = [self.results[gl][model_name][metric]["mean"] for gl in gap_lengths]
            plt.plot(gap_lengths, means, label=model_name, marker="o")
        plt.xlabel("Gap Length")
        plt.ylabel(f"{metric}")
        plt.title(f"{metric} Trend Across Gap Lengths")
        plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
        plt.grid(True)
        plt.tight_layout()
        plt.savefig(os.path.join(output_dir, f"trend_{metric}.png"), dpi=600)
        plt.close()

    def plot_grouped_boxplot(self, output_dir="plots"):
        """
        Save a box plot of prediction errors (true - predicted) grouped by forecast_type.

        Errors of all models sharing a forecast_type are pooled over all tested
        gap lengths.

        Parameters:
        - output_dir: directory for "boxplot_by_group.png" (created if missing)
        """
        groups = {}
        for model_name, model_info in self.models.items():
            group_errors = groups.setdefault(model_info["forecast_type"], [])
            for gl in self.results:
                stored = self.predictions[gl][model_name]
                group_errors.extend(stored["all_true"] - stored["all_pred"])

        # The method is public, so do not rely on the directory already existing.
        os.makedirs(output_dir, exist_ok=True)

        plt.figure(figsize=(12, 6))
        # Pass real lists: a dict view is not a sequence.
        plt.boxplot(list(groups.values()), labels=list(groups.keys()))
        plt.xlabel("Method Group")
        plt.ylabel("Prediction Error")
        plt.title("Error Distribution by Method Group")
        plt.grid(True)
        plt.savefig(os.path.join(output_dir, "boxplot_by_group.png"), dpi=600)
        plt.close()
    
    
        
    def summarize_results(self, visualize=False, output_dir="plots"):
        """
        Outputs the test results, creates visualizations (if visualize=True)
        and saves aggregated results for later analysis.
        
        Parameters:
        - visualize: flag to enable/disable visualization (default False)
        - output_dir: directory for saving graphs and results (default "plots")
        """
        summary_list = []
        for gap_length, models in self.results.items():
            print(f"\nResults for gap length {gap_length} (n_runs={self.n_runs}):")
            table_data = []
            for model_name, metrics in models.items():
                avg_run_time = np.mean(metrics["run_times"])  # Average time per run
                total_time = metrics["total_time"]
                row = [
                    model_name,
                    f"{metrics['MAE']['mean']:.3f} ± {metrics['MAE']['std']:.3f}",
                    f"{metrics['RMSE']['mean']:.3f} ± {metrics['RMSE']['std']:.3f}",
                    f"{metrics['R2']['mean']:.3f} ± {metrics['R2']['std']:.3f}",
                    f"{metrics['MAPE']['mean']:.3f} ± {metrics['MAPE']['std']:.3f}",
                    f"{avg_run_time:.2f} | {total_time:.2f}"
                ]
                table_data.append(row)
                # Add a row to the aggregated list of results with model parameters
                summary_list.append({
                    "Gap Length": gap_length,
                    "Model": model_name,
                    "Forecast Type": self.models[model_name]["forecast_type"] if model_name in self.models else None,
                    "n_runs": self.n_runs,
                    "MAE Mean": metrics["MAE"]["mean"],
                    "MAE Std": metrics["MAE"]["std"],
                    "RMSE Mean": metrics["RMSE"]["mean"],
                    "RMSE Std": metrics["RMSE"]["std"],
                    "R2 Mean": metrics["R2"]["mean"],
                    "R2 Std": metrics["R2"]["std"],
                    "MAPE Mean": metrics["MAPE"]["mean"],
                    "MAPE Std": metrics["MAPE"]["std"],
                    "Average Run Time": avg_run_time,
                    "Total Time": total_time,
                    "Epochs": EPOCHS,
                    "Batch Size": BATCH_SIZE,
                    "Patience": PATIENCE,
                    "Estimators": N_ESTIMATORS
                })
            headers = ["Model", "MAE (mean±std)", "RMSE (mean±std)", "R2 (mean±std)", "MAPE (mean±std)", "Time (avg run | total, s)"]
            print(tabulate(table_data, headers=headers, tablefmt="grid", floatfmt=".3f"))
        
        # Saving aggregated results to a DataFrame and writing to a CSV file
        summary_df = pd.DataFrame(summary_list)
        self.results_summary = summary_df  # save in an attribute for later use
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
        summary_file = os.path.join(output_dir, "results_summary.csv")
        summary_df.to_csv(summary_file, index=False)
        print(f"\nAggregated results saved to {summary_file}")
        
        if visualize:
            self.plot_violin_errors(output_dir)
            for gap_length in self.results.keys():
                for model_name in self.models.keys():
                    self.plot_scatter_by_model(gap_length, model_name, output_dir)
            for model_name in self.models.keys():
                self.plot_scatter_aggregated(model_name, output_dir)
            self.plot_metrics_heatmap(output_dir)
            self.plot_metric_trend(metric="MAE", output_dir=output_dir)
            self.plot_metric_trend(metric="RMSE", output_dir=output_dir)
            self.plot_grouped_boxplot(output_dir)
            for gap_length in self.results.keys():
                for model_name in self.models.keys():
                    self.plot_time_series(gap_length, model_name, idx=0, output_dir=output_dir)
                
# Functions for creating and training models

# Functions for creating and training Seq2Seq models with a bidirectional approach
def create_and_train_tcn_seq2seq(X_f_train, y_f_train, X_f_test, y_f_test, X_b_train, y_b_train, X_b_test, y_b_test,
                                 pre_context_length, post_context_length, gap_length):
    """Train forward and backward TCN models that each predict the whole gap at once.

    The forward model takes windows of ``pre_context_length`` steps, the
    backward model windows of ``post_context_length`` steps (one feature per
    step); both end in ``Dense(gap_length)``. Targets are reshaped to
    ``(-1, gap_length)``. The ``*_test`` arrays are passed as validation data,
    so early stopping on ``val_loss`` (with best-weight restoration) is driven
    by them.

    Returns:
        Tuple ``(forward_model, backward_model)`` of fitted Keras models.
    """
    stopper = EarlyStopping(monitor='val_loss', patience=PATIENCE, restore_best_weights=True)

    def _fit_direction(context_length, X_train, y_train, X_val, y_val):
        # Build, compile and train the network for one direction.
        net = Sequential([
            TCN(nb_filters=32, kernel_size=3, dilations=[1, 2, 4], padding='causal',
                use_skip_connections=True, dropout_rate=0.2, return_sequences=False,
                input_shape=(context_length, 1)),
            Dense(gap_length),
        ])
        net.compile(optimizer='adam', loss='mse')
        net.fit(
            X_train, y_train.reshape(-1, gap_length),
            epochs=EPOCHS, batch_size=BATCH_SIZE,
            validation_data=(X_val, y_val.reshape(-1, gap_length)),
            callbacks=[stopper], verbose=0,
        )
        return net

    # Forward is built and trained before backward, as in the sequential layout.
    forward_model = _fit_direction(pre_context_length, X_f_train, y_f_train, X_f_test, y_f_test)
    backward_model = _fit_direction(post_context_length, X_b_train, y_b_train, X_b_test, y_b_test)
    return forward_model, backward_model

def create_and_train_lstm_seq2seq(X_f_train, y_f_train, X_f_test, y_f_test, X_b_train, y_b_train, X_b_test, y_b_test,
                                  pre_context_length, post_context_length, gap_length):
    """Train forward and backward LSTM models that each predict the whole gap at once.

    Each model is ``LSTM(64) -> Dense(gap_length)`` on single-feature windows
    (``pre_context_length`` steps forward, ``post_context_length`` steps
    backward). Targets are reshaped to ``(-1, gap_length)``; the ``*_test``
    arrays serve as validation data for early stopping on ``val_loss``.

    Returns:
        Tuple ``(forward_model, backward_model)`` of fitted Keras models.
    """
    stopper = EarlyStopping(monitor='val_loss', patience=PATIENCE, restore_best_weights=True)

    directions = (
        (pre_context_length, X_f_train, y_f_train, X_f_test, y_f_test),
        (post_context_length, X_b_train, y_b_train, X_b_test, y_b_test),
    )
    trained = []
    # Forward first, then backward; each network is built right before it is fitted.
    for context_length, X_train, y_train, X_val, y_val in directions:
        net = Sequential([
            LSTM(64, input_shape=(context_length, 1), return_sequences=False),
            Dense(gap_length),
        ])
        net.compile(optimizer='adam', loss='mse')
        net.fit(
            X_train, y_train.reshape(-1, gap_length),
            epochs=EPOCHS, batch_size=BATCH_SIZE,
            validation_data=(X_val, y_val.reshape(-1, gap_length)),
            callbacks=[stopper], verbose=0,
        )
        trained.append(net)

    forward_model, backward_model = trained
    return forward_model, backward_model

def create_and_train_gru_seq2seq(X_f_train, y_f_train, X_f_test, y_f_test, X_b_train, y_b_train, X_b_test, y_b_test,
                                 pre_context_length, post_context_length, gap_length):
    """Train forward and backward GRU models that each predict the whole gap at once.

    Each model is ``GRU(64) -> Dense(gap_length)`` on single-feature windows
    (``pre_context_length`` steps forward, ``post_context_length`` steps
    backward). Targets are reshaped to ``(-1, gap_length)``; the ``*_test``
    arrays serve as validation data for early stopping on ``val_loss``.

    Returns:
        Tuple ``(forward_model, backward_model)`` of fitted Keras models.
    """
    stopper = EarlyStopping(monitor='val_loss', patience=PATIENCE, restore_best_weights=True)

    def _fit_direction(context_length, X_train, y_train, X_val, y_val):
        # One recurrent layer summarises the window, a dense head emits the gap.
        net = Sequential([
            GRU(64, input_shape=(context_length, 1), return_sequences=False),
            Dense(gap_length),
        ])
        net.compile(optimizer='adam', loss='mse')
        net.fit(
            X_train, y_train.reshape(-1, gap_length),
            epochs=EPOCHS, batch_size=BATCH_SIZE,
            validation_data=(X_val, y_val.reshape(-1, gap_length)),
            callbacks=[stopper], verbose=0,
        )
        return net

    forward_model = _fit_direction(pre_context_length, X_f_train, y_f_train, X_f_test, y_f_test)
    backward_model = _fit_direction(post_context_length, X_b_train, y_b_train, X_b_test, y_b_test)
    return forward_model, backward_model

def create_and_train_cnn_seq2seq(X_f_train, y_f_train, X_f_test, y_f_test, X_b_train, y_b_train, X_b_test, y_b_test,
                                 pre_context_length, post_context_length, gap_length):
    """Train forward and backward 1-D CNN models that each predict the whole gap at once.

    Each model stacks two causal ``Conv1D`` layers (32 filters, kernel 3,
    ReLU), flattens, and ends in ``Dense(gap_length)``. Inputs are
    single-feature windows of ``pre_context_length`` (forward) or
    ``post_context_length`` (backward) steps. Targets are reshaped to
    ``(-1, gap_length)``; the ``*_test`` arrays serve as validation data for
    early stopping on ``val_loss``.

    Returns:
        Tuple ``(forward_model, backward_model)`` of fitted Keras models.
    """
    stopper = EarlyStopping(monitor='val_loss', patience=PATIENCE, restore_best_weights=True)

    def _fit_direction(context_length, X_train, y_train, X_val, y_val):
        conv_args = dict(filters=32, kernel_size=3, activation='relu', padding='causal')
        net = Sequential([
            Conv1D(input_shape=(context_length, 1), **conv_args),
            Conv1D(**conv_args),
            Flatten(),
            Dense(gap_length),
        ])
        net.compile(optimizer='adam', loss='mse')
        net.fit(
            X_train, y_train.reshape(-1, gap_length),
            epochs=EPOCHS, batch_size=BATCH_SIZE,
            validation_data=(X_val, y_val.reshape(-1, gap_length)),
            callbacks=[stopper], verbose=0,
        )
        return net

    forward_model = _fit_direction(pre_context_length, X_f_train, y_f_train, X_f_test, y_f_test)
    backward_model = _fit_direction(post_context_length, X_b_train, y_b_train, X_b_test, y_b_test)
    return forward_model, backward_model

def create_and_train_rf_seq2seq(X_f_train, y_f_train, X_f_test, y_f_test, X_b_train, y_b_train, X_b_test, y_b_test,
                                pre_context_length, post_context_length, gap_length):
    """Train forward and backward Random Forest models that predict the whole gap at once.

    Inputs and targets are flattened to 2-D (one row per sample) because
    RandomForest does not accept 3-D ``(samples, timesteps, features)`` data.

    The ``*_test`` arrays and the length arguments are not used by this
    trainer (there is no validation step); presumably they are accepted so all
    trainers share one call signature. Flattened copies of the test inputs
    were previously computed and discarded; that dead work has been removed.

    Returns:
        Tuple ``(forward_model, backward_model)`` of fitted
        ``RandomForestRegressor`` instances.
    """
    def _fit_direction(X_train, y_train):
        # Collapse every axis after the sample axis into one feature/target axis.
        model = RandomForestRegressor(n_estimators=N_ESTIMATORS, random_state=42)
        model.fit(X_train.reshape(X_train.shape[0], -1),
                  y_train.reshape(y_train.shape[0], -1))
        return model

    forward_model = _fit_direction(X_f_train, y_f_train)
    backward_model = _fit_direction(X_b_train, y_b_train)
    return forward_model, backward_model

def create_and_train_xgb_seq2seq(X_f_train, y_f_train, X_f_test, y_f_test, X_b_train, y_b_train, X_b_test, y_b_test,
                                 pre_context_length, post_context_length, gap_length):
    """Train forward and backward XGBoost models that predict the whole gap at once.

    Inputs and targets are flattened to 2-D (one row per sample) because
    XGBoost does not accept 3-D ``(samples, timesteps, features)`` data.

    The ``*_test`` arrays and the length arguments are not used by this
    trainer (there is no validation step); presumably they are accepted so all
    trainers share one call signature. Flattened copies of the test inputs
    were previously computed and discarded; that dead work has been removed.

    Returns:
        Tuple ``(forward_model, backward_model)`` of fitted ``XGBRegressor``
        instances.
    """
    def _fit_direction(X_train, y_train):
        # Collapse every axis after the sample axis into one feature/target axis.
        model = XGBRegressor(n_estimators=N_ESTIMATORS, random_state=42)
        model.fit(X_train.reshape(X_train.shape[0], -1),
                  y_train.reshape(y_train.shape[0], -1))
        return model

    forward_model = _fit_direction(X_f_train, y_f_train)
    backward_model = _fit_direction(X_b_train, y_b_train)
    return forward_model, backward_model

def create_and_train_rnn_seq2seq(X_f_train, y_f_train, X_f_test, y_f_test, X_b_train, y_b_train, X_b_test, y_b_test,
                                 pre_context_length, post_context_length, gap_length):
    """Train forward and backward SimpleRNN models that each predict the whole gap at once.

    Each model is ``SimpleRNN(64) -> Dense(gap_length)`` on single-feature
    windows (``pre_context_length`` steps forward, ``post_context_length``
    steps backward). Targets are reshaped to ``(-1, gap_length)``; the
    ``*_test`` arrays serve as validation data for early stopping on
    ``val_loss``.

    Returns:
        Tuple ``(forward_model, backward_model)`` of fitted Keras models.
    """
    stopper = EarlyStopping(monitor='val_loss', patience=PATIENCE, restore_best_weights=True)

    directions = (
        (pre_context_length, X_f_train, y_f_train, X_f_test, y_f_test),
        (post_context_length, X_b_train, y_b_train, X_b_test, y_b_test),
    )
    trained = []
    # Forward first, then backward; each network is built right before it is fitted.
    for context_length, X_train, y_train, X_val, y_val in directions:
        net = Sequential([
            SimpleRNN(64, input_shape=(context_length, 1), return_sequences=False),
            Dense(gap_length),
        ])
        net.compile(optimizer='adam', loss='mse')
        net.fit(
            X_train, y_train.reshape(-1, gap_length),
            epochs=EPOCHS, batch_size=BATCH_SIZE,
            validation_data=(X_val, y_val.reshape(-1, gap_length)),
            callbacks=[stopper], verbose=0,
        )
        trained.append(net)

    forward_model, backward_model = trained
    return forward_model, backward_model

# Functions for creating and training autoregressive models with a bidirectional approach
def create_and_train_rnn_autoreg(X_f_train, y_f_train, X_f_test, y_f_test, X_b_train, y_b_train, X_b_test, y_b_test,
                                 pre_context_length, post_context_length, gap_length):
    """Train forward and backward single-output SimpleRNN models.

    Each model is ``SimpleRNN(64, tanh) -> Dense(1)`` on single-feature
    windows (``pre_context_length`` steps forward, ``post_context_length``
    steps backward). Targets are used as given. The ``*_test`` arrays serve as
    validation data for early stopping on ``val_loss``. ``gap_length`` is not
    used here; how the one-value predictions are rolled out over a gap is up
    to the caller.

    Returns:
        Tuple ``(forward_model, backward_model)`` of fitted Keras models.
    """
    stopper = EarlyStopping(monitor='val_loss', patience=PATIENCE, restore_best_weights=True)

    def _fit_direction(context_length, X_train, y_train, X_val, y_val):
        window = Input(shape=(context_length, 1))
        hidden = SimpleRNN(64, activation='tanh')(window)
        net = Model(window, Dense(1)(hidden))
        net.compile(optimizer=Adam(), loss='mse')
        net.fit(
            X_train, y_train,
            epochs=EPOCHS, batch_size=BATCH_SIZE,
            validation_data=(X_val, y_val),
            callbacks=[stopper], verbose=0,
        )
        return net

    forward_model = _fit_direction(pre_context_length, X_f_train, y_f_train, X_f_test, y_f_test)
    backward_model = _fit_direction(post_context_length, X_b_train, y_b_train, X_b_test, y_b_test)
    return forward_model, backward_model

def create_and_train_lstm_autoreg(X_f_train, y_f_train, X_f_test, y_f_test, X_b_train, y_b_train, X_b_test, y_b_test,
                                  pre_context_length, post_context_length, gap_length):
    """Train forward and backward single-output LSTM models.

    Each model is ``LSTM(64, tanh) -> Dense(1)`` on single-feature windows
    (``pre_context_length`` steps forward, ``post_context_length`` steps
    backward). Targets are used as given. The ``*_test`` arrays serve as
    validation data for early stopping on ``val_loss``. ``gap_length`` is not
    used here; how the one-value predictions are rolled out over a gap is up
    to the caller.

    Returns:
        Tuple ``(forward_model, backward_model)`` of fitted Keras models.
    """
    stopper = EarlyStopping(monitor='val_loss', patience=PATIENCE, restore_best_weights=True)

    def _fit_direction(context_length, X_train, y_train, X_val, y_val):
        window = Input(shape=(context_length, 1))
        hidden = LSTM(64, activation='tanh')(window)
        net = Model(window, Dense(1)(hidden))
        net.compile(optimizer=Adam(), loss='mse')
        net.fit(
            X_train, y_train,
            epochs=EPOCHS, batch_size=BATCH_SIZE,
            validation_data=(X_val, y_val),
            callbacks=[stopper], verbose=0,
        )
        return net

    forward_model = _fit_direction(pre_context_length, X_f_train, y_f_train, X_f_test, y_f_test)
    backward_model = _fit_direction(post_context_length, X_b_train, y_b_train, X_b_test, y_b_test)
    return forward_model, backward_model

def create_and_train_gru_autoreg(X_f_train, y_f_train, X_f_test, y_f_test, X_b_train, y_b_train, X_b_test, y_b_test,
                                 pre_context_length, post_context_length, gap_length):
    """Train forward and backward single-output GRU models.

    Each model is ``GRU(64, tanh) -> Dense(1)`` on single-feature windows
    (``pre_context_length`` steps forward, ``post_context_length`` steps
    backward). Targets are used as given. The ``*_test`` arrays serve as
    validation data for early stopping on ``val_loss``. ``gap_length`` is not
    used here; how the one-value predictions are rolled out over a gap is up
    to the caller.

    Returns:
        Tuple ``(forward_model, backward_model)`` of fitted Keras models.
    """
    stopper = EarlyStopping(monitor='val_loss', patience=PATIENCE, restore_best_weights=True)

    directions = (
        (pre_context_length, X_f_train, y_f_train, X_f_test, y_f_test),
        (post_context_length, X_b_train, y_b_train, X_b_test, y_b_test),
    )
    trained = []
    # Forward first, then backward; each network is built right before it is fitted.
    for context_length, X_train, y_train, X_val, y_val in directions:
        window = Input(shape=(context_length, 1))
        hidden = GRU(64, activation='tanh')(window)
        net = Model(window, Dense(1)(hidden))
        net.compile(optimizer=Adam(), loss='mse')
        net.fit(
            X_train, y_train,
            epochs=EPOCHS, batch_size=BATCH_SIZE,
            validation_data=(X_val, y_val),
            callbacks=[stopper], verbose=0,
        )
        trained.append(net)

    forward_model, backward_model = trained
    return forward_model, backward_model

def create_and_train_cnn_autoreg(X_f_train, y_f_train, X_f_test, y_f_test, X_b_train, y_b_train, X_b_test, y_b_test,
                                 pre_context_length, post_context_length, gap_length):
    """Train forward and backward single-output 1-D CNN models.

    Each model is one causal ``Conv1D`` (32 filters, kernel 3, ReLU) followed
    by ``Flatten`` and ``Dense(1)``, fed single-feature windows of
    ``pre_context_length`` (forward) or ``post_context_length`` (backward)
    steps. Targets are used as given. The ``*_test`` arrays serve as
    validation data for early stopping on ``val_loss``. ``gap_length`` is not
    used here; how the one-value predictions are rolled out over a gap is up
    to the caller.

    Returns:
        Tuple ``(forward_model, backward_model)`` of fitted Keras models.
    """
    stopper = EarlyStopping(monitor='val_loss', patience=PATIENCE, restore_best_weights=True)

    def _fit_direction(context_length, X_train, y_train, X_val, y_val):
        window = Input(shape=(context_length, 1))
        features = Conv1D(filters=32, kernel_size=3, activation='relu', padding='causal')(window)
        features = Flatten()(features)
        net = Model(window, Dense(1)(features))
        net.compile(optimizer=Adam(), loss='mse')
        net.fit(
            X_train, y_train,
            epochs=EPOCHS, batch_size=BATCH_SIZE,
            validation_data=(X_val, y_val),
            callbacks=[stopper], verbose=0,
        )
        return net

    forward_model = _fit_direction(pre_context_length, X_f_train, y_f_train, X_f_test, y_f_test)
    backward_model = _fit_direction(post_context_length, X_b_train, y_b_train, X_b_test, y_b_test)
    return forward_model, backward_model

def create_and_train_rf_autoreg(X_f_train, y_f_train, X_f_test, y_f_test, X_b_train, y_b_train, X_b_test, y_b_test,
                                pre_context_length, post_context_length, gap_length):
    """Train forward and backward single-output Random Forest models.

    Inputs are flattened to 2-D (one row per sample) because RandomForest does
    not accept 3-D ``(samples, timesteps, features)`` data. Targets are passed
    as a 1-D vector, matching the XGBoost autoregressive trainer; a
    ``(n, 1)`` column vector makes scikit-learn emit a
    ``DataConversionWarning`` on every fit.

    The ``*_test`` arrays and the length arguments are not used by this
    trainer (there is no validation step); presumably they are accepted so all
    trainers share one call signature. Flattened copies of the test inputs
    were previously computed and discarded; that dead work has been removed.

    Returns:
        Tuple ``(forward_model, backward_model)`` of fitted
        ``RandomForestRegressor`` instances.
    """
    def _fit_direction(X_train, y_train):
        model = RandomForestRegressor(n_estimators=N_ESTIMATORS, random_state=42)
        # ravel() yields the 1-D target scikit-learn expects for one output.
        model.fit(X_train.reshape(X_train.shape[0], -1), y_train.ravel())
        return model

    forward_model = _fit_direction(X_f_train, y_f_train)
    backward_model = _fit_direction(X_b_train, y_b_train)
    return forward_model, backward_model

def create_and_train_xgb_autoreg(X_f_train, y_f_train, X_f_test, y_f_test, X_b_train, y_b_train, X_b_test, y_b_test,
                                 pre_context_length, post_context_length, gap_length):
    """Train forward and backward single-output XGBoost models.

    Inputs are flattened to 2-D (one row per sample) because XGBoost does not
    accept 3-D ``(samples, timesteps, features)`` data; targets are flattened
    to 1-D with ``ravel()``.

    The ``*_test`` arrays and the length arguments are not used by this
    trainer (there is no validation step); presumably they are accepted so all
    trainers share one call signature. Flattened copies of the test inputs
    were previously computed and discarded; that dead work has been removed.

    Returns:
        Tuple ``(forward_model, backward_model)`` of fitted ``XGBRegressor``
        instances.
    """
    def _fit_direction(X_train, y_train):
        model = XGBRegressor(n_estimators=N_ESTIMATORS, random_state=42)
        model.fit(X_train.reshape(X_train.shape[0], -1), y_train.ravel())  # 1-D target
        return model

    forward_model = _fit_direction(X_f_train, y_f_train)
    backward_model = _fit_direction(X_b_train, y_b_train)
    return forward_model, backward_model

def create_and_train_tcn_autoreg(X_f_train, y_f_train, X_f_test, y_f_test, X_b_train, y_b_train, X_b_test, y_b_test,
                                 pre_context_length, post_context_length, gap_length):
    """Train forward and backward single-output TCN models.

    Each model is a causal TCN (32 filters, kernel 3, dilations 1/2/4, skip
    connections, dropout 0.2) followed by ``Dense(1)``, fed single-feature
    windows of ``pre_context_length`` (forward) or ``post_context_length``
    (backward) steps. Targets are used as given. The ``*_test`` arrays serve
    as validation data for early stopping on ``val_loss``. ``gap_length`` is
    not used here; how the one-value predictions are rolled out over a gap is
    up to the caller.

    Returns:
        Tuple ``(forward_model, backward_model)`` of fitted Keras models.
    """
    stopper = EarlyStopping(monitor='val_loss', patience=PATIENCE, restore_best_weights=True)

    def _fit_direction(context_length, X_train, y_train, X_val, y_val):
        window = Input(shape=(context_length, 1))
        encoded = TCN(nb_filters=32, kernel_size=3, dilations=[1, 2, 4], padding='causal',
                      use_skip_connections=True, dropout_rate=0.2, return_sequences=False)(window)
        net = Model(window, Dense(1)(encoded))
        net.compile(optimizer=Adam(), loss='mse')
        net.fit(
            X_train, y_train,
            epochs=EPOCHS, batch_size=BATCH_SIZE,
            validation_data=(X_val, y_val),
            callbacks=[stopper], verbose=0,
        )
        return net

    # Forward is built and trained before backward, as in the sequential layout.
    forward_model = _fit_direction(pre_context_length, X_f_train, y_f_train, X_f_test, y_f_test)
    backward_model = _fit_direction(post_context_length, X_b_train, y_b_train, X_b_test, y_b_test)
    return forward_model, backward_model

# Functions for creating and training Context-combined single-step Seq2Seq models (UniSeq2Seq)
# Function for creating and training a UniSeq2Seq LSTM model
def create_and_train_uniseq2seq_lstm(X_f_train, y_f_train, X_f_test, y_f_test,
                                     X_b_train, y_b_train, X_b_test, y_b_test,
                                     pre_context_length, post_context_length, gap_length, model_info=None):
    """Train one LSTM encoder-decoder that fills the gap from both contexts.

    The left and right context windows are concatenated along the time axis,
    encoded by a bidirectional ``LSTM(64)``, repeated ``gap_length`` times and
    decoded by ``LSTM(64)`` with a per-step ``Dense(1)``. The feature count is
    read from ``X_f_train.shape[2]``. Only ``y_f_*`` is used as the target
    (reshaped to ``(-1, gap_length, 1)``); ``y_b_*`` is ignored. The
    ``*_test`` arrays serve as validation data for early stopping.

    Args:
        model_info: Optional metadata; when truthy it is attached to the
            returned model as ``model.model_info``.

    Returns:
        Tuple ``(model, None)``; the second slot is always ``None`` because a
        single model covers both directions.
    """
    stopper = EarlyStopping(monitor='val_loss', patience=PATIENCE, restore_best_weights=True)
    feature_count = X_f_train.shape[2]  # Number of features (always 1 for univariate)

    # Two named inputs, joined in time before encoding.
    left_context = Input(shape=(pre_context_length, feature_count), name="encoder_input_left")
    right_context = Input(shape=(post_context_length, feature_count), name="encoder_input_right")
    joined = Concatenate(axis=1)([left_context, right_context])

    # Encoder summary vector -> repeated once per gap step -> sequence decoder.
    summary = Bidirectional(LSTM(64, return_sequences=False))(joined)
    repeated = RepeatVector(gap_length)(summary)
    decoded = LSTM(64, return_sequences=True)(repeated)
    gap_prediction = TimeDistributed(Dense(1))(decoded)

    model = Model(inputs=[left_context, right_context], outputs=gap_prediction)
    model.compile(optimizer=Adam(), loss='mse')

    train_target = y_f_train.reshape(-1, gap_length, 1)
    val_target = y_f_test.reshape(-1, gap_length, 1)
    model.fit(
        [X_f_train, X_b_train], train_target,
        epochs=EPOCHS, batch_size=BATCH_SIZE,
        validation_data=([X_f_test, X_b_test], val_target),
        callbacks=[stopper], verbose=0,
    )

    if model_info:
        model.model_info = model_info
    return model, None

# Function for creating and training a UniSeq2Seq GRU model
def create_and_train_uniseq2seq_gru(X_f_train, y_f_train, X_f_test, y_f_test,
                                    X_b_train, y_b_train, X_b_test, y_b_test,
                                    pre_context_length, post_context_length, gap_length, model_info=None):
    """Train one GRU encoder-decoder that fills the gap from both contexts.

    The single-feature left and right context windows are concatenated along
    the time axis, encoded by a bidirectional ``GRU(64)``, repeated
    ``gap_length`` times and decoded by ``GRU(64)`` with a per-step
    ``Dense(1)``. Only ``y_f_*`` is used as the target (reshaped to
    ``(-1, gap_length, 1)``); ``y_b_*`` is ignored. The ``*_test`` arrays
    serve as validation data for early stopping.

    Args:
        model_info: Optional metadata; when truthy it is attached to the
            returned model as ``model.model_info``.

    Returns:
        Tuple ``(model, None)``; a single model covers both directions.
    """
    stopper = EarlyStopping(monitor='val_loss', patience=PATIENCE, restore_best_weights=True)

    left_context = Input(shape=(pre_context_length, 1), name="encoder_input_left")
    right_context = Input(shape=(post_context_length, 1), name="encoder_input_right")
    joined = Concatenate(axis=1)([left_context, right_context])

    # Encoder summary vector -> repeated once per gap step -> sequence decoder.
    summary = Bidirectional(GRU(64, return_sequences=False))(joined)
    repeated = RepeatVector(gap_length)(summary)
    decoded = GRU(64, return_sequences=True)(repeated)
    gap_prediction = TimeDistributed(Dense(1))(decoded)

    model = Model(inputs=[left_context, right_context], outputs=gap_prediction)
    model.compile(optimizer=Adam(), loss='mse')

    train_target = y_f_train.reshape(-1, gap_length, 1)
    val_target = y_f_test.reshape(-1, gap_length, 1)
    model.fit(
        [X_f_train, X_b_train], train_target,
        epochs=EPOCHS, batch_size=BATCH_SIZE,
        validation_data=([X_f_test, X_b_test], val_target),
        callbacks=[stopper], verbose=0,
    )

    if model_info:
        model.model_info = model_info
    return model, None  # UniSeq2Seq uses one model

# Function for creating and training a UniSeq2Seq TCN model
def create_and_train_uniseq2seq_tcn(X_f_train, y_f_train, X_f_test, y_f_test,
                                    X_b_train, y_b_train, X_b_test, y_b_test,
                                    pre_context_length, post_context_length, gap_length, model_info=None):
    """Train one TCN encoder-decoder that fills the gap from both contexts.

    The single-feature left and right context windows are concatenated along
    the time axis and encoded by a TCN (64 filters, kernel 3, dilations
    1/2/4/8). The encoding is repeated ``gap_length`` times, decoded by a
    second TCN with the same settings that returns sequences, and mapped to
    one value per step by ``Dense(1)``. Only ``y_f_*`` is used as the target
    (reshaped to ``(-1, gap_length, 1)``); ``y_b_*`` is ignored. The
    ``*_test`` arrays serve as validation data for early stopping.

    Args:
        model_info: Optional metadata; when truthy it is attached to the
            returned model as ``model.model_info``.

    Returns:
        Tuple ``(model, None)``; a single model covers both directions.
    """
    stopper = EarlyStopping(monitor='val_loss', patience=PATIENCE, restore_best_weights=True)

    left_context = Input(shape=(pre_context_length, 1), name="encoder_input_left")
    right_context = Input(shape=(post_context_length, 1), name="encoder_input_right")
    joined = Concatenate(axis=1)([left_context, right_context])

    # Encoder summary vector -> repeated once per gap step -> sequence decoder.
    summary = TCN(nb_filters=64, kernel_size=3, dilations=[1, 2, 4, 8], return_sequences=False)(joined)
    repeated = RepeatVector(gap_length)(summary)
    decoded = TCN(nb_filters=64, kernel_size=3, dilations=[1, 2, 4, 8], return_sequences=True)(repeated)
    gap_prediction = TimeDistributed(Dense(1))(decoded)

    model = Model(inputs=[left_context, right_context], outputs=gap_prediction)
    model.compile(optimizer=Adam(), loss='mse')

    train_target = y_f_train.reshape(-1, gap_length, 1)
    val_target = y_f_test.reshape(-1, gap_length, 1)
    model.fit(
        [X_f_train, X_b_train], train_target,
        epochs=EPOCHS, batch_size=BATCH_SIZE,
        validation_data=([X_f_test, X_b_test], val_target),
        callbacks=[stopper], verbose=0,
    )

    if model_info:
        model.model_info = model_info
    return model, None  # UniSeq2Seq uses one model

# Function for creating and training a UniSeq2Seq CNN model
def create_and_train_uniseq2seq_cnn(X_f_train, y_f_train, X_f_test, y_f_test,
                                    X_b_train, y_b_train, X_b_test, y_b_test,
                                    pre_context_length, post_context_length, gap_length, model_info=None):
    """Train one convolutional encoder-decoder that fills the gap from both contexts.

    Encoder: the single-feature left and right windows are concatenated along
    the time axis, passed through two causal ``Conv1D`` layers (32 then 64
    filters), flattened and projected to a 128-unit latent vector.
    Decoder: the latent vector is expanded to ``gap_length * 64`` units,
    reshaped to ``(gap_length, 64)`` and refined by two ``same``-padded
    ``Conv1D`` layers, the last one linear with a single filter.
    Only ``y_f_*`` is used as the target (reshaped to
    ``(-1, gap_length, 1)``); ``y_b_*`` is ignored. The ``*_test`` arrays
    serve as validation data for early stopping.

    Args:
        model_info: Optional metadata; when truthy it is attached to the
            returned model as ``model.model_info``.

    Returns:
        Tuple ``(model, None)``; a single model covers both directions.
    """
    stopper = EarlyStopping(monitor='val_loss', patience=PATIENCE, restore_best_weights=True)

    left_context = Input(shape=(pre_context_length, 1), name="encoder_input_left")
    right_context = Input(shape=(post_context_length, 1), name="encoder_input_right")
    joined = Concatenate(axis=1)([left_context, right_context])

    # --- Encoder: convolutional features -> flat vector -> latent code ---
    enc = Conv1D(filters=32, kernel_size=3, activation='relu', padding='causal')(joined)
    enc = Conv1D(filters=64, kernel_size=3, activation='relu', padding='causal')(enc)
    enc = Flatten()(enc)
    latent_code = Dense(128, activation='relu')(enc)

    # --- Decoder: latent code -> (gap_length, 64) map -> one value per step ---
    dec = Dense(gap_length * 64, activation='relu')(latent_code)
    dec = Reshape((gap_length, 64))(dec)
    dec = Conv1D(filters=32, kernel_size=3, activation='relu', padding='same')(dec)
    gap_prediction = Conv1D(filters=1, kernel_size=3, activation='linear', padding='same')(dec)

    model = Model(inputs=[left_context, right_context], outputs=gap_prediction)
    model.compile(optimizer=Adam(), loss='mse')

    train_target = y_f_train.reshape(-1, gap_length, 1)
    val_target = y_f_test.reshape(-1, gap_length, 1)
    model.fit(
        [X_f_train, X_b_train], train_target,
        epochs=EPOCHS, batch_size=BATCH_SIZE,
        validation_data=([X_f_test, X_b_test], val_target),
        callbacks=[stopper], verbose=0,
    )

    if model_info:
        model.model_info = model_info
    return model, None  # UniSeq2Seq uses one model

# Function for creating and training a UniSeq2Seq RNN model
def create_and_train_uniseq2seq_rnn(X_f_train, y_f_train, X_f_test, y_f_test,
                                    X_b_train, y_b_train, X_b_test, y_b_test,
                                    pre_context_length, post_context_length, gap_length, model_info=None):
    """Train one SimpleRNN encoder-decoder that fills the gap from both contexts.

    The single-feature left and right context windows are concatenated along
    the time axis, encoded by a bidirectional ``SimpleRNN(64)``, repeated
    ``gap_length`` times and decoded by ``SimpleRNN(64)`` with a per-step
    ``Dense(1)``. Only ``y_f_*`` is used as the target (reshaped to
    ``(-1, gap_length, 1)``); ``y_b_*`` is ignored. The ``*_test`` arrays
    serve as validation data for early stopping.

    Args:
        model_info: Optional metadata; when truthy it is attached to the
            returned model as ``model.model_info``.

    Returns:
        Tuple ``(model, None)``; a single model covers both directions.
    """
    stopper = EarlyStopping(monitor='val_loss', patience=PATIENCE, restore_best_weights=True)

    left_context = Input(shape=(pre_context_length, 1), name="encoder_input_left")
    right_context = Input(shape=(post_context_length, 1), name="encoder_input_right")
    joined = Concatenate(axis=1)([left_context, right_context])

    # Encoder summary vector -> repeated once per gap step -> sequence decoder.
    summary = Bidirectional(SimpleRNN(64, return_sequences=False))(joined)
    repeated = RepeatVector(gap_length)(summary)
    decoded = SimpleRNN(64, return_sequences=True)(repeated)
    gap_prediction = TimeDistributed(Dense(1))(decoded)

    model = Model(inputs=[left_context, right_context], outputs=gap_prediction)
    model.compile(optimizer=Adam(), loss='mse')

    train_target = y_f_train.reshape(-1, gap_length, 1)
    val_target = y_f_test.reshape(-1, gap_length, 1)
    model.fit(
        [X_f_train, X_b_train], train_target,
        epochs=EPOCHS, batch_size=BATCH_SIZE,
        validation_data=([X_f_test, X_b_test], val_target),
        callbacks=[stopper], verbose=0,
    )

    if model_info:
        model.model_info = model_info
    return model, None  # UniSeq2Seq uses one model

# Function for creating and training a UniSeq2Seq Random Forest model
def create_and_train_uniseq2seq_rf(X_f_train, y_f_train, X_f_test, y_f_test,
                                   X_b_train, y_b_train, X_b_test, y_b_test,
                                   pre_context_length, post_context_length, gap_length, model_info=None):
    """Train one multi-output Random Forest that fills the gap from both contexts.

    The left and right training contexts are concatenated along axis 1 and
    flattened to one row per sample; the forward targets are flattened the
    same way. One ``RandomForestRegressor`` per target column is fitted via
    ``MultiOutputRegressor``.

    The ``*_test`` arrays, ``y_b_train`` and the length arguments are not used
    (there is no validation step); presumably they are accepted so all
    trainers share one call signature. A concatenated and flattened copy of
    the test contexts was previously built and discarded; that dead work,
    which copied the whole test set, has been removed.

    Args:
        model_info: Optional metadata; when truthy it is attached to the
            returned model as ``model.model_info``.

    Returns:
        Tuple ``(model, None)``; a single model covers both directions.
    """
    combined_context = np.concatenate([X_f_train, X_b_train], axis=1)  # Combine contexts
    features = combined_context.reshape(combined_context.shape[0], -1)  # Flat vector
    targets = y_f_train.reshape(y_f_train.shape[0], -1)

    rf_model = MultiOutputRegressor(RandomForestRegressor(n_estimators=N_ESTIMATORS, random_state=42))
    rf_model.fit(features, targets)

    # Save model_info as a model attribute
    if model_info:
        rf_model.model_info = model_info

    return rf_model, None  # UniSeq2Seq uses one model

# Function for creating and training a UniSeq2Seq XGBoost model
def create_and_train_uniseq2seq_xgb(X_f_train, y_f_train, X_f_test, y_f_test,
                                    X_b_train, y_b_train, X_b_test, y_b_test,
                                    pre_context_length, post_context_length, gap_length, model_info=None):
    """Train one multi-output XGBoost model that fills the gap from both contexts.

    The left and right training contexts are concatenated along axis 1 and
    flattened to one row per sample; the forward targets are flattened the
    same way. One ``XGBRegressor`` per target column is fitted via
    ``MultiOutputRegressor``.

    The ``*_test`` arrays, ``y_b_train`` and the length arguments are not used
    (there is no validation step); presumably they are accepted so all
    trainers share one call signature. A concatenated and flattened copy of
    the test contexts was previously built and discarded; that dead work,
    which copied the whole test set, has been removed.

    Args:
        model_info: Optional metadata; when truthy it is attached to the
            returned model as ``model.model_info``. Added with a default so
            this trainer accepts the same arguments as the other UniSeq2Seq
            trainers; existing calls without it are unaffected.

    Returns:
        Tuple ``(model, None)``; a single model covers both directions.
    """
    # Convert 3D data (samples, timesteps, features) to 2D (samples, timesteps*features)
    combined_context = np.concatenate([X_f_train, X_b_train], axis=1)  # Combine left and right contexts
    features = combined_context.reshape(combined_context.shape[0], -1)  # Flat vector
    targets = y_f_train.reshape(y_f_train.shape[0], -1)  # Flat vector of the gap

    # Create and train the model
    xgb_model = MultiOutputRegressor(XGBRegressor(n_estimators=N_ESTIMATORS, random_state=42))
    xgb_model.fit(features, targets)

    # Save model_info as a model attribute
    if model_info:
        xgb_model.model_info = model_info

    return xgb_model, None  # UniSeq2Seq uses one model


# Functions for creating and training single-step unidirectional autoregressive models (UniAutoreg)
def create_and_train_uniautoreg_lstm(X_f_train, y_f_train, X_f_test, y_f_test,
                                     X_b_train, y_b_train, X_b_test, y_b_test,
                                     pre_context_length, post_context_length, gap_length):
    """Train a single forward-only LSTM that outputs one value per window.

    The model is ``LSTM(64) -> Dense(1)`` on single-feature windows of
    ``pre_context_length`` steps. Only the forward arrays are used;
    ``X_b_*``/``y_b_*``, ``post_context_length`` and ``gap_length`` are
    ignored. ``X_f_test``/``y_f_test`` serve as validation data for early
    stopping on ``val_loss``.

    Training is silent (``verbose=0``) like every other Keras trainer in this
    file; it previously used ``verbose=1`` and printed per-epoch progress for
    each fit.

    Returns:
        Tuple ``(model, None)``; there is no backward model.
    """
    early_stopping = EarlyStopping(monitor='val_loss', patience=PATIENCE, restore_best_weights=True)
    model = Sequential([
        LSTM(64, input_shape=(pre_context_length, 1), return_sequences=False),
        Dense(1),
    ])
    model.compile(optimizer=Adam(), loss='mse')
    model.fit(X_f_train, y_f_train, epochs=EPOCHS, batch_size=BATCH_SIZE,
              validation_data=(X_f_test, y_f_test), callbacks=[early_stopping], verbose=0)
    return model, None

def create_and_train_uniautoreg_gru(X_f_train, y_f_train, X_f_test, y_f_test,
                                    X_b_train, y_b_train, X_b_test, y_b_test,
                                    pre_context_length, post_context_length, gap_length):
    """Train a single forward-only GRU that outputs one value per window.

    The model is ``GRU(64) -> Dense(1)`` on single-feature windows of
    ``pre_context_length`` steps. Only the forward arrays are used;
    ``X_b_*``/``y_b_*``, ``post_context_length`` and ``gap_length`` are
    ignored. ``X_f_test``/``y_f_test`` serve as validation data for early
    stopping on ``val_loss``.

    Training is silent (``verbose=0``) like every other Keras trainer in this
    file; it previously used ``verbose=1`` and printed per-epoch progress for
    each fit.

    Returns:
        Tuple ``(model, None)``; there is no backward model.
    """
    early_stopping = EarlyStopping(monitor='val_loss', patience=PATIENCE, restore_best_weights=True)
    model = Sequential([
        GRU(64, input_shape=(pre_context_length, 1), return_sequences=False),
        Dense(1),
    ])
    model.compile(optimizer=Adam(), loss='mse')
    model.fit(X_f_train, y_f_train, epochs=EPOCHS, batch_size=BATCH_SIZE,
              validation_data=(X_f_test, y_f_test), callbacks=[early_stopping], verbose=0)
    return model, None

def create_and_train_uniautoreg_tcn(X_f_train, y_f_train, X_f_test, y_f_test,
                                    X_b_train, y_b_train, X_b_test, y_b_test,
                                    pre_context_length, post_context_length, gap_length):
    """Train a single forward-only TCN that outputs one value per window.

    The model is a TCN (64 filters, kernel 3, dilations 1/2/4/8) followed by
    ``Dense(1)`` on single-feature windows of ``pre_context_length`` steps.
    Only the forward arrays are used; ``X_b_*``/``y_b_*``,
    ``post_context_length`` and ``gap_length`` are ignored.
    ``X_f_test``/``y_f_test`` serve as validation data for early stopping on
    ``val_loss``.

    Training is silent (``verbose=0``) like every other Keras trainer in this
    file; it previously used ``verbose=1`` and printed per-epoch progress for
    each fit.

    Returns:
        Tuple ``(model, None)``; there is no backward model.
    """
    early_stopping = EarlyStopping(monitor='val_loss', patience=PATIENCE, restore_best_weights=True)
    model = Sequential([
        TCN(nb_filters=64, kernel_size=3, dilations=[1, 2, 4, 8],
            input_shape=(pre_context_length, 1), return_sequences=False),
        Dense(1),
    ])
    model.compile(optimizer=Adam(), loss='mse')
    model.fit(X_f_train, y_f_train, epochs=EPOCHS, batch_size=BATCH_SIZE,
              validation_data=(X_f_test, y_f_test), callbacks=[early_stopping], verbose=0)
    return model, None

def create_and_train_uniautoreg_cnn(X_f_train, y_f_train, X_f_test, y_f_test,
                                    X_b_train, y_b_train, X_b_test, y_b_test,
                                    pre_context_length, post_context_length, gap_length):
    """Train a single forward-only 1-D CNN that outputs one value per window.

    The model stacks two causal ``Conv1D`` layers (32 then 64 filters, kernel
    3, ReLU), flattens, and ends in ``Dense(1)``; inputs are single-feature
    windows of ``pre_context_length`` steps. Only the forward arrays are used;
    ``X_b_*``/``y_b_*``, ``post_context_length`` and ``gap_length`` are
    ignored. ``X_f_test``/``y_f_test`` serve as validation data for early
    stopping on ``val_loss``.

    Training is silent (``verbose=0``) like every other Keras trainer in this
    file; it previously used ``verbose=1`` and printed per-epoch progress for
    each fit.

    Returns:
        Tuple ``(model, None)``; there is no backward model.
    """
    early_stopping = EarlyStopping(monitor='val_loss', patience=PATIENCE, restore_best_weights=True)
    model = Sequential([
        Conv1D(filters=32, kernel_size=3, activation='relu', padding='causal',
               input_shape=(pre_context_length, 1)),
        Conv1D(filters=64, kernel_size=3, activation='relu', padding='causal'),
        Flatten(),
        Dense(1),
    ])
    model.compile(optimizer=Adam(), loss='mse')
    model.fit(X_f_train, y_f_train, epochs=EPOCHS, batch_size=BATCH_SIZE,
              validation_data=(X_f_test, y_f_test), callbacks=[early_stopping], verbose=0)
    return model, None

def create_and_train_uniautoreg_rnn(X_f_train, y_f_train, X_f_test, y_f_test,
                                    X_b_train, y_b_train, X_b_test, y_b_test,
                                    pre_context_length, post_context_length, gap_length):
    """
    Build and train a single-output SimpleRNN network on the forward data only.

    The network is SimpleRNN(64) over an input of shape
    (pre_context_length, 1), followed by Dense(1). It is compiled with Adam
    and MSE loss; (X_f_test, y_f_test) is used as validation data, and
    training stops early on 'val_loss' with the best weights restored.

    The backward arrays, post_context_length and gap_length are accepted but
    not used by this function.

    Returns:
      - (model, None): the fitted Keras model and None in the second slot.
    """
    window_shape = (pre_context_length, 1)
    network = Sequential([
        SimpleRNN(64, input_shape=window_shape, return_sequences=False),
        Dense(1),
    ])
    network.compile(optimizer=Adam(), loss='mse')

    stopper = EarlyStopping(monitor='val_loss', patience=PATIENCE, restore_best_weights=True)
    network.fit(
        X_f_train, y_f_train,
        validation_data=(X_f_test, y_f_test),
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        callbacks=[stopper],
        verbose=1,
    )
    return network, None

def create_and_train_uniautoreg_rf(X_f_train, y_f_train, X_f_test, y_f_test,
                                   X_b_train, y_b_train, X_b_test, y_b_test,
                                   pre_context_length, post_context_length, gap_length):
    """
    Train a RandomForestRegressor that predicts one value per forward window.

    Each sample of X_f_train is flattened to a single feature row and the
    target y_f_train is flattened to 1-D before fitting. The forest uses
    N_ESTIMATORS trees and random_state=42.

    Only X_f_train and y_f_train are used; the test arrays, the backward
    arrays and the length arguments are accepted for signature compatibility
    with the other trainers.

    Returns:
      - (model, None): the fitted regressor and None in the second slot.
    """
    # One row per sample: collapse every trailing axis into a flat vector.
    X_train_flat = X_f_train.reshape(X_f_train.shape[0], -1)
    model = RandomForestRegressor(n_estimators=N_ESTIMATORS, random_state=42)
    model.fit(X_train_flat, y_f_train.ravel())  # 1-D target: one value per window
    return model, None

def create_and_train_uniautoreg_xgb(X_f_train, y_f_train, X_f_test, y_f_test,
                                    X_b_train, y_b_train, X_b_test, y_b_test,
                                    pre_context_length, post_context_length, gap_length):
    """
    Train an XGBRegressor that predicts one value per forward window.

    Each sample of X_f_train is flattened to a single feature row and the
    target y_f_train is flattened to 1-D before fitting. The regressor uses
    N_ESTIMATORS boosting rounds and random_state=42.

    Only X_f_train and y_f_train are used; the test arrays, the backward
    arrays and the length arguments are accepted for signature compatibility
    with the other trainers.

    Returns:
      - (model, None): the fitted regressor and None in the second slot.
    """
    # One row per sample: collapse every trailing axis into a flat vector.
    X_train_flat = X_f_train.reshape(X_f_train.shape[0], -1)
    model = XGBRegressor(n_estimators=N_ESTIMATORS, random_state=42)
    model.fit(X_train_flat, y_f_train.ravel())  # 1-D target: one value per window
    return model, None

# Multivariate models
def create_and_train_uniseq2seq_rf_multi(X_f_train, y_f_train, X_f_test, y_f_test,
                                         X_b_train, y_b_train, X_b_test, y_b_test,
                                         pre_context_length, post_context_length, gap_length, model_info=None):
    """
    Train one multi-output Random Forest on the combined left and right contexts.

    X_f_train and X_b_train are concatenated along axis 1 (the time axis),
    each sample is flattened to a single feature row, and y_f_train is
    reshaped to (samples, -1). A MultiOutputRegressor wrapping a
    RandomForestRegressor (N_ESTIMATORS trees, random_state=42) is fitted
    on the result.

    The test arrays, y_b_train and the length arguments are accepted for
    signature compatibility but are not used.

    Parameters:
      - model_info: optional; when truthy it is stored on the returned model
        as the attribute `model_info`.

    Returns:
      - (rf_model, None): the fitted model and None in the second slot.
    """
    # Stack both contexts along the time axis, then flatten per sample.
    X_train_combined = np.concatenate([X_f_train, X_b_train], axis=1)
    X_train_flat = X_train_combined.reshape(X_train_combined.shape[0], -1)
    y_train_flat = y_f_train.reshape(y_f_train.shape[0], -1)

    rf_model = MultiOutputRegressor(RandomForestRegressor(n_estimators=N_ESTIMATORS, random_state=42))
    rf_model.fit(X_train_flat, y_train_flat)

    # Keep model_info with the model so later code can read it back.
    if model_info:
        rf_model.model_info = model_info
    return rf_model, None

def create_and_train_uniseq2seq_lstm_multi(X_f_train, y_f_train, X_f_test, y_f_test,
                                           X_b_train, y_b_train, X_b_test, y_b_test,
                                           pre_context_length, post_context_length, gap_length, model_info=None):
    """
    Build and train a two-input LSTM encoder/decoder that outputs gap_length steps.

    Architecture:
      - two inputs, (pre_context_length, n_features) and
        (post_context_length, n_features), concatenated along the time axis;
      - a Bidirectional LSTM(64) encoder returning its final state;
      - RepeatVector(gap_length), an LSTM(64) decoder returning sequences, and
        a TimeDistributed Dense(1) head.
    n_features is read from X_f_train.shape[2] and applied to both inputs.

    Training uses Adam with MSE loss on targets reshaped to
    (-1, gap_length, 1); ([X_f_test, X_b_test], y_f_test) is the validation
    set, with early stopping on 'val_loss' and best weights restored.
    y_b_train and y_b_test are not used.

    Parameters:
      - model_info: optional; when truthy it is stored on the returned model
        as the attribute `model_info`.

    Returns:
      - (model, None): the fitted Keras model and None in the second slot.
    """
    n_features = X_f_train.shape[2]

    # Encoder: both contexts joined along time, summarised by a BiLSTM.
    left_in = Input(shape=(pre_context_length, n_features), name="encoder_input_left")
    right_in = Input(shape=(post_context_length, n_features), name="encoder_input_right")
    joined_context = Concatenate(axis=1)([left_in, right_in])
    encoded = Bidirectional(LSTM(64, return_sequences=False))(joined_context)

    # Decoder: repeat the summary once per gap step and emit one value per step.
    repeated = RepeatVector(gap_length)(encoded)
    decoded = LSTM(64, return_sequences=True)(repeated)
    per_step_output = TimeDistributed(Dense(1))(decoded)

    model = Model(inputs=[left_in, right_in], outputs=per_step_output)
    model.compile(optimizer=Adam(), loss='mse')

    train_targets = y_f_train.reshape(-1, gap_length, 1)
    val_targets = y_f_test.reshape(-1, gap_length, 1)
    stopper = EarlyStopping(monitor='val_loss', patience=PATIENCE, restore_best_weights=True)
    model.fit(
        [X_f_train, X_b_train], train_targets,
        validation_data=([X_f_test, X_b_test], val_targets),
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        callbacks=[stopper],
        verbose=0,
    )

    # Keep model_info with the model so later code can read it back.
    if model_info:
        model.model_info = model_info
    return model, None

def create_and_train_xgb_seq2seq_multi(X_f_train, y_f_train, X_f_test, y_f_test,
                                       X_b_train, y_b_train, X_b_test, y_b_test,
                                       pre_context_length, post_context_length, gap_length, model_info=None):
    """
    Trains a multivariate XGB Seq2Seq model for bidirectional forecasting.

    Two independent XGBRegressor models are fitted: one on the forward data
    and one on the backward data. Every sample is flattened from
    (timesteps, n_features) to a single row, and every target is reshaped to
    (samples, -1), so each model is fitted on a 2-D target array.

    Parameters:
      - X_f_train, y_f_train: training data for the forward model.
      - X_b_train, y_b_train: training data for the backward model.
      - X_f_test, y_f_test, X_b_test, y_b_test: accepted for signature
        compatibility; not used during training.
      - pre_context_length, post_context_length, gap_length: accepted for
        signature compatibility; not used here.
      - model_info: optional; when not None it is stored on both models as
        the attribute `model_info`.

    Returns:
      - forward_model, backward_model: trained XGBRegressor models for forward and backward forecasting.
    """
    # Convert 3D data (samples, timesteps, n_features) to 2D (samples, timesteps*n_features)
    X_f_train_flat = X_f_train.reshape(X_f_train.shape[0], -1)
    y_f_train_flat = y_f_train.reshape(y_f_train.shape[0], -1)

    X_b_train_flat = X_b_train.reshape(X_b_train.shape[0], -1)
    y_b_train_flat = y_b_train.reshape(y_b_train.shape[0], -1)

    # Train the forward model
    forward_model = XGBRegressor(n_estimators=N_ESTIMATORS, random_state=42)
    forward_model.fit(X_f_train_flat, y_f_train_flat)

    # Train the backward model
    backward_model = XGBRegressor(n_estimators=N_ESTIMATORS, random_state=42)
    backward_model.fit(X_b_train_flat, y_b_train_flat)

    # If additional model parameters are passed, save them as attributes
    if model_info is not None:
        forward_model.model_info = model_info
        backward_model.model_info = model_info

    return forward_model, backward_model

def create_and_train_uniseq2seq_xgb(X_left_train, y_train, X_left_test, y_test,
                                    X_right_train, y_train_right, X_right_test, y_test_right,
                                    pre_context_length, post_context_length, gap_length, model_info=None):
    """
    Trains a multivariate unidirectional UniSeq2Seq XGB model.

    The left and right training contexts are concatenated along axis 1 (the
    time axis), each sample is flattened to one row, and a single
    XGBRegressor (N_ESTIMATORS rounds, random_state=42) is fitted against
    y_train reshaped to (samples, -1).

    Parameters:
      - X_left_train, y_train: left-context training inputs and the targets.
      - X_right_train: right-context training inputs.
      - X_left_test, y_test, X_right_test, y_train_right, y_test_right:
        accepted for signature compatibility; not used during training.
      - pre_context_length, post_context_length, gap_length: accepted for
        signature compatibility; not used here.
      - model_info: additional information (e.g., feature_cols); when truthy
        it is stored on the model as the attribute `model_info`.

    Returns:
      - xgb_model: trained XGBRegressor.
      - None: only one model is used in this approach.
    """
    # Combine left and right contexts along the time axis, then flatten per sample.
    X_train_combined = np.concatenate([X_left_train, X_right_train], axis=1)
    X_train_flat = X_train_combined.reshape(X_train_combined.shape[0], -1)
    y_train_flat = y_train.reshape(y_train.shape[0], -1)

    # Train the XGBoost model
    xgb_model = XGBRegressor(n_estimators=N_ESTIMATORS, random_state=42)
    xgb_model.fit(X_train_flat, y_train_flat)

    if model_info:
        xgb_model.model_info = model_info

    return xgb_model, None

def create_and_train_rf_seq2seq_multi(X_f_train, y_f_train, X_f_test, y_f_test,
                                      X_b_train, y_b_train, X_b_test, y_b_test,
                                      pre_context_length, post_context_length, gap_length, model_info=None):
    """
    Trains a multivariate bidirectional RF Seq2Seq model.

    Two independent RandomForestRegressor models (N_ESTIMATORS trees,
    random_state=42) are fitted: one on the forward data and one on the
    backward data. Every sample is flattened to a single row and every target
    is reshaped to (samples, -1).

    Parameters:
      - X_f_train, y_f_train: training data for the forward model.
      - X_b_train, y_b_train: training data for the backward model.
      - X_f_test, y_f_test, X_b_test, y_b_test: accepted for signature
        compatibility; not used during training.
      - pre_context_length, post_context_length, gap_length: accepted for
        signature compatibility; not used here.
      - model_info: dictionary with additional information (e.g., feature_cols);
        when not None it is stored on both models as the attribute `model_info`.

    Returns:
      - forward_model, backward_model: trained RandomForestRegressor models for forward and backward forecasting.
    """
    # Convert forward data: (samples, timesteps, n_features) -> (samples, timesteps*n_features)
    X_f_train_flat = X_f_train.reshape(X_f_train.shape[0], -1)
    y_f_train_flat = y_f_train.reshape(y_f_train.shape[0], -1)

    # Same for backward data
    X_b_train_flat = X_b_train.reshape(X_b_train.shape[0], -1)
    y_b_train_flat = y_b_train.reshape(y_b_train.shape[0], -1)

    # Train the forward model
    forward_model = RandomForestRegressor(n_estimators=N_ESTIMATORS, random_state=42)
    forward_model.fit(X_f_train_flat, y_f_train_flat)

    # Train the backward model
    backward_model = RandomForestRegressor(n_estimators=N_ESTIMATORS, random_state=42)
    backward_model.fit(X_b_train_flat, y_b_train_flat)

    if model_info is not None:
        forward_model.model_info = model_info
        backward_model.model_info = model_info

    return forward_model, backward_model

def create_and_train_uniautoreg_xgb_multi(X_f_train, y_train, X_f_test, y_test,
                                          X_b_train, y_b_train, X_b_test, y_test_b,
                                          pre_context_length, post_context_length, gap_length, model_info=None):
    """
    Trains a multivariate unidirectional autoregressive XGB model (UniAR XGB Multi).
    Only forward training data is used.

    Parameters:
      - X_f_train: 3-D array of forward windows, indexed as
        (samples, timesteps, features).
      - y_train: target values; flattened to 1-D before fitting.
      - model_info: optional. If it contains the key "feature_cols", only
        those indices along the last axis of X_f_train are used for training.
        When not None it is also stored on the model as `model_info`.
      - All other parameters are accepted for signature compatibility and are
        not used.

    Returns:
      - xgb_model: trained XGBRegressor model.
      - None: for compatibility.
    """
    if model_info is not None and "feature_cols" in model_info:
        # Keep only the selected features along the last axis.
        X_f_train = X_f_train[:, :, model_info["feature_cols"]]

    # One row per sample; 1-D target.
    X_train_flat = X_f_train.reshape(X_f_train.shape[0], -1)
    y_train_flat = y_train.ravel()

    xgb_model = XGBRegressor(n_estimators=N_ESTIMATORS, random_state=42)
    xgb_model.fit(X_train_flat, y_train_flat)

    if model_info is not None:
        xgb_model.model_info = model_info

    return xgb_model, None

def create_and_train_uniseq2seq_cnn_multi(X_f_train, y_train, X_f_test, y_test,
                                          X_b_train, y_train_b, X_b_test, y_test_b,
                                          pre_context_length, post_context_length, gap_length, model_info=None):
    """
    Trains a multivariate unidirectional UniSeq2Seq CNN Multi model.
    Only forward data is used, so arguments related to backward data are ignored.

    The network is two causal Conv1D layers (32 filters, kernel size 3, ReLU),
    Flatten, and Dense(gap_length). It is trained with Adam on MSE, using
    (X_f_test, y_test) as validation data with early stopping on 'val_loss'.

    Parameters:
      - X_f_train, y_train, X_f_test, y_test: data for the forward model.
        Targets are reshaped to (samples, gap_length).
      - X_b_train, y_train_b, X_b_test, y_test_b: not used, but passed for compatibility.
      - pre_context_length: context length (time dimension of the input layer).
      - post_context_length: not used (can be set, but has no effect).
      - gap_length: length of the forecasted interval.
      - model_info: additional information. If it contains "feature_cols"
        (assumed to be a list of indices along the last axis), the input layer
        has len(feature_cols) features, and any input array whose feature
        count differs is reduced to those columns before training.

    Returns:
      - model: trained CNN model that outputs a forecast of length gap_length.
      - None.
    """
    if model_info is not None and "feature_cols" in model_info:
        feature_cols = model_info["feature_cols"]
        n_features = len(feature_cols)
        # The input layer is sized from feature_cols, so the arrays given to
        # fit() must have exactly that many features. Arrays that still carry
        # a different number of features are reduced to the selected columns;
        # arrays that already match are passed through untouched.
        if X_f_train.shape[2] != n_features:
            X_f_train = X_f_train[:, :, feature_cols]
        if X_f_test.shape[2] != n_features:
            X_f_test = X_f_test[:, :, feature_cols]
    else:
        n_features = X_f_train.shape[2]  # if not specified, take all features

    input_layer = Input(shape=(pre_context_length, n_features))
    x = Conv1D(filters=32, kernel_size=3, activation='relu', padding='causal')(input_layer)
    x = Conv1D(filters=32, kernel_size=3, activation='relu', padding='causal')(x)
    x = Flatten()(x)
    output_layer = Dense(gap_length)(x)
    model = Model(inputs=input_layer, outputs=output_layer)
    model.compile(optimizer=Adam(), loss='mse')

    early_stopping = EarlyStopping(monitor='val_loss', patience=PATIENCE, restore_best_weights=True)
    model.fit(X_f_train, y_train.reshape(y_train.shape[0], gap_length),
              epochs=EPOCHS, batch_size=BATCH_SIZE,
              validation_data=(X_f_test, y_test.reshape(y_test.shape[0], gap_length)),
              callbacks=[early_stopping], verbose=0)

    if model_info:
        model.model_info = model_info
    return model, None


def create_and_train_xgb_autoreg_multi(
    X_f_train, y_f_train, X_f_test, y_f_test,
    X_b_train, y_b_train, X_b_test, y_b_test,
    pre_context_length, post_context_length, gap_length,
    model_info=None
):
    """
    Trains two XGBoost models (forward and backward) in a multivariate bidirectional
    autoregression.

    X_f_train and X_b_train are indexed as (samples, timesteps, features).
    If model_info contains "feature_cols", only those indices along the last
    axis are kept. Each sample is then flattened to one row and each target
    array is flattened to 1-D before fitting.

    Parameters:
      - X_f_train, y_f_train: training data for the forward model.
      - X_b_train, y_b_train: training data for the backward model.
      - X_f_test, y_f_test, X_b_test, y_b_test: accepted for signature
        compatibility; not used during training.
      - pre_context_length, post_context_length, gap_length: accepted for
        signature compatibility; not used here.
      - model_info: for storing auxiliary information (e.g., feature_cols);
        when truthy it is stored on both models as the attribute `model_info`.

    Returns:
      - forward_model, backward_model: trained XGBRegressor models.
    """
    # If a list of features feature_cols is given, keep only those along the last axis.
    if model_info is not None and "feature_cols" in model_info:
        feature_cols = model_info["feature_cols"]
        X_f_train = X_f_train[:, :, feature_cols]  # (samples, pre_context_length, n_selected)
        X_b_train = X_b_train[:, :, feature_cols]  # (samples, post_context_length, n_selected)

    # Convert 3D (samples, timesteps, features) -> 2D (samples, timesteps * features)
    X_f_train_flat = X_f_train.reshape(X_f_train.shape[0], -1)
    y_f_train_flat = y_f_train.ravel()   # target variable (1D)

    X_b_train_flat = X_b_train.reshape(X_b_train.shape[0], -1)
    y_b_train_flat = y_b_train.ravel()

    # NOTE(review): n_estimators is hard-coded to 50 here while the other
    # trainers in this file use N_ESTIMATORS — confirm whether that is intended.
    forward_model = XGBRegressor(n_estimators=50, random_state=42)
    forward_model.fit(X_f_train_flat, y_f_train_flat)

    backward_model = XGBRegressor(n_estimators=50, random_state=42)
    backward_model.fit(X_b_train_flat, y_b_train_flat)

    # Save feature_cols inside the models if desired
    if model_info:
        forward_model.model_info = model_info
        backward_model.model_info = model_info

    return forward_model, backward_model









# Functions for direct Seq2Seq forecasting
def direct_seq2seq_forecast_tcn(model, initial_context, steps):
    """
    Run one direct (single-shot) forecast with a Seq2Seq TCN model.

    initial_context is reshaped to a batch of one window with a single
    feature, (1, len(initial_context), 1), and passed to model.predict.
    `steps` is accepted but not used; the output length is whatever the model
    returns. The prediction is returned as a single column, shape (-1, 1).
    """
    n_timesteps = initial_context.shape[0]
    batch = initial_context.reshape(1, n_timesteps, 1)
    return model.predict(batch, verbose=0).reshape(-1, 1)

def direct_seq2seq_forecast_lstm(model, initial_context, steps):
    """
    Run one direct (single-shot) forecast with a Seq2Seq LSTM model.

    initial_context is reshaped to a batch of one window with a single
    feature, (1, len(initial_context), 1), and passed to model.predict.
    `steps` is accepted but not used; the output length is whatever the model
    returns. The prediction is returned as a single column, shape (-1, 1).
    """
    n_timesteps = initial_context.shape[0]
    batch = initial_context.reshape(1, n_timesteps, 1)
    return model.predict(batch, verbose=0).reshape(-1, 1)

def direct_seq2seq_forecast_gru(model, initial_context, steps):
    """
    Run one direct (single-shot) forecast with a Seq2Seq GRU model.

    initial_context is reshaped to a batch of one window with a single
    feature, (1, len(initial_context), 1), and passed to model.predict.
    `steps` is accepted but not used; the output length is whatever the model
    returns. The prediction is returned as a single column, shape (-1, 1).
    """
    n_timesteps = initial_context.shape[0]
    batch = initial_context.reshape(1, n_timesteps, 1)
    return model.predict(batch, verbose=0).reshape(-1, 1)

def direct_seq2seq_forecast_cnn(model, initial_context, steps):
    """
    Run one direct (single-shot) forecast with a Seq2Seq CNN model.

    initial_context is reshaped to a batch of one window with a single
    feature, (1, len(initial_context), 1), and passed to model.predict.
    `steps` is accepted but not used; the output length is whatever the model
    returns. The prediction is returned as a single column, shape (-1, 1).
    """
    n_timesteps = initial_context.shape[0]
    batch = initial_context.reshape(1, n_timesteps, 1)
    return model.predict(batch, verbose=0).reshape(-1, 1)

def direct_seq2seq_forecast_rf(model, initial_context, steps):
    """
    Run one direct (single-shot) forecast with a Seq2Seq Random Forest model.

    initial_context is flattened into a single (1, n) row and passed to
    model.predict. `steps` is accepted but not used. The prediction is
    returned as a single column, shape (-1, 1).
    """
    # The estimator expects a 2-D array: one sample, all values in one row.
    sample_row = initial_context.reshape(1, -1)
    return model.predict(sample_row).reshape(-1, 1)

def direct_seq2seq_forecast_xgb(model, initial_context, steps):
    """
    Run one direct (single-shot) forecast with a Seq2Seq XGBoost model.

    initial_context is flattened into a single (1, n) row and passed to
    model.predict. `steps` is accepted but not used. The prediction is
    returned as a single column, shape (-1, 1).
    """
    # The estimator expects a 2-D array: one sample, all values in one row.
    sample_row = initial_context.reshape(1, -1)
    return model.predict(sample_row).reshape(-1, 1)

def direct_seq2seq_forecast_rnn(model, initial_context, steps):
    """
    Run one direct (single-shot) forecast with a Seq2Seq RNN model.

    initial_context is reshaped to a batch of one window with a single
    feature, (1, len(initial_context), 1), and passed to model.predict.
    `steps` is accepted but not used; the output length is whatever the model
    returns. The prediction is returned as a single column, shape (-1, 1).
    """
    n_timesteps = initial_context.shape[0]
    batch = initial_context.reshape(1, n_timesteps, 1)
    return model.predict(batch, verbose=0).reshape(-1, 1)

# Functions for direct autoregressive forecasting
def direct_autoreg_forecast_rnn(model, initial_context, gap_length):
    """
    Forecast gap_length values one step at a time with an autoregressive RNN model.

    Starting from a copy of initial_context, each iteration feeds the current
    window to model.predict as a (1, window_length, 1) batch, takes the
    element [0][0] of the result as the next value, then shifts the window one
    position and writes that value into its last slot. The caller's array is
    not modified.

    Returns the collected values as an array of shape (gap_length, 1).
    """
    window = initial_context.copy()
    forecast = []
    for _step in range(gap_length):
        batch = window.reshape(1, window.shape[0], 1)
        next_value = model.predict(batch, verbose=0)[0][0]
        forecast.append(next_value)
        # Slide the window: drop the oldest value, append the new forecast.
        shifted = np.roll(window, -1)
        shifted[-1] = next_value
        window = shifted
    return np.array(forecast).reshape(-1, 1)

def direct_autoreg_forecast_lstm(model, initial_context, gap_length):
    """
    Forecast gap_length values one step at a time with an autoregressive LSTM model.

    Starting from a copy of initial_context, each iteration feeds the current
    window to model.predict as a (1, window_length, 1) batch, takes the
    element [0][0] of the result as the next value, then shifts the window one
    position and writes that value into its last slot. The caller's array is
    not modified.

    Returns the collected values as an array of shape (gap_length, 1).
    """
    window = initial_context.copy()
    forecast = []
    for _step in range(gap_length):
        batch = window.reshape(1, window.shape[0], 1)
        next_value = model.predict(batch, verbose=0)[0][0]
        forecast.append(next_value)
        # Slide the window: drop the oldest value, append the new forecast.
        shifted = np.roll(window, -1)
        shifted[-1] = next_value
        window = shifted
    return np.array(forecast).reshape(-1, 1)

def direct_autoreg_forecast_gru(model, initial_context, gap_length):
    """
    Forecast gap_length values one step at a time with an autoregressive GRU model.

    Starting from a copy of initial_context, each iteration feeds the current
    window to model.predict as a (1, window_length, 1) batch, takes the
    element [0][0] of the result as the next value, then shifts the window one
    position and writes that value into its last slot. The caller's array is
    not modified.

    Returns the collected values as an array of shape (gap_length, 1).
    """
    window = initial_context.copy()
    forecast = []
    for _step in range(gap_length):
        batch = window.reshape(1, window.shape[0], 1)
        next_value = model.predict(batch, verbose=0)[0][0]
        forecast.append(next_value)
        # Slide the window: drop the oldest value, append the new forecast.
        shifted = np.roll(window, -1)
        shifted[-1] = next_value
        window = shifted
    return np.array(forecast).reshape(-1, 1)

def direct_autoreg_forecast_cnn(model, initial_context, gap_length):
    """
    Forecast gap_length values one step at a time with an autoregressive CNN model.

    Starting from a copy of initial_context, each iteration feeds the current
    window to model.predict as a (1, window_length, 1) batch, takes the
    element [0][0] of the result as the next value, then shifts the window one
    position and writes that value into its last slot. The caller's array is
    not modified.

    Returns the collected values as an array of shape (gap_length, 1).
    """
    window = initial_context.copy()
    forecast = []
    for _step in range(gap_length):
        batch = window.reshape(1, window.shape[0], 1)
        next_value = model.predict(batch, verbose=0)[0][0]
        forecast.append(next_value)
        # Slide the window: drop the oldest value, append the new forecast.
        shifted = np.roll(window, -1)
        shifted[-1] = next_value
        window = shifted
    return np.array(forecast).reshape(-1, 1)

def direct_autoreg_forecast_rf(model, initial_context, gap_length):
    """
    Forecast gap_length values one step at a time with an autoregressive Random Forest.

    Starting from a copy of initial_context, each iteration flattens the
    current window into a (1, n) row, takes element [0] of model.predict's
    result as the next value, then shifts the window one position and writes
    that value into its last slot. The caller's array is not modified.

    Returns the collected values reshaped to a single column, (-1, 1).
    """
    window = initial_context.copy()
    forecast = []
    for _step in range(gap_length):
        sample_row = window.reshape(1, -1)  # one sample, all values in one row
        next_value = model.predict(sample_row)[0]
        forecast.append(next_value)
        # Slide the window: drop the oldest value, append the new forecast.
        shifted = np.roll(window, -1)
        shifted[-1] = next_value
        window = shifted
    return np.array(forecast).reshape(-1, 1)

def direct_autoreg_forecast_xgb(model, initial_context, gap_length):
    """
    Forecast gap_length values one step at a time with an autoregressive XGBoost model.

    Starting from a copy of initial_context, each iteration flattens the
    current window into a (1, n) row, takes element [0] of model.predict's
    result as the next value, then shifts the window one position and writes
    that value into its last slot. The caller's array is not modified.

    Returns the collected values reshaped to a single column, (-1, 1).
    """
    window = initial_context.copy()
    forecast = []
    for _step in range(gap_length):
        sample_row = window.reshape(1, -1)  # one sample, all values in one row
        next_value = model.predict(sample_row)[0]
        forecast.append(next_value)
        # Slide the window: drop the oldest value, append the new forecast.
        shifted = np.roll(window, -1)
        shifted[-1] = next_value
        window = shifted
    return np.array(forecast).reshape(-1, 1)

def direct_autoreg_forecast_tcn(model, initial_context, gap_length):
    """
    Forecast gap_length values one step at a time with an autoregressive TCN model.

    Starting from a copy of initial_context, each iteration feeds the current
    window to model.predict as a (1, window_length, 1) batch, takes the
    element [0][0] of the result as the next value, then shifts the window one
    position and writes that value into its last slot. The caller's array is
    not modified.

    Returns the collected values as an array of shape (gap_length, 1).
    """
    window = initial_context.copy()
    forecast = []
    for _step in range(gap_length):
        batch = window.reshape(1, window.shape[0], 1)
        next_value = model.predict(batch, verbose=0)[0][0]
        forecast.append(next_value)
        # Slide the window: drop the oldest value, append the new forecast.
        shifted = np.roll(window, -1)
        shifted[-1] = next_value
        window = shifted
    return np.array(forecast).reshape(-1, 1)

# Forecasting function for UniSeq2Seq GRU
def direct_uniseq2seq_forecast_gru(model, initial_context, gap_length,
                                   pre_context_length=32, post_context_length=32):
    """
    Forecast with a two-input UniSeq2Seq GRU model.

    The first pre_context_length rows of initial_context become the left
    input and the last post_context_length rows become the right input.
    Each is given a leading batch axis of size 1 and the pair is passed to
    model.predict as [left, right]. initial_context must be 2-D
    (rows, n_features); gap_length is accepted but not used here.

    Returns the prediction reshaped to a single column, shape (-1, 1).
    """
    left_rows = initial_context[:pre_context_length]
    right_rows = initial_context[-post_context_length:]

    # Prepend the batch axis: (1, context_length, n_features).
    left_batch = left_rows.reshape(1, pre_context_length, left_rows.shape[1])
    right_batch = right_rows.reshape(1, post_context_length, right_rows.shape[1])

    return model.predict([left_batch, right_batch], verbose=0).reshape(-1, 1)

# Forecasting function for UniSeq2Seq TCN
def direct_uniseq2seq_forecast_tcn(model, initial_context, gap_length,
                                   pre_context_length=32, post_context_length=32):
    """
    Forecast with a two-input UniSeq2Seq TCN model.

    The first pre_context_length rows of initial_context become the left
    input and the last post_context_length rows become the right input.
    Each is given a leading batch axis of size 1 and the pair is passed to
    model.predict as [left, right]. initial_context must be 2-D
    (rows, n_features); gap_length is accepted but not used here.

    Returns the prediction reshaped to a single column, shape (-1, 1).
    """
    left_rows = initial_context[:pre_context_length]
    right_rows = initial_context[-post_context_length:]

    # Prepend the batch axis: (1, context_length, n_features).
    left_batch = left_rows.reshape(1, pre_context_length, left_rows.shape[1])
    right_batch = right_rows.reshape(1, post_context_length, right_rows.shape[1])

    return model.predict([left_batch, right_batch], verbose=0).reshape(-1, 1)

# Forecasting function for UniSeq2Seq CNN
def direct_uniseq2seq_forecast_cnn(model, initial_context, gap_length,
                                   pre_context_length=32, post_context_length=32):
    """
    Forecast with a two-input UniSeq2Seq CNN model.

    The first pre_context_length rows of initial_context become the left
    input and the last post_context_length rows become the right input.
    Each is given a leading batch axis of size 1 and the pair is passed to
    model.predict as [left, right]. initial_context must be 2-D
    (rows, n_features); gap_length is accepted but not used here.

    Returns the prediction reshaped to a single column, shape (-1, 1).
    """
    left_rows = initial_context[:pre_context_length]
    right_rows = initial_context[-post_context_length:]

    # Prepend the batch axis: (1, context_length, n_features).
    left_batch = left_rows.reshape(1, pre_context_length, left_rows.shape[1])
    right_batch = right_rows.reshape(1, post_context_length, right_rows.shape[1])

    return model.predict([left_batch, right_batch], verbose=0).reshape(-1, 1)

# Forecasting function for UniSeq2Seq RNN
def direct_uniseq2seq_forecast_rnn(model, initial_context, gap_length,
                                   pre_context_length=32, post_context_length=32):
    """
    Forecast with a two-input UniSeq2Seq RNN model.

    The first pre_context_length rows of initial_context become the left
    input and the last post_context_length rows become the right input.
    Each is given a leading batch axis of size 1 and the pair is passed to
    model.predict as [left, right]. initial_context must be 2-D
    (rows, n_features); gap_length is accepted but not used here.

    Returns the prediction reshaped to a single column, shape (-1, 1).
    """
    left_rows = initial_context[:pre_context_length]
    right_rows = initial_context[-post_context_length:]

    # Prepend the batch axis: (1, context_length, n_features).
    left_batch = left_rows.reshape(1, pre_context_length, left_rows.shape[1])
    right_batch = right_rows.reshape(1, post_context_length, right_rows.shape[1])

    return model.predict([left_batch, right_batch], verbose=0).reshape(-1, 1)

# Forecasting function for UniSeq2Seq Random Forest
def direct_uniseq2seq_forecast_rf(model, initial_context, gap_length):
    """
    Forecast with a UniSeq2Seq Random Forest model.

    initial_context (the combined pre- and post-context) is flattened into a
    single (1, n) row and passed to model.predict. gap_length is accepted but
    not used here. The prediction is returned as a single column, (-1, 1).
    """
    sample_row = initial_context.reshape(1, -1)
    return model.predict(sample_row).reshape(-1, 1)

# Forecasting function for UniSeq2Seq XGBoost
def direct_uniseq2seq_forecast_xgb(model, initial_context, gap_length):
    """
    Forecast with a UniSeq2Seq XGBoost model.

    initial_context (the combined pre- and post-context) is flattened into a
    single (1, n) row and passed to model.predict. gap_length is accepted but
    not used here. The prediction is returned as a single column, (-1, 1).
    """
    # NOTE(review): this name is defined a second time further down in this
    # file; the later definition replaces this one when the module is loaded.
    sample_row = initial_context.reshape(1, -1)
    return model.predict(sample_row).reshape(-1, 1)

def direct_uniseq2seq_forecast_lstm(model, initial_context, gap_length,
                                    pre_context_length=32, post_context_length=32):
    """
    Forecast for the UniSeq2Seq LSTM model, which accepts [left_context, right_context].

    The first pre_context_length rows of initial_context become the left
    input and the last post_context_length rows become the right input.
    Each is given a leading batch axis of size 1 before the pair is passed to
    model.predict. initial_context must be 2-D (rows, n_features);
    gap_length is accepted but not used here.

    Returns the prediction reshaped to a single column, shape (-1, 1).
    """
    left_rows = initial_context[:pre_context_length]
    right_rows = initial_context[-post_context_length:]

    # Prepend the batch axis: (1, context_length, n_features).
    left_batch = left_rows.reshape(1, pre_context_length, left_rows.shape[1])
    right_batch = right_rows.reshape(1, post_context_length, right_rows.shape[1])

    return model.predict([left_batch, right_batch], verbose=0).reshape(-1, 1)

# Forecasting functions for UniAutoreg 
def direct_uniautoreg_forecast_lstm(model, initial_context, gap_length):
    """
    Forecast gap_length values one step at a time with a UniAutoreg LSTM model.

    Starting from a copy of initial_context, each iteration feeds the current
    window to model.predict as a (1, window_length, 1) batch, takes the
    element [0][0] of the result as the next value, then shifts the window one
    position and writes that value into its last slot. The caller's array is
    not modified.

    Returns the collected values as an array of shape (gap_length, 1).
    """
    window = initial_context.copy()
    forecast = []
    for _step in range(gap_length):
        batch = window.reshape(1, window.shape[0], 1)
        next_value = model.predict(batch, verbose=0)[0][0]
        forecast.append(next_value)
        # Slide the window: drop the oldest value, append the new forecast.
        shifted = np.roll(window, -1)
        shifted[-1] = next_value
        window = shifted
    return np.array(forecast).reshape(-1, 1)

def direct_uniautoreg_forecast_gru(model, initial_context, gap_length):
    """
    Forecast gap_length values one step at a time with a UniAutoreg GRU model.

    Starting from a copy of initial_context, each iteration feeds the current
    window to model.predict as a (1, window_length, 1) batch, takes the
    element [0][0] of the result as the next value, then shifts the window one
    position and writes that value into its last slot. The caller's array is
    not modified.

    Returns the collected values as an array of shape (gap_length, 1).
    """
    window = initial_context.copy()
    forecast = []
    for _step in range(gap_length):
        batch = window.reshape(1, window.shape[0], 1)
        next_value = model.predict(batch, verbose=0)[0][0]
        forecast.append(next_value)
        # Slide the window: drop the oldest value, append the new forecast.
        shifted = np.roll(window, -1)
        shifted[-1] = next_value
        window = shifted
    return np.array(forecast).reshape(-1, 1)

def direct_uniautoreg_forecast_tcn(model, initial_context, gap_length):
    """
    Forecast gap_length values one step at a time with a UniAutoreg TCN model.

    Starting from a copy of initial_context, each iteration feeds the current
    window to model.predict as a (1, window_length, 1) batch, takes the
    element [0][0] of the result as the next value, then shifts the window one
    position and writes that value into its last slot. The caller's array is
    not modified.

    Returns the collected values as an array of shape (gap_length, 1).
    """
    window = initial_context.copy()
    forecast = []
    for _step in range(gap_length):
        batch = window.reshape(1, window.shape[0], 1)
        next_value = model.predict(batch, verbose=0)[0][0]
        forecast.append(next_value)
        # Slide the window: drop the oldest value, append the new forecast.
        shifted = np.roll(window, -1)
        shifted[-1] = next_value
        window = shifted
    return np.array(forecast).reshape(-1, 1)

def direct_uniautoreg_forecast_cnn(model, initial_context, gap_length):
    """
    Forecast gap_length values one step at a time with a UniAutoreg CNN model.

    Starting from a copy of initial_context, each iteration feeds the current
    window to model.predict as a (1, window_length, 1) batch, takes the
    element [0][0] of the result as the next value, then shifts the window one
    position and writes that value into its last slot. The caller's array is
    not modified.

    Returns the collected values as an array of shape (gap_length, 1).
    """
    window = initial_context.copy()
    forecast = []
    for _step in range(gap_length):
        batch = window.reshape(1, window.shape[0], 1)
        next_value = model.predict(batch, verbose=0)[0][0]
        forecast.append(next_value)
        # Slide the window: drop the oldest value, append the new forecast.
        shifted = np.roll(window, -1)
        shifted[-1] = next_value
        window = shifted
    return np.array(forecast).reshape(-1, 1)

def direct_uniautoreg_forecast_rnn(model, initial_context, gap_length):
    """
    Forecast gap_length values one step at a time with a UniAutoreg RNN model.

    Starting from a copy of initial_context, each iteration feeds the current
    window to model.predict as a (1, window_length, 1) batch, takes the
    element [0][0] of the result as the next value, then shifts the window one
    position and writes that value into its last slot. The caller's array is
    not modified.

    Returns the collected values as an array of shape (gap_length, 1).
    """
    window = initial_context.copy()
    forecast = []
    for _step in range(gap_length):
        batch = window.reshape(1, window.shape[0], 1)
        next_value = model.predict(batch, verbose=0)[0][0]
        forecast.append(next_value)
        # Slide the window: drop the oldest value, append the new forecast.
        shifted = np.roll(window, -1)
        shifted[-1] = next_value
        window = shifted
    return np.array(forecast).reshape(-1, 1)

def direct_uniautoreg_forecast_rf(model, initial_context, gap_length):
    """
    Forecast gap_length values one step at a time with a UniAutoreg Random Forest.

    Starting from a copy of initial_context, each iteration flattens the
    current window into a (1, n) row, takes element [0] of model.predict's
    result as the next value, then shifts the window one position and writes
    that value into its last slot. The caller's array is not modified.

    Returns the collected values reshaped to a single column, (-1, 1).
    """
    window = initial_context.copy()
    forecast = []
    for _step in range(gap_length):
        sample_row = window.reshape(1, -1)  # one sample, all values in one row
        next_value = model.predict(sample_row)[0]
        forecast.append(next_value)
        # Slide the window: drop the oldest value, append the new forecast.
        shifted = np.roll(window, -1)
        shifted[-1] = next_value
        window = shifted
    return np.array(forecast).reshape(-1, 1)

def direct_uniautoreg_forecast_xgb(model, initial_context, gap_length):
    """
    Forecast gap_length values one step at a time with a UniAutoreg XGBoost model.

    Starting from a copy of initial_context, each iteration flattens the
    current window into a (1, n) row, takes element [0] of model.predict's
    result as the next value, then shifts the window one position and writes
    that value into its last slot. The caller's array is not modified.

    Returns the collected values reshaped to a single column, (-1, 1).
    """
    window = initial_context.copy()
    forecast = []
    for _step in range(gap_length):
        sample_row = window.reshape(1, -1)  # one sample, all values in one row
        next_value = model.predict(sample_row)[0]
        forecast.append(next_value)
        # Slide the window: drop the oldest value, append the new forecast.
        shifted = np.roll(window, -1)
        shifted[-1] = next_value
        window = shifted
    return np.array(forecast).reshape(-1, 1)

# Forecasting functions for UniSeq2Seq LSTM (multi-factor)
def direct_uniseq2seq_forecast_lstm_multi(
    model, initial_context, gap_length,
    pre_context_length=32, post_context_length=32
):
    """
    Forecasting for the multivariate unidirectional UniSeq2Seq LSTM Multi model.

    :param model: trained model whose predict accepts [left, right].
    :param initial_context: 2-D array (rows, n_features); the first
        pre_context_length rows are used as the left context and the last
        post_context_length rows as the right context.
    :param gap_length: length of the forecasted interval (accepted but not
        used here; the output length is whatever the model returns).
    :param pre_context_length: length of the left context (default 32).
    :param post_context_length: length of the right context (default 32).

    Returns the prediction reshaped to a single column, shape (-1, 1).
    """
    left_rows = initial_context[:pre_context_length]
    right_rows = initial_context[-post_context_length:]

    # Prepend the batch axis: (1, context_length, n_features).
    left_batch = left_rows.reshape(1, pre_context_length, left_rows.shape[1])
    right_batch = right_rows.reshape(1, post_context_length, right_rows.shape[1])

    return model.predict([left_batch, right_batch], verbose=0).reshape(-1, 1)

# Forecasting functions for UniSeq2Seq RF (multi-factor)
def direct_uniseq2seq_forecast_rf_multi(model, initial_context, gap_length):
    """
    Forecast with the multivariate UniSeq2Seq Random Forest model.

    initial_context is flattened into a single (1, n) row and passed to
    model.predict. gap_length is accepted but not used here. The prediction
    is returned as a single column, shape (-1, 1).
    """
    sample_row = initial_context.reshape(1, -1)
    return model.predict(sample_row).reshape(-1, 1)

def direct_seq2seq_forecast_xgb_multi(model, initial_context, gap_length):
    """
    Forecasting for the multivariate XGB Seq2Seq model.

    Parameters:
      - model: trained model exposing predict().
      - initial_context: context array, e.g. (timesteps, n_features); it is flattened
                         into one row before being handed to the model.
      - gap_length: kept for a uniform forecast-function signature; not used here.

    Returns:
      - The model's forecast reshaped to a column vector of shape (-1, 1).
    """
    flat_row = initial_context.reshape(1, -1)
    forecast = model.predict(flat_row)
    column = forecast.reshape(-1, 1)
    return column

def direct_uniseq2seq_forecast_xgb(model, initial_context, gap_length):
    """
    Forecasting for the UniSeq2Seq XGB model.

    initial_context (for example a stacked left/right context of shape
    (timesteps, n_features)) is collapsed into a single feature row, the model
    predicts from that row, and the result is returned as a one-column array.
    gap_length is part of the common signature but is not read here.
    """
    return model.predict(initial_context.reshape(1, -1)).reshape(-1, 1)

def direct_seq2seq_forecast_rf_multi(model, initial_context, gap_length):
    """
    Forecasting for the multivariate bidirectional RF Seq2Seq model.

    Parameters:
      - model: trained model exposing predict().
      - initial_context: context array, e.g. (timesteps, n_features).
      - gap_length: unused; present so all forecast functions share one signature.

    The context is unrolled into one row vector, the model is queried once, and
    the forecast comes back as an array with a single column (shape (-1, 1)).
    """
    row_vector = initial_context.reshape(1, -1)
    raw_forecast = model.predict(row_vector)
    return raw_forecast.reshape(-1, 1)

def direct_uniautoreg_forecast_xgb_multi(model, initial_context, gap_length):
    """
    Autoregressive forecasting for the multivariate unidirectional XGB model (UniAR XGB Multi).

    Parameters:
      - model: trained model exposing predict().
      - initial_context: starting window of shape (pre_context_length, n_selected);
                         it is copied and never modified.
      - gap_length: number of steps to forecast ahead.

    Each iteration flattens the window into one sample, takes the first predicted
    value, and then advances the window by one row. The window is advanced with
    np.roll, which wraps the oldest row around into the last position; only
    column 0 of that row is overwritten with the new prediction, so the remaining
    columns of the last row keep the values of the row that was rolled out.

    Returns:
      - An array of predictions of shape (gap_length, 1).
    """
    window = initial_context.copy()  # (pre_context_length, n_selected)
    forecast = []
    for _step in range(gap_length):
        raw = np.array(model.predict(window.reshape(1, -1)))
        # predict() may return a 1D vector or a 2D (1, k) array; take its first value.
        next_value = raw[0, 0] if raw.ndim > 1 else raw[0]
        forecast.append(next_value)
        # Advance one row along the time axis and write the target column.
        window = np.roll(window, -1, axis=0)
        window[-1, 0] = next_value
    return np.array(forecast).reshape(-1, 1)

def direct_uniseq2seq_forecast_cnn_multi(model, initial_context, gap_length):
    """
    Forecasting for the multivariate unidirectional UniSeq2Seq CNN Multi model.

    Parameters:
      - model: trained model whose predict() accepts a (1, timesteps, n_features) batch.
      - initial_context: 2D context array of shape (timesteps, n_features).
      - gap_length: unused; kept for a uniform forecast-function signature.

    Returns:
      - The model output reshaped to a single column, shape (-1, 1).
    """
    n_steps = initial_context.shape[0]
    n_feats = initial_context.shape[1]
    # Wrap the context into a batch containing exactly one sample.
    batch = initial_context.reshape(1, n_steps, n_feats)
    return model.predict(batch, verbose=0).reshape(-1, 1)

def direct_autoreg_forecast_xgb_multi(model, initial_context, gap_length):
    """
    Forecasting for the multivariate bidirectional XGB autoregression (Forward or Backward model).

    Parameters:
      - model: trained model exposing predict() (e.g. an XGBRegressor).
      - initial_context: array of shape (context_length, n_selected),
                         where n_selected = len(feature_cols). It is copied, not modified.
      - gap_length: number of consecutive forecasting steps.

    Mechanism:
      - At each step, the current window is flattened into a vector and fed into
        the model to forecast "+1 step".
      - The window is then advanced by one row with np.roll: the oldest row wraps
        around into the last position and its target column [0] is overwritten
        with the new prediction (the other columns of that row keep the wrapped values).
      - The prediction is always reduced to a scalar (first element of the model
        output), so both 1D and 2D predict() outputs are handled, consistently
        with direct_uniautoreg_forecast_xgb_multi.

    Returns:
      - An array of predictions of shape (gap_length, 1).
    """
    predictions = []
    current_context = initial_context.copy()  # (context_length, n_selected)
    for _ in range(gap_length):
        # Convert window (context_length, n_selected) -> (1, context_length*n_selected)
        input_flat = current_context.reshape(1, -1)
        # Take the first predicted value as a scalar, whatever the output rank is;
        # assigning a non-scalar array into a single cell below would otherwise fail.
        pred = np.asarray(model.predict(input_flat)).ravel()[0]
        predictions.append(pred)
        # Shift the window forward along the time axis and insert the prediction
        # into the target column (0) of the last row.
        current_context = np.roll(current_context, -1, axis=0)
        current_context[-1, 0] = pred
    return np.array(predictions).reshape(-1, 1)




def parse_model_indices(indices_str):
    """
    Parse a selection string such as "1-3,5,7-8" into a sorted list of unique ints.

    Parameters:
      - indices_str: comma-separated items; each item is either a single integer
                     or an inclusive range written as "start-end". Whitespace
                     around items and numbers is ignored.

    Empty items (empty input, doubled or trailing commas) are skipped instead of
    raising. A range whose start is greater than its end contributes nothing.

    Returns:
      - Sorted list of the selected indices without duplicates.

    Raises:
      - ValueError: if an item is neither an integer nor a "start-end" pair.
    """
    indices = set()
    for part in indices_str.split(','):
        token = part.strip()
        if not token:
            # Tolerate "", "1,,2" and "1,2," rather than failing on int('').
            continue
        if '-' in token:
            start, end = map(int, token.split('-'))
            indices.update(range(start, end + 1))
        else:
            indices.add(int(token))
    return sorted(indices)



In [None]:

if __name__ == "__main__":
    # Univariate experiment: register the selected models in an ImputationCombiner,
    # run the gap-filling tests and summarize the results.
    df, data_scaled, scaler = load_and_preprocess_data()


    # List of models with parameters: (name, training_function, forecasting_function, forecast_type, [window_size], [poly_degree])
    # The trailing "#N" comments are the 1-based indices used for selection below.
    models_to_add = [
        # Simple Imputers 1-2
        ("Simple Imputer Mean", None, None, "simple"),      #1
        ("Simple Imputer Median", None, None, "simple"),    #2

        # Local Imputers 3-4
        ("Local Imputation Mean", None, None, "local", None),   #3
        ("Local Imputation Median", None, None, "local", None), #4

        # Window Interpolation Methods 5-8
        ("Linear Window Interpolation", None, None, "window", 10),          #5
        ("Polynomial Window Interpolation", None, None, "window", None, 3), #6
        ("B-spline Window Interpolation", None, None, "window", None),      #7
        ("ARIMA Imputation", None, None, "window", 100),                    #8

        # Unidirectional Autoregression Methods (UniAutoreg) 9-15
        ("UniAR LSTM", create_and_train_uniautoreg_lstm, direct_uniautoreg_forecast_lstm, "uniautoreg"), #9
        ("UniAR RNN", create_and_train_uniautoreg_rnn, direct_uniautoreg_forecast_rnn, "uniautoreg"),   #10
        ("UniAR CNN", create_and_train_uniautoreg_cnn, direct_uniautoreg_forecast_cnn, "uniautoreg"),   #11
        ("UniAR RF", create_and_train_uniautoreg_rf, direct_uniautoreg_forecast_rf, "uniautoreg"),      #12
        ("UniAR XGB", create_and_train_uniautoreg_xgb, direct_uniautoreg_forecast_xgb, "uniautoreg"),   #13
        ("UniAR GRU", create_and_train_uniautoreg_gru, direct_uniautoreg_forecast_gru, "uniautoreg"),   #14
        ("UniAR TCN", create_and_train_uniautoreg_tcn, direct_uniautoreg_forecast_tcn, "uniautoreg"),   #15

        # Unidirectional seq2seq Methods (UniSeq2Seq) 16-22
        ("UniSeq2Seq RNN", create_and_train_uniseq2seq_rnn, direct_uniseq2seq_forecast_rnn, "uniseq2seq"),      #16
        ("UniSeq2Seq CNN", create_and_train_uniseq2seq_cnn, direct_uniseq2seq_forecast_cnn, "uniseq2seq"),      #17
        ("UniSeq2Seq RF", create_and_train_uniseq2seq_rf, direct_uniseq2seq_forecast_rf, "uniseq2seq"),         #18
        ("UniSeq2Seq XGB", create_and_train_uniseq2seq_xgb, direct_uniseq2seq_forecast_xgb, "uniseq2seq"),      #19
        ("UniSeq2Seq LSTM", create_and_train_uniseq2seq_lstm, direct_uniseq2seq_forecast_lstm, "uniseq2seq"),   #20
        ("UniSeq2Seq GRU", create_and_train_uniseq2seq_gru, direct_uniseq2seq_forecast_gru, "uniseq2seq"),      #21
        ("UniSeq2Seq TCN", create_and_train_uniseq2seq_tcn, direct_uniseq2seq_forecast_tcn, "uniseq2seq"),      #22

        # Bidirectional Autoregression Methods (autoreg) 23-29
        ("RNN Autoreg", create_and_train_rnn_autoreg, direct_autoreg_forecast_rnn, "autoreg"),      #23
        ("CNN Autoreg", create_and_train_cnn_autoreg, direct_autoreg_forecast_cnn, "autoreg"),      #24
        ("RF Autoreg", create_and_train_rf_autoreg, direct_autoreg_forecast_rf, "autoreg"),         #25
        ("XGB Autoreg", create_and_train_xgb_autoreg, direct_autoreg_forecast_xgb, "autoreg"),      #26
        ("LSTM Autoreg", create_and_train_lstm_autoreg, direct_autoreg_forecast_lstm, "autoreg"),   #27
        ("GRU Autoreg", create_and_train_gru_autoreg, direct_autoreg_forecast_gru, "autoreg"),      #28
        ("TCN Autoreg", create_and_train_tcn_autoreg, direct_autoreg_forecast_tcn, "autoreg"),      #29

        # Bidirectional seq2seq Methods (Seq2Seq) 30-36
        ("RNN Seq2Seq", create_and_train_rnn_seq2seq, direct_seq2seq_forecast_rnn, "seq2seq"),      #30
        ("CNN Seq2Seq", create_and_train_cnn_seq2seq, direct_seq2seq_forecast_cnn, "seq2seq"),      #31
        ("RF Seq2Seq", create_and_train_rf_seq2seq, direct_seq2seq_forecast_rf, "seq2seq"),         #32
        ("XGB Seq2Seq", create_and_train_xgb_seq2seq, direct_seq2seq_forecast_xgb, "seq2seq"),      #33
        ("LSTM Seq2Seq", create_and_train_lstm_seq2seq, direct_seq2seq_forecast_lstm, "seq2seq"),   #34
        ("GRU Seq2Seq", create_and_train_gru_seq2seq, direct_seq2seq_forecast_gru, "seq2seq"),      #35
        ("TCN Seq2Seq", create_and_train_tcn_seq2seq, direct_seq2seq_forecast_tcn, "seq2seq")       #36
    ]

    # Set the indices of the models to include
    user_input = "13, 17-20, 26, 32, 33"
    # user_input = "1-36"
    model_indices = parse_model_indices(user_input)

    print("Models to add:", sorted(model_indices))

    combiner = ImputationCombiner(df, data_scaled, scaler)

    # Add models by index
    for idx in sorted(model_indices):
        # Indices are 1-based. Check both bounds: with only an upper-bound check,
        # 0 or a negative index would silently pick a model from the END of the
        # list through Python's negative indexing.
        if not 1 <= idx <= len(models_to_add):
            print(f"Skipping model index {idx}: valid range is 1-{len(models_to_add)}")
            continue

        tup = models_to_add[idx - 1]
        model_name, train_func, forecast_func, forecast_type = tup[:4]
        # NOTE(review): according to the tuple layout documented above, slot 4 of
        # the "local"/"window" entries is window_size (e.g. 10 for #5, 100 for #8),
        # yet it is forwarded here as feature_cols, and poly_degree (slot 5) is
        # never passed on. Confirm against ImputationCombiner.add_model.
        feature_cols = tup[4] if len(tup) > 4 else [0]

        combiner.add_model(
            model_name,
            model_func=train_func,
            forecast_func=forecast_func,
            forecast_type=forecast_type,
            feature_cols=feature_cols
        )
        print(f"Added model [{idx}]: {model_name}")

    # Start testing
    combiner.run_tests(gap_lengths=[5, 12, 24, 48, 72], n_runs=5)
    combiner.summarize_results(visualize=True, output_dir="plots_uni")

In [None]:
if __name__ == "__main__":
    # Multivariate experiment: same pipeline as the univariate run, but the data is
    # loaded with multivariate=True and every model receives a list of feature columns.
    df, data_scaled, scaler = load_and_preprocess_data(multivariate=True)
    print("Shape of data_scaled for multivariate:", data_scaled.shape)
    print("Type of data_scaled:", type(data_scaled))

    combiner_multi = ImputationCombiner(df, data_scaled, scaler)

    # numeric_cols = ['pm2_5', 'air_temperature', 'air_humidity', 'T', 'P0', 'P', 'U', 'DD', 'Ff', 'VV', 'pm2_5_original', 'hour', 'season']

    # Each entry: (name, training_function, forecasting_function, forecast_type, feature_cols).
    # NOTE(review): entry 7 ("XGB Autoreg Multi") is registered with exactly the same
    # training function, forecasting function and forecast type as entry 6, so it
    # repeats the UniAR configuration under a different name.
    # direct_autoreg_forecast_xgb_multi is defined in this file but is not
    # referenced in this list - confirm the intended wiring.
    models_to_add = [
        (   # 1
            "UniSeq2Seq LSTM Multi",
            create_and_train_uniseq2seq_lstm_multi,
            direct_uniseq2seq_forecast_lstm_multi,
            "uniseq2seq",
            [0, 1, 2, 7, 8, 11, 12],
        ),
        (   # 2
            "UniSeq2Seq RF Multi",
            create_and_train_uniseq2seq_rf_multi,
            direct_uniseq2seq_forecast_rf_multi,
            "uniseq2seq",
            [0, 1, 2, 7, 8, 11, 12],
        ),
        (   # 3
            "XGB Seq2Seq Multi",
            create_and_train_xgb_seq2seq_multi,
            direct_seq2seq_forecast_xgb_multi,
            "seq2seq",
            [0, 1, 2, 7, 8, 11, 12],
        ),
        (   # 4
            "UniSeq2Seq XGB Multi",
            create_and_train_uniseq2seq_xgb,
            direct_uniseq2seq_forecast_xgb,
            "uniseq2seq",
            [0, 1, 2, 7, 8, 11, 12],
        ),
        (   # 5
            "Seq2Seq RF Multi",
            create_and_train_rf_seq2seq_multi,
            direct_seq2seq_forecast_rf_multi,
            "seq2seq",
            [0, 1, 2, 7, 8, 11, 12],
        ),
        (   # 6
            "UniAR XGB Multi",
            create_and_train_uniautoreg_xgb_multi,
            direct_uniautoreg_forecast_xgb_multi,
            "uniautoreg",
            [0, 1, 2, 7, 8, 11, 12],
        ),
        (   # 7
            "XGB Autoreg Multi",
            create_and_train_uniautoreg_xgb_multi,
            direct_uniautoreg_forecast_xgb_multi,
            "uniautoreg",
            [0, 1, 2, 7, 8, 11, 12],
        ),
        (   # 8
            "UniSeq2Seq CNN Multi",
            create_and_train_uniseq2seq_cnn_multi,
            direct_uniseq2seq_forecast_cnn_multi,
            "uniseq2seq",
            [0, 1, 2, 7, 8, 11, 12],
        ),
    ]

    # Set model numbers, for example, "1-3,5,7-8"
    user_input = "1-8"  # Example input, can be changed to any

    selected_indices = parse_model_indices(user_input)
    print("Selected models:", selected_indices)

    # Walk the registry with 1-based positions and register only the selected entries.
    for idx, spec in enumerate(models_to_add, start=1):
        model_name, train_func, forecast_func, forecast_type, feature_cols = spec
        if idx not in selected_indices:
            continue
        combiner_multi.add_model(
            model_name,
            train_func,
            forecast_func,
            forecast_type=forecast_type,
            feature_cols=feature_cols
        )

    combiner_multi.run_tests(gap_lengths=[5, 12, 24, 48, 72], n_runs=5)
    combiner_multi.summarize_results(visualize=True, output_dir="plots_multi")

In [None]:
# Print the descriptive statistics returned by describe() for the "pm2_5" column.
# NOTE(review): df is whatever the most recently executed cell above assigned
# (univariate or multivariate load) - confirm which one is intended.
print(df["pm2_5"].describe())

In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np

# Grouped bar chart comparing the MAE of univariate models with their multivariate
# counterparts for every gap length, with error bars (std) and the relative MAE
# reduction printed above each uni/multi pair.

# Sample data from Tables 1 and 2
gap_lengths = [5, 12, 24, 48, 72]
mae_uni_seq2seq_xgb = [5.286, 7.279, 6.969, 7.030, 10.144]
mae_multi_seq2seq_xgb = [5.180, 6.764, 6.537, 5.692, 9.177]
mae_uni_xgb_seq2seq = [5.555, 7.679, 7.188, 6.971, 10.624]
mae_multi_xgb_seq2seq = [5.507, 7.009, 6.697, 5.802, 8.528]
mae_std_uni_seq2seq_xgb = [1.616, 2.199, 3.111, 3.987, 10.943]
mae_std_multi_seq2seq_xgb = [1.500, 1.779, 3.142, 4.018, 12.750]
mae_std_uni_xgb_seq2seq = [1.480, 2.179, 2.557, 3.126, 10.059]
mae_std_multi_xgb_seq2seq = [1.798, 1.724, 3.009, 3.942, 10.796]

# Calculate percentage reduction of the multivariate MAE relative to the univariate MAE
percent_reduction_seq2seq_xgb = [(u - m) / u * 100 for u, m in zip(mae_uni_seq2seq_xgb, mae_multi_seq2seq_xgb)]
percent_reduction_xgb_seq2seq = [(u - m) / u * 100 for u, m in zip(mae_uni_xgb_seq2seq, mae_multi_xgb_seq2seq)]

# Create bar chart
fig, ax = plt.subplots(figsize=(12, 6))
bar_width = 0.2
index = np.arange(len(gap_lengths)) * 1.1  # Increased spacing between gap length groups

# Plot bars with smaller offset within model types and larger offset between gap lengths
bars1 = ax.bar(index - 0.35, mae_uni_seq2seq_xgb, bar_width, label='UniSeq2Seq XGB', color='blue', yerr=mae_std_uni_seq2seq_xgb, capsize=3, ecolor="gray")
bars2 = ax.bar(index - 0.15, mae_multi_seq2seq_xgb, bar_width, label='UniSeq2Seq XGB Multi', color='green', yerr=mae_std_multi_seq2seq_xgb, capsize=3, ecolor="gray")
bars3 = ax.bar(index + 0.15, mae_uni_xgb_seq2seq, bar_width, label='XGB Seq2Seq', color='orange', yerr=mae_std_uni_xgb_seq2seq, capsize=3, ecolor="gray")
bars4 = ax.bar(index + 0.35, mae_multi_xgb_seq2seq, bar_width, label='XGB Seq2Seq Multi', color='red', yerr=mae_std_multi_xgb_seq2seq, capsize=3, ecolor="gray")

# Add percentage reduction labels
# Hand-tuned x positions of the left label in each group; the right label sits 0.5 further.
z = [-0.15, 0.95, 2.07, 3.19, 4.27]
for i, (pct1, pct2) in enumerate(zip(percent_reduction_seq2seq_xgb, percent_reduction_xgb_seq2seq)):
    # Labels are placed 3 units above the taller bar of the corresponding pair.
    ax.text(z[i], max(mae_multi_seq2seq_xgb[i], mae_uni_seq2seq_xgb[i]) + 3, f'▼ {pct1:.1f}%', ha='center', rotation=0)
    ax.text(z[i] + 0.5, max(mae_multi_xgb_seq2seq[i], mae_uni_xgb_seq2seq[i]) + 3, f'▼ {pct2:.1f}%', ha='center', rotation=0)


# Customize plot
ax.set_xlabel('Gap Length (hours)')
ax.set_ylabel('MAE (µg/m³)')
# ax.set_title('MAE Comparison: Four Corresponding Models')
ax.set_xticks(index)
ax.set_xticklabels(gap_lengths)
ax.grid(True, linestyle='--', alpha=0.7)
ax.legend(loc='upper left')

plt.tight_layout()
# savefig does not create missing directories, so make sure the target exists first.
output_dir = "output_diagrams"
os.makedirs(output_dir, exist_ok=True)
plt.savefig(os.path.join(output_dir, "mae_comparison_uni_multi.png"), dpi=600)
plt.show()