In [1]:
# Imports
import pandas as pd
import numpy as np
import json
import warnings
from datetime import datetime
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

import lightning.pytorch as pl
from lightning.pytorch.callbacks import EarlyStopping, ModelCheckpoint
from pytorch_forecasting import TimeSeriesDataSet, TemporalFusionTransformer
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import SMAPE, MAE, RMSE

warnings.filterwarnings('ignore')


In [2]:
# Environment and path setup
def detect_environment():
    """Report where the notebook is running and the matching base path.

    Returns:
        tuple[str, str]: ``('colab', '/content/drive/MyDrive/fcst/')`` when the
        ``google.colab`` package can be imported (Google Drive is mounted as a
        side effect), otherwise ``('local', '..')``.
    """
    colab_result = ('colab', '/content/drive/MyDrive/fcst/')
    local_result = ('local', '..')

    try:
        # Both imports only succeed inside a Colab runtime.
        import google.colab
        from google.colab import drive
        drive.mount('/content/drive/')
    except ImportError:
        return local_result
    return colab_result

# Resolve the runtime environment once; later cells build data paths from base_path.
environment, base_path = detect_environment()
print(f"Environment: {environment}")
print(f"Base path: {base_path}")

import os
import sys
# Join with os.path.join instead of string concatenation: the local base path
# ('..') has no trailing slash, so `base_path + 'MCC Aggregates Forecasting/'`
# produced '..MCC Aggregates Forecasting/' and the import below could not find
# the project module. The Colab path already ends in '/', which join handles too.
sys.path.append(os.path.join(base_path, 'MCC Aggregates Forecasting'))
from evaluation import evaluate_and_report_mcc


Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).
Environment: colab
Base path: /content/drive/MyDrive/fcst/


In [3]:
# Load New DL Features
print("Loading pre-processed DL features...")

# The LSTM-format file is mandatory: report the problem and re-raise when it is absent.
try:
    lstm_data = pd.read_parquet(f'{base_path}/data/features/dl_lstm_format.parquet')
except FileNotFoundError:
    print("ERROR: dl_lstm_format.parquet not found. Run dl_features.py first!")
    raise
else:
    print(f"LSTM data loaded: {len(lstm_data):,} sequences")

# The TFT-format file is optional: a missing file only disables the TFT cell
# (signalled downstream by tft_data being None).
try:
    tft_data = pd.read_parquet(f'{base_path}/data/features/dl_tft_format.parquet')
except FileNotFoundError:
    print("WARNING: dl_tft_format.parquet not found, TFT will be skipped")
    tft_data = None
else:
    print(f"TFT data loaded: {len(tft_data):,} records")

# Partition the LSTM rows by their pre-assigned 'split' label; copies keep the
# two frames independent of lstm_data.
lstm_train, lstm_test = (
    lstm_data.loc[lstm_data['split'] == split_label].copy()
    for split_label in ('train', 'test')
)

print(f"LSTM - Train: {len(lstm_train):,}, Test: {len(lstm_test):,}")


Loading pre-processed DL features...
LSTM data loaded: 159,659 sequences
TFT data loaded: 8,461,927 records
LSTM - Train: 81,815, Test: 77,844


In [4]:
# Prepare LSTM Data
def prepare_lstm_tensors(data):
    """Convert LSTM-format rows into model-ready float32 tensors.

    Args:
        data: DataFrame with a 'sequence' column (one nested sequence of
            per-timestep feature values per row) and a numeric 'target' column.

    Returns:
        tuple[torch.Tensor, torch.Tensor]: ``(sequences, targets)`` as float32
        tensors. ``sequences`` stacks the per-row matrices along a new leading
        axis; ``targets`` has one value per row.
    """
    # Build one contiguous float32 array and convert it once. The previous
    # iterrows() loop followed by torch.FloatTensor(list_of_ndarrays) takes the
    # slow per-element conversion path that PyTorch itself warns about.
    # list(seq) lets np.asarray stack the timesteps whether a sequence is stored
    # as a list of lists or as an object array of per-timestep arrays.
    sequences = np.array(
        [np.asarray(list(seq), dtype=np.float32) for seq in data['sequence']]
    )
    targets = np.asarray(data['target'], dtype=np.float32)

    return (
        torch.tensor(sequences, dtype=torch.float32),
        torch.tensor(targets, dtype=torch.float32),
    )

# Materialise the train and test splits as (sequence, target) tensor pairs
X_train, y_train = prepare_lstm_tensors(lstm_train)
X_test, y_test = prepare_lstm_tensors(lstm_test)

print(f"LSTM tensors - Train: {X_train.shape}, Test: {X_test.shape}")

# Pair each sequence with its target so the loaders yield (X, y) batches
train_dataset, test_dataset = (
    TensorDataset(X_train, y_train),
    TensorDataset(X_test, y_test),
)

# SPEED OPTIMIZATION: batches of 256. Only the training loader shuffles; the
# test loader keeps row order so predictions line up with the test tensors.
train_loader = DataLoader(dataset=train_dataset, batch_size=256, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=256, shuffle=False)


LSTM tensors - Train: torch.Size([81815, 52, 2]), Test: torch.Size([77844, 52, 2])


In [5]:
# Temporal Fusion Transformer (Primary DL Model)
#
# Trains a deliberately small TFT on a random sample of series, predicts the
# last step of the matching test series and reports the metrics through
# evaluate_and_report_mcc. Any failure is caught and reported, and NaN metrics
# are recorded so the comparison cells below still run.
#
# Outputs of this cell used later: tft_metrics (dict) and tft_success (bool).
if tft_data is not None:
    print("\n" + "="*50)
    print("TRAINING TEMPORAL FUSION TRANSFORMER (PRIMARY DL MODEL)")
    print("="*50)

    try:
        # time_idx of the forecast step; smaller indices are treated as history.
        TARGET_TIME_IDX = 52
        # Minimum number of rows a sampled series needs to be kept for training.
        MIN_SERIES_TIMESTEPS = 15

        # Prepare TFT data
        tft_train = tft_data[tft_data['split'] == 'train'].copy()
        tft_test = tft_data[tft_data['split'] == 'test'].copy()

        print(f"TFT train: {len(tft_train):,}, test: {len(tft_test):,}")

        # Per-series cutoff at 80% of the largest time_idx. It is only attached
        # as a column here; the actual train/validation split below uses
        # val_cutoff instead.
        max_time_idx = tft_train.groupby('series_id')['time_idx'].max()
        training_cutoff = (max_time_idx * 0.8).astype(int).to_dict()

        tft_combined = pd.concat([tft_train, tft_test], ignore_index=True)
        tft_combined['training_cutoff'] = tft_combined['series_id'].map(training_cutoff)

        # Check if target column exists and has values at the target timestep
        print(f"TFT data columns: {tft_combined.columns.tolist()}")
        target_records = tft_combined[tft_combined['time_idx'] == TARGET_TIME_IDX]
        print(f"Target records (time_idx=52): {len(target_records)}")
        if len(target_records) > 0:
            print(f"Target values sample: {target_records['target'].head()}")
            print(f"Target values stats: min={target_records['target'].min():.4f}, max={target_records['target'].max():.4f}, mean={target_records['target'].mean():.4f}")

        # PREVENT DATA LEAKAGE: Remove amount from the target timestep
        if 'amount' in tft_combined.columns:
            print(f"Before processing: {len(tft_combined)} records")

            # Historical timesteps keep their amount and get target=0.
            # NOTE(review): the training subset further down only contains rows
            # with time_idx <= val_cutoff. If every series spans 0..52, that
            # subset holds nothing but these zeroed targets, which would explain
            # a sMAPE of exactly 200 (all-zero predictions). Confirm against the
            # feature schema and consider forecasting the amount series itself.
            tft_combined.loc[tft_combined['time_idx'] < TARGET_TIME_IDX, 'target'] = 0

            # The target timestep must not expose its own amount as an input.
            tft_combined.loc[tft_combined['time_idx'] == TARGET_TIME_IDX, 'amount'] = 0

            print(f"After processing: {len(tft_combined)} records")
            print("Set amount=0 for target timesteps to prevent data leakage")

        # Check for any remaining NaN values in amount
        if 'amount' in tft_combined.columns:
            nan_count = tft_combined['amount'].isna().sum()
            if nan_count > 0:
                print(f"Warning: {nan_count} NaN values found in amount, filling with 0")
                tft_combined['amount'] = tft_combined['amount'].fillna(0)

        # Check available features
        available_features = tft_combined.columns.tolist()
        print(f"Available features: {available_features}")

        # Use only the features that are actually present in the data
        time_varying_known = ['time_idx']
        time_varying_unknown = []
        static_reals = []

        # Calendar feature, known ahead of time
        if 'month' in available_features:
            time_varying_known.append('month')

        # Historical amounts are only observed up to the forecast step
        if 'amount' in available_features:
            time_varying_unknown.append('amount')

        # Per-series static attributes, if available
        static_features = ['yearly_income', 'total_debt', 'credit_score', 'current_age']
        for feat in static_features:
            if feat in available_features:
                static_reals.append(feat)

        print(f"Using time_varying_known: {time_varying_known}")
        print(f"Using time_varying_unknown: {time_varying_unknown}")
        print(f"Using static_reals: {static_reals}")

        # Train and validate on the same series (split by time) so the
        # per-series normalizer never meets an unknown series_id.
        train_data = tft_combined[tft_combined['split'] == 'train'].copy()

        # SPEED OPTIMIZATION: Sample only a small subset for fast training
        unique_series = train_data['series_id'].unique()

        # Sample at most 100 series; the seed makes the sample reproducible
        np.random.seed(42)
        sampled_series = np.random.choice(unique_series, size=min(100, len(unique_series)), replace=False)
        train_data = train_data[train_data['series_id'].isin(sampled_series)].copy()

        print(f"SPEED OPTIMIZATION: Sampled {len(sampled_series)} series from {len(unique_series)} total")

        # Ensure we have enough data for each series
        series_lengths = train_data.groupby('series_id')['time_idx'].count()
        valid_series = series_lengths[series_lengths >= MIN_SERIES_TIMESTEPS].index
        train_data = train_data[train_data['series_id'].isin(valid_series)].copy()

        # The message is derived from the threshold (it used to claim ">= 30"
        # while the filter applied 15).
        print(f"Filtered to {len(valid_series)} series with >= {MIN_SERIES_TIMESTEPS} timesteps")

        # For validation, use the last 20% of timesteps of each series
        max_train_time = train_data.groupby('series_id')['time_idx'].max()
        min_train_time = train_data.groupby('series_id')['time_idx'].min()

        # Validation cutoff sits 80% of the way between min and max per series
        val_cutoff = (min_train_time + (max_train_time - min_train_time) * 0.8).astype(int).to_dict()

        train_data['val_cutoff'] = train_data['series_id'].map(val_cutoff)

        print(f"Train data shape: {train_data.shape}")
        print(f"Unique series in train: {len(train_data['series_id'].unique())}")

        # Debug validation split
        train_subset = train_data[train_data['time_idx'] <= train_data['val_cutoff']]
        val_subset = train_data[train_data['time_idx'] > train_data['val_cutoff']]
        print(f"Training subset: {len(train_subset)} samples")
        print(f"Validation subset: {len(val_subset)} samples")

        # TFT Dataset: short encoder windows (8-16 steps), one-step-ahead target
        training_tft = TimeSeriesDataSet(
            train_data[train_data['time_idx'] <= train_data['val_cutoff']],
            time_idx="time_idx",
            target="target",
            group_ids=["series_id"],
            min_encoder_length=8,
            max_encoder_length=16,
            min_prediction_length=1,
            max_prediction_length=1,
            time_varying_known_reals=time_varying_known,
            time_varying_unknown_reals=time_varying_unknown,
            static_reals=static_reals,
            target_normalizer=GroupNormalizer(groups=["series_id"], transformation="softplus"),
            add_relative_time_idx=True,
            add_target_scales=True,
            add_encoder_length=True,
            allow_missing_timesteps=True,
        )

        # Validation reuses the training series; predict=True keeps only the
        # last prediction window of each series.
        validation_tft = TimeSeriesDataSet.from_dataset(
            training_tft,
            train_data,
            predict=True,
            stop_randomization=True
        )

        # SPEED OPTIMIZATION: larger batch sizes (previously 32)
        train_dataloader_tft = training_tft.to_dataloader(train=True, batch_size=128, num_workers=0)
        val_dataloader_tft = validation_tft.to_dataloader(train=False, batch_size=128, num_workers=0)

        print(f"TFT Training samples: {len(training_tft)}")
        print(f"TFT Validation samples: {len(validation_tft)}")

        # Debug: check a sample batch. A batch is (x, (target, weight)) with x a
        # dict of tensors, so positional indexing into x or calling .shape on
        # the target tuple always failed into the except branch.
        try:
            sample_x, sample_y = next(iter(train_dataloader_tft))
            print(f"Sample batch input shape: {sample_x['encoder_cont'].shape}")
            print(f"Sample batch target shape: {sample_y[0].shape}")
        except Exception as e:
            print(f"Error getting sample batch: {e}")

        # Create TFT model (intentionally tiny for speed)
        tft = TemporalFusionTransformer.from_dataset(
            training_tft,
            learning_rate=0.05,  # Increased for faster convergence
            hidden_size=8,      # Further reduced for speed
            attention_head_size=1,  # Minimal attention heads
            dropout=0.0,         # Remove dropout for speed
            hidden_continuous_size=4,  # Much smaller
            reduce_on_plateau_patience=1,  # Faster patience
        )

        print("TFT model created successfully")

        # Setup trainer for TFT. accelerator="auto" still picks the GPU on
        # Colab but no longer fails in the local environment this notebook
        # also supports, matching the device fallback used for the LSTM.
        trainer_tft = pl.Trainer(
            max_epochs=2,  # Only 2 epochs as requested
            enable_progress_bar=True,  # Enable progress bar to see training
            logger=False,
            enable_checkpointing=False,
            accelerator="auto",
            devices=1,
            # precision=16,  # Mixed precision for speed
        )

        print("Starting TFT training...")
        # Train TFT
        trainer_tft.fit(tft, train_dataloaders=train_dataloader_tft, val_dataloaders=val_dataloader_tft)

        # Evaluate TFT on actual test data
        print("\nGenerating TFT predictions on test data...")

        # Prepare test data for prediction
        test_data = tft_combined[tft_combined['split'] == 'test'].copy()

        # Keep only series seen during training (the normalizer is per series)
        train_series_ids = train_data['series_id'].unique()
        test_data = test_data[test_data['series_id'].isin(train_series_ids)].copy()

        print(f"SPEED OPTIMIZATION: Using test data from same {len(train_series_ids)} series as training")
        print(f"Test data filtered to {len(test_data)} records from original test set")

        # Further sample if still too many series for speed
        available_test_series = test_data['series_id'].unique()
        if len(available_test_series) > 50:
            sampled_test_series = np.random.choice(available_test_series, size=50, replace=False)
            test_data = test_data[test_data['series_id'].isin(sampled_test_series)].copy()
            print(f"Further sampled to {len(sampled_test_series)} series for speed")

        # Create test dataset from training dataset structure
        test_tft = TimeSeriesDataSet.from_dataset(
            training_tft,
            test_data,
            predict=True,
            stop_randomization=True
        )

        test_dataloader_tft = test_tft.to_dataloader(train=False, batch_size=128, num_workers=0)  # Large batch for speed

        print(f"Test dataset size: {len(test_tft)}")

        # Generate predictions
        tft_predictions = tft.predict(test_dataloader_tft, trainer_kwargs=dict(logger=False))

        if hasattr(tft_predictions, 'cpu'):
            y_pred_tft = tft_predictions.cpu().numpy().flatten()
        else:
            y_pred_tft = tft_predictions.flatten()

        # Take the actuals from the same (unshuffled) dataloader that produced
        # the predictions so each prediction is paired with its own target.
        # Slicing the first N rows with time_idx == 52 out of test_data did not
        # guarantee that pairing.
        y_true_tft = torch.cat(
            [batch_y[0] for _, batch_y in iter(test_dataloader_tft)]
        ).cpu().numpy().flatten()

        if len(y_true_tft) != len(y_pred_tft):
            raise ValueError(
                f"Prediction/actual length mismatch: {len(y_pred_tft)} predictions vs {len(y_true_tft)} actuals"
            )

        print(f"Predictions shape: {y_pred_tft.shape}")
        print(f"Actuals shape: {y_true_tft.shape}")

        # Training targets passed to the evaluator for the scaled error metric
        train_target_mask = (train_data['time_idx'] == TARGET_TIME_IDX)
        train_amounts = train_data.loc[train_target_mask, 'target'].values

        # Evaluate TFT
        tft_metrics, tft_report = evaluate_and_report_mcc(
            "TFT (Primary DL Model)", y_true_tft, y_pred_tft, train_amounts, print_report=True
        )
        tft_success = True

    except Exception as e:
        # Deliberate best-effort: report the failure in full and record NaN
        # metrics so the comparison and export cells can still run.
        import traceback
        print(f"TFT training/evaluation failed: {str(e)}")
        print(f"Error type: {type(e).__name__}")
        print("Full traceback:")
        traceback.print_exc()
        tft_metrics = {'sMAPE_w': np.nan, 'RMSSE_w': np.nan, 'MAE': np.nan, 'RMSE': np.nan}
        tft_success = False

else:
    print("\n" + "="*50)
    print("TFT DATA MISSING - SKIPPING TFT")
    print("="*50)
    tft_metrics = {'sMAPE_w': np.nan, 'RMSSE_w': np.nan, 'MAE': np.nan, 'RMSE': np.nan}
    tft_success = False



TRAINING TEMPORAL FUSION TRANSFORMER (PRIMARY DL MODEL)
TFT train: 4,336,195, test: 4,125,732
TFT data columns: ['series_id', 'time_idx', 'split', 'amount', 'month', 'yearly_income', 'total_debt', 'credit_score', 'current_age', 'target', 'training_cutoff']
Target records (time_idx=52): 159659
Target values sample: 52     221.570007
105    264.320007
158    709.369995
211    974.809998
264    416.429993
Name: target, dtype: float64
Target values stats: min=0.0200, max=5177.7598, mean=232.4865
Before processing: 8461927 records
After processing: 8461927 records
Set amount=0 for target timesteps to prevent data leakage
Available features: ['series_id', 'time_idx', 'split', 'amount', 'month', 'yearly_income', 'total_debt', 'credit_score', 'current_age', 'target', 'training_cutoff']
Using time_varying_known: ['time_idx', 'month']
Using time_varying_unknown: ['amount']
Using static_reals: ['yearly_income', 'total_debt', 'credit_score', 'current_age']
SPEED OPTIMIZATION: Sampled 100 series f

INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO: You are using a CUDA device ('NVIDIA A100-SXM4-40GB') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
INFO:lightning.pytorch.utilities.rank_zero:You are using a CUDA device ('NVIDIA A100-SXM4-40GB') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for p

Starting TFT training...


INFO: 
   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 0      | train
3  | prescalers                         | ModuleDict                      | 88     | train
4  | static_variable_selection          | VariableSelectionNetwork        | 1.6 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork        | 852    | train
6  | decoder_variable_selection         | VariableSelectionNetwork        | 618    | train
7  | static_context_variable_selection  | GatedResidualNetwork            | 304    | train
8  | static_context_initial_hidden_lstm | GatedResidualNetwork            | 3

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO: `Trainer.fit` stopped: `max_epochs=2` reached.
INFO:lightning.pytorch.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=2` reached.



Generating TFT predictions on test data...


INFO: Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
INFO:lightning.pytorch.utilities.rank_zero:Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


SPEED OPTIMIZATION: Using test data from same 100 series as training
Test data filtered to 96937 records from original test set
Further sampled to 50 series for speed
Test dataset size: 50
Predictions shape: (50,)
Actuals shape: (50,)

MCC AGGREGATES FORECASTING REPORT: TFT (PRIMARY DL MODEL)

OVERALL PERFORMANCE:
Metric          Value          
------------------------------
sMAPE_w         200.0000       
RMSSE_w         3.3882         
MAE             747.6486       
RMSE            809.0996       




In [6]:
# Enhanced LSTM Model (Primary Deep Learning Model)
class EnhancedLSTM(nn.Module):
    """Stacked LSTM encoder followed by a two-layer regression head.

    Args:
        input_size: Number of features per timestep.
        hidden_size: Width of each LSTM layer; the head narrows to half of it.
        num_layers: Number of stacked LSTM layers.
        output_size: Number of values predicted per sequence.
    """

    def __init__(self, input_size=2, hidden_size=64, num_layers=2, output_size=1):
        super().__init__()
        head_width = hidden_size // 2

        # batch_first=True: inputs are laid out as (batch, time, features).
        self.lstm = nn.LSTM(
            input_size,
            hidden_size,
            num_layers,
            batch_first=True,
            dropout=0.1,
        )
        self.fc = nn.Sequential(
            nn.Linear(hidden_size, head_width),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(head_width, output_size),
        )

    def forward(self, x):
        """Return the head's output for the final timestep of each sequence."""
        per_step_states, _ = self.lstm(x)
        # Only the top layer's state at the last timestep feeds the head.
        return self.fc(per_step_states[:, -1, :])

# Create and train LSTM model
print("\n" + "="*50)
print("TRAINING ENHANCED LSTM MODEL (PRIMARY DL MODEL)")
print("="*50)

# Prefer CUDA, then Apple MPS, then CPU
device = torch.device("cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu")
print(f"Using device: {device}")

lstm_model = EnhancedLSTM(input_size=2).to(device)  # 2 features per timestep (amount, month)
criterion = nn.MSELoss()
optimizer = optim.Adam(lstm_model.parameters(), lr=0.001)

# Training loop
epochs = 10  # Optimized for speed
for epoch in range(epochs):
    lstm_model.train()
    train_loss = 0
    for batch_X, batch_y in train_loader:
        batch_X, batch_y = batch_X.to(device), batch_y.to(device)

        optimizer.zero_grad()
        outputs = lstm_model(batch_X)
        # squeeze(-1) removes only the output dimension. A bare squeeze() also
        # collapses the batch dimension when the final batch holds one sample,
        # which makes the prediction 0-d while the target stays 1-d.
        loss = criterion(outputs.squeeze(-1), batch_y)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    # Mean of the per-batch losses for this epoch
    print(f"Epoch [{epoch+1}/{epochs}], Loss: {train_loss/len(train_loader):.4f}")

# Evaluate LSTM
lstm_model.eval()
lstm_predictions = []
lstm_actuals = []

with torch.no_grad():
    for batch_X, batch_y in test_loader:
        batch_X, batch_y = batch_X.to(device), batch_y.to(device)
        outputs = lstm_model(batch_X)
        # Keep the batch axis so extend() always receives a 1-d array, even for
        # a trailing batch of size one (extend fails on a 0-d array).
        lstm_predictions.extend(outputs.squeeze(-1).cpu().numpy())
        lstm_actuals.extend(batch_y.cpu().numpy())

lstm_predictions = np.array(lstm_predictions)
lstm_actuals = np.array(lstm_actuals)

# Score the predictions; the training targets are passed to the evaluator as well
lstm_metrics, lstm_report = evaluate_and_report_mcc(
    "Enhanced LSTM (Primary DL Model)", lstm_actuals, lstm_predictions, y_train.numpy(), print_report=True
)



TRAINING ENHANCED LSTM MODEL (PRIMARY DL MODEL)
Using device: cuda
Epoch [1/10], Loss: 128278.7498
Epoch [2/10], Loss: 93916.9519
Epoch [3/10], Loss: 93024.9567
Epoch [4/10], Loss: 93095.0072
Epoch [5/10], Loss: 90460.9373
Epoch [6/10], Loss: 59274.9551
Epoch [7/10], Loss: 42654.7646
Epoch [8/10], Loss: 37294.5955
Epoch [9/10], Loss: 34333.4263
Epoch [10/10], Loss: 33232.4935

MCC AGGREGATES FORECASTING REPORT: ENHANCED LSTM (PRIMARY DL MODEL)

OVERALL PERFORMANCE:
Metric          Value          
------------------------------
sMAPE_w         66.8325        
RMSSE_w         0.6871         
MAE             109.9301       
RMSE            171.1378       




In [7]:
# Final Model Comparison
print("\n" + "="*50)
print("FINAL MODEL COMPARISON - ALL DEEP LEARNING MODELS")
print("="*50)

# Model labels and their metric dicts, kept in matching order
all_models = ['TFT (Primary DL Model)', 'Enhanced LSTM (Primary DL Model)']
all_metrics = [tft_metrics, lstm_metrics]

# One row per model, one column per reported metric
final_comparison_df = pd.DataFrame({
    'Model': all_models,
    **{
        metric_name: [model_metrics[metric_name] for model_metrics in all_metrics]
        for metric_name in ('sMAPE_w', 'RMSSE_w', 'MAE', 'RMSE')
    },
})

print(final_comparison_df.round(4))

# The model with the lowest sMAPE_w is reported as the overall best
best_model_idx = final_comparison_df['sMAPE_w'].idxmin()
best_model = final_comparison_df.at[best_model_idx, 'Model']
print(f"\nBest overall model by sMAPE_w: {best_model}")



FINAL MODEL COMPARISON - ALL DEEP LEARNING MODELS
                              Model   sMAPE_w  RMSSE_w       MAE      RMSE
0            TFT (Primary DL Model)  200.0000   3.3882  747.6486  809.0996
1  Enhanced LSTM (Primary DL Model)   66.8325   0.6871  109.9301  171.1378

Best overall model by sMAPE_w: Enhanced LSTM (Primary DL Model)


In [8]:
# Save Final Results
# Convert numpy types to Python types for JSON serialization
def convert_to_json_serializable(obj):
    """Recursively replace numpy values with plain Python equivalents.

    Args:
        obj: Any value. Numpy floats, integers and booleans, numpy arrays, and
            dicts, lists and tuples containing them are converted; anything
            else is returned unchanged.

    Returns:
        The converted value. Arrays and tuples become lists (JSON has a single
        array type); dict keys are left as they are.
    """
    # np.bool_ is not accepted by json.dump and previously fell through
    # unconverted.
    if isinstance(obj, np.bool_):
        return bool(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, dict):
        return {k: convert_to_json_serializable(v) for k, v in obj.items()}
    # Tuples are walked as well so numpy scalars nested inside them get converted.
    if isinstance(obj, (list, tuple)):
        return [convert_to_json_serializable(v) for v in obj]
    return obj

# Convert metrics to ensure JSON serialization
tft_metrics_clean = convert_to_json_serializable(tft_metrics)
lstm_metrics_clean = convert_to_json_serializable(lstm_metrics)

# Everything needed to interpret the run: data sizes, model settings, metrics
final_results = {
    'experiment_info': {
        'timestamp': datetime.now().isoformat(),
        'lstm_sequences_train': len(lstm_train),
        'lstm_sequences_test': len(lstm_test),
        'sequence_length': 52,
        'forecast_horizon': 1,
        'features_per_timestep': 2,
        'device': str(device),
        'tft_success': tft_success if 'tft_success' in locals() else False
    },
    'models': {
        'tft_primary_dl_model': {
            'metrics': tft_metrics_clean,
            # These must mirror the values passed to
            # TemporalFusionTransformer.from_dataset and pl.Trainer in the TFT
            # cell. The export previously recorded learning_rate=0.03,
            # hidden_size=16 and hidden_continuous_size=8, which were not the
            # settings the model was trained with.
            'hyperparameters': {
                'learning_rate': 0.05,
                'hidden_size': 8,
                'attention_head_size': 1,
                'dropout': 0.0,
                'hidden_continuous_size': 4,
                'reduce_on_plateau_patience': 1,
                'max_epochs': 2
            }
        },
        'enhanced_lstm_primary_dl_model': {
            'metrics': lstm_metrics_clean,
            'hyperparameters': {
                'input_size': 2,
                'hidden_size': 64,
                'num_layers': 2,
                'learning_rate': 0.001,
                'epochs': epochs
            }
        }
    },
    'comparison': convert_to_json_serializable(final_comparison_df.to_dict('records')),
    'best_model': best_model
}

# Save to JSON (written to the current working directory)
with open('dl_models_results.json', 'w') as f:
    json.dump(final_results, f, indent=2)

print(f"\nFinal results saved to dl_models_results.json")
print("Deep learning models training completed!")


Final results saved to dl_models_results.json
Deep learning models training completed!
