In [1]:
from utils.metrics import metric
from data_provider.data_loader import Dataset_Custom
from models import xPatch
from utils.tools import EarlyStopping, adjust_learning_rate, visual
from data_provider.data_factory import data_provider
from exp.exp_main import Exp_Main
import sys
import os
import time
import warnings
import math
import argparse
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch import optim
from torch.utils.data import DataLoader
from sklearn.preprocessing import StandardScaler

warnings.filterwarnings('ignore')

project_root = os.path.abspath('./')
if project_root not in sys.path:
    sys.path.append(project_root)

In [4]:
# Complete sweep cell for notebook
import wandb


def notebook_sweep():
    # Advanced sweep config
    sweep_config = {
        'method': 'bayes',
        'metric': {
            'name': 'final_test_mse',
            'goal': 'minimize'
        },
        'parameters': {
            # Learning optimization
            'learning_rate': {
                'distribution': 'log_uniform_values',
                'min': 0.00001,
                'max': 0.001
            },
            'batch_size': {'values': [8, 16, 32]},  # Reduced range
            # Fixed: should be a parameter, not fixed value
            'train_epochs': {'values': [3, 5, 8]},

            # Model architecture - xPatch specific
            'd_model': {'values': [64, 128, 256]},  # Reduced range
            # Fixed values instead of distribution
            'd_ff': {'values': [128, 256, 512, 768]},
            'e_layers': {'values': [2, 3, 4]},  # Reduced range
            'dropout': {
                'distribution': 'uniform',
                'min': 0.1,
                'max': 0.3
            },

            # Patching strategy - MORE CONSERVATIVE
            # Reduced range, avoiding very small/large
            'patch_len': {'values': [8, 12, 16]},
            'stride': {'values': [4, 6, 8]},  # Reduced range

            # Sequence modeling - CONSERVATIVE
            'seq_len': {'values': [48, 72, 96]},  # Reduced range
            'pred_len': {'values': [3, 6, 9]},    # Full range

            # Moving average parameters
            'ma_type': {'values': ['ema', 'dema']},
            'alpha': {
                'distribution': 'uniform',
                'min': 0.1,
                'max': 0.4
            },
            'beta': {
                'distribution': 'uniform',
                'min': 0.1,
                'max': 0.4
            },

            # Advanced xPatch parameters
            'k': {'values': [2, 3, 4]},  # Reduced range
            'decomp': {'values': [0, 1]},

            # Learning rate scheduling
            # Removed 'cosine' temporarily
            'lradj': {'values': ['type1', 'type2']},

            # Regularization
            'revin': {'values': [0, 1]},  # Reduced range

            # LSTM Configuration (if using LSTM enhanced model)
            'use_lstm': {'values': [True, False]},
            # Integer values
            'lstm_hidden_size': {'values': [64, 128, 192, 256]},
            'lstm_layers': {'values': [1, 2, 3, 4]},  # Integer values
            'lstm_dropout': {
                'distribution': 'uniform',
                'min': 0.1,
                'max': 0.3
            },

            # DIRECTIONAL LOSS CONFIGURATION - moved inside parameters
            'loss': {'values': [
                #'mae',
                #'mse',
                'directional_mae',
                'directional_mse',
                'weighted_directional']},
            'directional_alpha': {
                'distribution': 'uniform',
                'min': 0.3,
                'max': 0.8
            },
            'directional_beta': {
                'distribution': 'uniform',
                'min': 0.2,
                'max': 1.0
            },
            'directional_gamma': {
                'distribution': 'uniform',
                'min': 0.1,
                'max': 0.3
            },

        },

        # Advanced early termination
        'early_terminate': {
            'type': 'hyperband',
            'min_iter': 2,
            'max_iter': 15,
            'eta': 3,
            's': 2
        }
    }

    # Create sweep
    sweep_id = wandb.sweep(sweep_config,
                           project="CS7643-GroupProject",
                           entity="xplstm")

    def train_run():
        # Define Args class inside the function so it's available
        class Args:
            def __init__(self):
                self.is_training = 1
                self.train_only = False
                self.model_id = 'AAPL_Pred5_Notebook_Tuned'
                self.model = 'xPatch'
                self.data = 'custom'  # Fixed: should be 'custom' for AAPL dataset
                self.scale = True
                self.root_path = './data/'
                self.data_path = 'aapl_OHLCV.csv'
                self.features = 'MS'
                self.target = 'Close'  # Fixed: should be 'Close' for AAPL dataset
                self.freq = 'd'
                self.checkpoints = './checkpoints/'
                self.embed = 'timeF'
                self.label_len = 48
                self.enc_in = 9  # Fixed: AAPL dataset actually has 9 features including time features
                self.padding_patch = 'end'
                self.num_workers = 8
                self.itr = 1
                self.patience = 15
                self.des = 'Sweep_Run'
                self.loss = 'mae'
                self.revin = 1

                # Directional loss defaults
                self.directional_alpha = 0.5
                self.directional_beta = 1.0
                self.directional_gamma = 0.1

                # Default values (will be overridden by sweep)
                self.seq_len = 48
                self.pred_len = 5
                self.patch_len = 12
                self.stride = 6
                self.ma_type = 'ema'
                self.alpha = 0.2
                self.beta = 0.2
                self.train_epochs = 3
                self.batch_size = 16
                self.learning_rate = 0.0001
                self.lradj = 'type1'
                self.d_model = 128
                self.d_ff = 256
                self.e_layers = 3
                self.dropout = 0.1
                self.k = 3
                self.decomp = 0

                # LSTM Configuration (if using LSTM enhanced model)
                self.use_lstm = True
                self.lstm_hidden_size = 192
                self.lstm_layers = 4
                self.lstm_dropout = 0.2
                self.lstm_bidirectional = True

                # GPU settings
                self.use_gpu = True if torch.cuda.is_available() else False
                self.gpu = 0
                self.use_multi_gpu = False
                self.devices = '0'
                self.test_flop = False

                # Wandb settings
                self.use_wandb = True
                self.wandb_entity = 'xplstm'
                self.wandb_project = 'CS7643-GroupProject'
                self.experiment_notes = 'Sweep experiment with directional loss'

        # Initialize wandb run
        run = wandb.init()

        # Check if wandb initialization was successful
        if run is None:
            print("Failed to initialize wandb run")
            return

        try:
            # Create Args instance
            args = Args()

            # Override all sweep parameters with proper type conversion
            for param, value in wandb.config.items():
                if hasattr(args, param):
                    try:
                        # Special handling for parameters that need to be integers
                        if param in ['lstm_hidden_size', 'lstm_layers', 'train_epochs', 'batch_size',
                                     'seq_len', 'pred_len', 'patch_len', 'stride', 'd_model', 'd_ff', 'e_layers', 'k']:
                            # Convert via float first to handle numpy types
                            setattr(args, param, int(float(value)))
                        # Special handling for boolean parameters
                        elif param in ['use_lstm', 'lstm_bidirectional', 'revin', 'decomp']:
                            # Handle various boolean representations
                            if isinstance(value, (bool, int)):
                                setattr(args, param, bool(value))
                            elif isinstance(value, str):
                                setattr(args, param, value.lower()
                                        in ['true', '1', 'yes'])
                            else:
                                setattr(args, param, bool(value))
                        # Special handling for float parameters (including directional loss params)
                        elif param in ['learning_rate', 'dropout', 'alpha', 'beta', 'lstm_dropout',
                                       'directional_alpha', 'directional_beta', 'directional_gamma']:
                            setattr(args, param, float(value))
                        else:
                            setattr(args, param, value)
                    except (ValueError, TypeError) as e:
                        print(
                            f"Warning: Could not convert parameter {param}={value}: {e}")
                        # Use default value or skip
                        continue

            # Ensure model_id is unique - use the stored run reference
            args.model_id = f"sweep_{run.name}"

            # COMPREHENSIVE TYPE CONVERSION: Ensure ALL parameters are native Python types
            # Convert all integer parameters
            int_params = ['lstm_hidden_size', 'lstm_layers', 'train_epochs', 'batch_size',
                          'seq_len', 'pred_len', 'patch_len', 'stride', 'd_model', 'd_ff', 'e_layers', 'k']
            for param in int_params:
                if hasattr(args, param):
                    setattr(args, param, int(float(str(getattr(args, param)))))

            # Convert all float parameters (including directional loss parameters)
            float_params = ['learning_rate', 'dropout', 'alpha', 'beta', 'lstm_dropout',
                            'directional_alpha', 'directional_beta', 'directional_gamma']
            for param in float_params:
                if hasattr(args, param):
                    setattr(args, param, float(str(getattr(args, param))))

            # Convert all boolean parameters
            bool_params = ['use_lstm', 'lstm_bidirectional', 'revin', 'decomp']
            for param in bool_params:
                if hasattr(args, param):
                    val = getattr(args, param)
                    if isinstance(val, (int, float)):
                        setattr(args, param, bool(int(val)))
                    elif isinstance(val, str):
                        setattr(args, param, val.lower()
                                in ['true', '1', 'yes'])
                    else:
                        setattr(args, param, bool(val))

            # CRITICAL: Enhanced parameter validation and adjustment
            # 1. Ensure patch_len is reasonable for seq_len
            if args.patch_len > args.seq_len:
                args.patch_len = args.seq_len // 4
                wandb.log(
                    {"param_adjustment": f"patch_len adjusted to {args.patch_len}"})

            # 2. Ensure stride is reasonable for patch_len
            if args.stride > args.patch_len:
                args.stride = max(1, args.patch_len // 2)
                wandb.log(
                    {"param_adjustment": f"stride adjusted to {args.stride}"})

            # 3. Calculate number of patches and ensure it's > 0
            num_patches = max(
                1, (args.seq_len - args.patch_len) // args.stride + 1)
            if num_patches <= 0:
                # Adjust parameters to ensure at least 1 patch
                args.patch_len = min(args.patch_len, args.seq_len)
                args.stride = min(args.stride, args.patch_len)
                num_patches = max(
                    1, (args.seq_len - args.patch_len) // args.stride + 1)
                wandb.log({
                    "param_adjustment": f"Fixed patching: patch_len={args.patch_len}, stride={args.stride}, num_patches={num_patches}"
                })

            # 4. Ensure d_ff >= d_model
            if args.d_ff < args.d_model:
                args.d_ff = args.d_model * 2
                wandb.log({"param_adjustment": f"d_ff adjusted to {args.d_ff}"})

            # 5. Ensure pred_len < seq_len
            if args.pred_len >= args.seq_len:
                args.pred_len = min(args.pred_len, args.seq_len // 4)
                wandb.log(
                    {"param_adjustment": f"pred_len adjusted to {args.pred_len}"})

            # 6. Additional validation for xPatch specific parameters
            if args.patch_len < 4:
                args.patch_len = 4
                wandb.log(
                    {"param_adjustment": f"patch_len minimum adjusted to {args.patch_len}"})

            if args.stride < 1:
                args.stride = 1
                wandb.log(
                    {"param_adjustment": f"stride minimum adjusted to {args.stride}"})

            # 7. Validate the final configuration
            final_num_patches = (
                args.seq_len - args.patch_len) // args.stride + 1
            if final_num_patches <= 0:
                raise ValueError(f"Invalid configuration results in {final_num_patches} patches. "
                                 f"seq_len={args.seq_len}, patch_len={args.patch_len}, stride={args.stride}")

            # 8. Ensure LSTM parameters are valid if LSTM is enabled with extensive debugging
            if hasattr(args, 'use_lstm') and args.use_lstm:
                # Set defaults if missing
                if not hasattr(args, 'lstm_hidden_size'):
                    args.lstm_hidden_size = 192
                if not hasattr(args, 'lstm_layers'):
                    args.lstm_layers = 2
                if not hasattr(args, 'lstm_dropout'):
                    args.lstm_dropout = 0.2
                if not hasattr(args, 'lstm_bidirectional'):
                    args.lstm_bidirectional = True

                # CRITICAL: Convert to native Python types to avoid PyTorch tensor issues
                # This prevents "'float' object has no attribute 'to'" errors
                args.lstm_hidden_size = int(float(str(args.lstm_hidden_size)))
                args.lstm_layers = int(float(str(args.lstm_layers)))
                args.lstm_dropout = float(str(args.lstm_dropout))

                # Special handling for boolean conversion
                if isinstance(args.lstm_bidirectional, (int, float)):
                    args.lstm_bidirectional = bool(
                        int(args.lstm_bidirectional))
                elif isinstance(args.lstm_bidirectional, str):
                    args.lstm_bidirectional = args.lstm_bidirectional.lower() in [
                        'true', '1', 'yes']
                else:
                    args.lstm_bidirectional = bool(args.lstm_bidirectional)

                # Validate ranges
                args.lstm_hidden_size = max(1, min(512, args.lstm_hidden_size))
                args.lstm_layers = max(1, min(8, args.lstm_layers))
                args.lstm_dropout = max(0.0, min(0.9, args.lstm_dropout))

                # Debug logging
                print(f"LSTM Configuration (converted to native Python types):")
                print(
                    f"  lstm_hidden_size: {args.lstm_hidden_size} (type: {type(args.lstm_hidden_size)})")
                print(
                    f"  lstm_layers: {args.lstm_layers} (type: {type(args.lstm_layers)})")
                print(
                    f"  lstm_dropout: {args.lstm_dropout} (type: {type(args.lstm_dropout)})")
                print(
                    f"  lstm_bidirectional: {args.lstm_bidirectional} (type: {type(args.lstm_bidirectional)})")

                wandb.log({
                    "config/lstm_enabled": True,
                    "config/lstm_hidden_size": int(args.lstm_hidden_size),
                    "config/lstm_layers": int(args.lstm_layers),
                    "config/lstm_dropout": float(args.lstm_dropout),
                    "config/lstm_bidirectional": bool(args.lstm_bidirectional)
                })

            # 9. Validate directional loss parameters
            if hasattr(args, 'loss') and 'directional' in args.loss:
                # Ensure directional parameters are in valid ranges
                args.directional_alpha = max(
                    0.1, min(1.0, args.directional_alpha))
                args.directional_beta = max(
                    0.1, min(3.0, args.directional_beta))
                if hasattr(args, 'directional_gamma'):
                    args.directional_gamma = max(
                        0.0, min(1.0, args.directional_gamma))

                print(f"Directional Loss Configuration:")
                print(f"  loss: {args.loss}")
                print(f"  directional_alpha: {args.directional_alpha}")
                print(f"  directional_beta: {args.directional_beta}")
                if hasattr(args, 'directional_gamma'):
                    print(f"  directional_gamma: {args.directional_gamma}")

            # Log parameter combinations for analysis
            wandb.log({
                "config/patch_ratio": args.patch_len / args.seq_len,
                "config/stride_ratio": args.stride / args.patch_len,
                "config/model_complexity": args.d_model * args.e_layers,
                "config/ff_ratio": args.d_ff / args.d_model,
                "config/prediction_horizon": args.pred_len / args.seq_len,
                "config/num_patches": final_num_patches,
                "config/patch_coverage": (args.patch_len + (final_num_patches - 1) * args.stride) / args.seq_len,
                "config/loss_type": args.loss,
                "config/uses_directional_loss": 'directional' in args.loss
            })

            # Add error handling for configuration issues
            if args.patch_len <= 0 or args.stride <= 0:
                raise ValueError(
                    f"Invalid patch configuration: patch_len={args.patch_len}, stride={args.stride}")

            if args.d_model <= 0 or args.d_ff <= 0:
                raise ValueError(
                    f"Invalid model dimensions: d_model={args.d_model}, d_ff={args.d_ff}")

            if not os.path.exists(args.checkpoints):
                os.makedirs(args.checkpoints)

            print(f"Starting sweep run: {args.model_id}")
            print(f"Key parameters: lr={args.learning_rate:.6f}, batch={args.batch_size}, "
                  f"patch_len={args.patch_len}, d_model={args.d_model}")
            print(f"Loss function: {args.loss}")
            print(f"Patching info: seq_len={args.seq_len}, patch_len={args.patch_len}, "
                  f"stride={args.stride}, num_patches={final_num_patches}")

            # Additional debugging for all numeric parameters
            print("All numeric parameter types:")
            numeric_params = ['seq_len', 'pred_len', 'patch_len', 'stride', 'd_model', 'd_ff', 'e_layers',
                              'batch_size', 'train_epochs', 'learning_rate', 'dropout', 'k', 'decomp', 'revin',
                              'directional_alpha', 'directional_beta', 'directional_gamma']
            for param in numeric_params:
                if hasattr(args, param):
                    val = getattr(args, param)
                    print(f"  {param}: {val} (type: {type(val)})")

            # Create experiment and train
            try:
                print("Creating Exp_Main instance...")
                exp = Exp_Main(args)
                print("Exp_Main created successfully")

                print("Starting training...")
                best_model = exp.train(args.model_id)
                print("Training completed successfully")

                print("Starting testing...")
                exp.test(args.model_id)
                print("Testing completed successfully")
            except Exception as model_error:
                print(
                    f"Error during model creation/training: {str(model_error)}")
                print(f"Error type: {type(model_error)}")
                import traceback
                print("Full traceback:")
                traceback.print_exc()
                raise  # Re-raise to see the full error

            # Log success
            if run is not None:
                wandb.log({"run_status": "completed"})

        except Exception as e:
            print(f"Error in sweep run: {str(e)}")
            # More robust error logging
            try:
                if run is not None:
                    wandb.log({
                        "run_status": "failed",
                        "error_message": str(e),
                        # Ensure bad runs are penalized
                        "final_test_mse": float('inf')
                    })
            except:
                print("Could not log error to wandb")

            # Don't re-raise the exception - just let the run fail gracefully
            return

        finally:
            # Ensure wandb run is properly finished
            if run is not None:
                wandb.finish()

    print(
        f"Sweep URL: https://wandb.ai/xplstm/CS7643-GroupProject/sweeps/{sweep_id}")
    print("Advanced sweep configuration:")
    print(f"- {len(sweep_config['parameters'])} hyperparameters")
    print(f"- Bayesian optimization with Hyperband early termination")
    print(f"- Parameter validation and auto-adjustment")
    print(f"- Directional loss support with 3 loss functions")

    # Run sweep with more experiments for comprehensive search
    wandb.agent(sweep_id, function=train_run, count=3)

    return sweep_id


# Uncomment to run advanced sweep
sweep_id = notebook_sweep()

Create sweep with ID: c387c93b
Sweep URL: https://wandb.ai/xplstm/CS7643-GroupProject/sweeps/c387c93b
Sweep URL: https://wandb.ai/xplstm/CS7643-GroupProject/sweeps/c387c93b
Advanced sweep configuration:
- 26 hyperparameters
- Bayesian optimization with Hyperband early termination
- Parameter validation and auto-adjustment
- Directional loss support with 3 loss functions


[34m[1mwandb[0m: Agent Starting Run: 5xe77yz4 with config:
[34m[1mwandb[0m: 	alpha: 0.2555745131402333
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.3045934502999885
[34m[1mwandb[0m: 	d_ff: 128
[34m[1mwandb[0m: 	d_model: 256
[34m[1mwandb[0m: 	decomp: 1
[34m[1mwandb[0m: 	directional_alpha: 0.4462102818331315
[34m[1mwandb[0m: 	directional_beta: 0.3505151954614252
[34m[1mwandb[0m: 	directional_gamma: 0.23365614236447613
[34m[1mwandb[0m: 	dropout: 0.2839619988320058
[34m[1mwandb[0m: 	e_layers: 4
[34m[1mwandb[0m: 	k: 4
[34m[1mwandb[0m: 	learning_rate: 0.0004930867836074126
[34m[1mwandb[0m: 	loss: directional_mae
[34m[1mwandb[0m: 	lradj: type1
[34m[1mwandb[0m: 	lstm_dropout: 0.24008211244566036
[34m[1mwandb[0m: 	lstm_hidden_size: 192
[34m[1mwandb[0m: 	lstm_layers: 3
[34m[1mwandb[0m: 	ma_type: dema
[34m[1mwandb[0m: 	patch_len: 16
[34m[1mwandb[0m: 	pred_len: 6
[34m[1mwandb[0m: 	revin: 0
[34m[1mwandb[0m: 	seq_l

Directional Loss Configuration:
  loss: directional_mae
  directional_alpha: 0.4462102818331315
  directional_beta: 0.3505151954614252
  directional_gamma: 0.23365614236447613
Starting sweep run: sweep_icy-sweep-1
Key parameters: lr=0.000493, batch=16, patch_len=16, d_model=256
Loss function: directional_mae
Patching info: seq_len=96, patch_len=16, stride=8, num_patches=11
All numeric parameter types:
  seq_len: 96 (type: <class 'int'>)
  pred_len: 6 (type: <class 'int'>)
  patch_len: 16 (type: <class 'int'>)
  stride: 8 (type: <class 'int'>)
  d_model: 256 (type: <class 'int'>)
  d_ff: 512 (type: <class 'int'>)
  e_layers: 4 (type: <class 'int'>)
  batch_size: 16 (type: <class 'int'>)
  train_epochs: 8 (type: <class 'int'>)
  learning_rate: 0.0004930867836074126 (type: <class 'float'>)
  dropout: 0.2839619988320058 (type: <class 'float'>)
  k: 4 (type: <class 'int'>)
  decomp: True (type: <class 'bool'>)
  revin: False (type: <class 'bool'>)
  directional_alpha: 0.4462102818331315 (ty

0,1
config/ff_ratio,▁
config/model_complexity,▁
config/num_patches,▁
config/patch_coverage,▁
config/patch_ratio,▁
config/prediction_horizon,▁
config/stride_ratio,▁

0,1
config/ff_ratio,2
config/loss_type,directional_mae
config/model_complexity,1024
config/num_patches,11
config/patch_coverage,1
config/patch_ratio,0.16667
config/prediction_horizon,0.0625
config/stride_ratio,0.5
config/uses_directional_loss,True
param_adjustment,d_ff adjusted to 512...


Exp_Main created successfully
Starting training...
train 2617
val 385
test 771
	iters: 100, epoch: 1 | loss: 0.2122265
	speed: 0.0147s/iter; left time: 17.7478s
	iters: 100, epoch: 1 | loss: 0.2122265
	speed: 0.0147s/iter; left time: 17.7478s
Epoch: 1 cost time: 2.361626148223877
Epoch: 1 cost time: 2.361626148223877
Epoch: 1, Steps: 163 | Train Loss: 0.2005765 Vali Loss: 1.2605011 Test Loss: 46.1854871
Validation loss decreased (inf --> 1.260501).  Saving model ...
Updating learning rate to 0.0004930867836074126
Epoch: 1, Steps: 163 | Train Loss: 0.2005765 Vali Loss: 1.2605011 Test Loss: 46.1854871
Validation loss decreased (inf --> 1.260501).  Saving model ...
Updating learning rate to 0.0004930867836074126
	iters: 100, epoch: 2 | loss: 0.1387630
	speed: 0.0329s/iter; left time: 34.2461s
	iters: 100, epoch: 2 | loss: 0.1387630
	speed: 0.0329s/iter; left time: 34.2461s
Epoch: 2 cost time: 2.4450793266296387
Epoch: 2 cost time: 2.4450793266296387
Epoch: 2, Steps: 163 | Train Loss: 0.14

0,1
batch,▂▃▄▅▆█▁▂▃▄█▁▂▃▇▂▃▅▆▇▁▂▃▄▅█▁▃▅▆█▁▃▄▅▇▂▃▅█
batch_loss,▆█▇▇▅█▄▅▄▅▃▃▃▂▂▃▅▂▃▃▃▃▁▂▄▃▃▁▂▃▂▂▁▂▃▃▄▅▂▂
best_sample_mse,▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇█████
epoch_time,▂▅▂▅▄▁▃█
final_test_mae,▁
final_test_mse,▁
learning_rate,████████████▄▄▄▄▄▄▃▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁
mse_std,▁
test_loss,█▅▃▂▁▁▁▁

0,1
batch,160.0
batch_loss,0.05546
best_sample_mse,5.03661
epoch,8.0
epoch_time,3.40483
final_test_mae,5.50719
final_test_mse,32.44786
learning_rate,1e-05
mse_std,15.91345
test_loss,32.44786


Testing completed successfully
Error in sweep run: You must call wandb.init() before wandb.log()
Could not log error to wandb


[34m[1mwandb[0m: Agent Starting Run: zazle3nz with config:
[34m[1mwandb[0m: 	alpha: 0.14737726247198416
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	beta: 0.14629380091346617
[34m[1mwandb[0m: 	d_ff: 256
[34m[1mwandb[0m: 	d_model: 128
[34m[1mwandb[0m: 	decomp: 0
[34m[1mwandb[0m: 	directional_alpha: 0.505238165433068
[34m[1mwandb[0m: 	directional_beta: 0.8828115079754892
[34m[1mwandb[0m: 	directional_gamma: 0.18950806965563372
[34m[1mwandb[0m: 	dropout: 0.14618493174107625
[34m[1mwandb[0m: 	e_layers: 4
[34m[1mwandb[0m: 	k: 2
[34m[1mwandb[0m: 	learning_rate: 0.0001268656011128991
[34m[1mwandb[0m: 	loss: weighted_directional
[34m[1mwandb[0m: 	lradj: type1
[34m[1mwandb[0m: 	lstm_dropout: 0.2827350985691136
[34m[1mwandb[0m: 	lstm_hidden_size: 256
[34m[1mwandb[0m: 	lstm_layers: 1
[34m[1mwandb[0m: 	ma_type: dema
[34m[1mwandb[0m: 	patch_len: 12
[34m[1mwandb[0m: 	pred_len: 9
[34m[1mwandb[0m: 	revin: 1
[34m[1mwandb[0m: 	

Directional Loss Configuration:
  loss: weighted_directional
  directional_alpha: 0.505238165433068
  directional_beta: 0.8828115079754892
  directional_gamma: 0.18950806965563372
Starting sweep run: sweep_valiant-sweep-2
Key parameters: lr=0.000127, batch=8, patch_len=12, d_model=128
Loss function: weighted_directional
Patching info: seq_len=72, patch_len=12, stride=8, num_patches=8
All numeric parameter types:
  seq_len: 72 (type: <class 'int'>)
  pred_len: 9 (type: <class 'int'>)
  patch_len: 12 (type: <class 'int'>)
  stride: 8 (type: <class 'int'>)
  d_model: 128 (type: <class 'int'>)
  d_ff: 256 (type: <class 'int'>)
  e_layers: 4 (type: <class 'int'>)
  batch_size: 8 (type: <class 'int'>)
  train_epochs: 3 (type: <class 'int'>)
  learning_rate: 0.0001268656011128991 (type: <class 'float'>)
  dropout: 0.14618493174107625 (type: <class 'float'>)
  k: 2 (type: <class 'int'>)
  decomp: False (type: <class 'bool'>)
  revin: True (type: <class 'bool'>)
  directional_alpha: 0.505238165

0,1
config/ff_ratio,▁
config/model_complexity,▁
config/num_patches,▁
config/patch_coverage,▁
config/patch_ratio,▁
config/prediction_horizon,▁
config/stride_ratio,▁

0,1
config/ff_ratio,2
config/loss_type,weighted_directional...
config/model_complexity,512
config/num_patches,8
config/patch_coverage,0.94444
config/patch_ratio,0.16667
config/prediction_horizon,0.125
config/stride_ratio,0.66667
config/uses_directional_loss,True


Exp_Main created successfully
Starting training...
train 2638
val 382
test 768
	iters: 100, epoch: 1 | loss: 0.0034500
	speed: 0.0187s/iter; left time: 16.6220s
	iters: 100, epoch: 1 | loss: 0.0034500
	speed: 0.0187s/iter; left time: 16.6220s
	iters: 200, epoch: 1 | loss: 0.0168867
	speed: 0.0171s/iter; left time: 13.4767s
	iters: 200, epoch: 1 | loss: 0.0168867
	speed: 0.0171s/iter; left time: 13.4767s
	iters: 300, epoch: 1 | loss: 0.0059914
	speed: 0.0168s/iter; left time: 11.5882s
	iters: 300, epoch: 1 | loss: 0.0059914
	speed: 0.0168s/iter; left time: 11.5882s
Epoch: 1 cost time: 5.789806842803955
Epoch: 1 cost time: 5.789806842803955
Epoch: 1, Steps: 329 | Train Loss: 0.0115489 Vali Loss: 0.0258668 Test Loss: 0.3984232
Validation loss decreased (inf --> 0.025867).  Saving model ...
Updating learning rate to 0.0001268656011128991
Epoch: 1, Steps: 329 | Train Loss: 0.0115489 Vali Loss: 0.0258668 Test Loss: 0.3984232
Validation loss decreased (inf --> 0.025867).  Saving model ...
Upd

0,1
batch,▁▁▂▂▃▄▄▅▅▆▇▇██▁▂▂▃▃▄▅▅▆▆▇██▁▁▂▃▃▄▄▅▆▆▇▇█
batch_loss,▂▂▂▂▁▂▁▃▃▁▂▇▂▂▁▁▂▁▁▁█▄▂▁▁▂▂▂▂▁▁▂▁▇▁▁▄▁▁▁
best_sample_mse,▁
epoch,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅▅▅▅██████████████
epoch_time,▁██
final_test_mae,▁
final_test_mse,▁
learning_rate,███████████████████████████▁▁▁▁▁▁▁▁▁▁▁▁▁
mse_std,▁
test_loss,█▄▁

0,1
batch,320.0
batch_loss,0.00203
best_sample_mse,0.00747
epoch,3.0
epoch_time,7.12411
final_test_mae,0.4169
final_test_mse,0.2848
learning_rate,6e-05
mse_std,0.41785
test_loss,0.2848


Testing completed successfully
Error in sweep run: You must call wandb.init() before wandb.log()
Could not log error to wandb


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: f036ia1s with config:
[34m[1mwandb[0m: 	alpha: 0.20407822218377245
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	beta: 0.3994678438200273
[34m[1mwandb[0m: 	d_ff: 128
[34m[1mwandb[0m: 	d_model: 256
[34m[1mwandb[0m: 	decomp: 0
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: f036ia1s with config:
[34m[1mwandb[0m: 	alpha: 0.20407822218377245
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	beta: 0.3994678438200273
[34m[1mwandb[0m: 	d_ff: 128
[34m[1mwandb[0m: 	d_model: 256
[34m[1mwandb[0m: 	decomp: 0
[34m[1mwandb[0m: 	directional_alpha: 0.5945837279232419
[34m[1mwandb[0m: 	directional_beta: 0.4753575237196804
[34m[1mwandb[0m: 	directional_gamma: 0.2123252506396879
[34m[1mwandb[0m: 	dropout: 0.24120777070578556
[34m[1mwandb[0m: 	e_layers: 2
[34m[1mwandb[0m: 	k: 2
[34m[1mwandb[0m: 	le

LSTM Configuration (converted to native Python types):
  lstm_hidden_size: 256 (type: <class 'int'>)
  lstm_layers: 4 (type: <class 'int'>)
  lstm_dropout: 0.11651177814852934 (type: <class 'float'>)
  lstm_bidirectional: True (type: <class 'bool'>)
Directional Loss Configuration:
  loss: directional_mse
  directional_alpha: 0.5945837279232419
  directional_beta: 0.4753575237196804
  directional_gamma: 0.2123252506396879
Starting sweep run: sweep_cosmic-sweep-3
Key parameters: lr=0.000908, batch=8, patch_len=12, d_model=256
Loss function: directional_mse
Patching info: seq_len=96, patch_len=12, stride=6, num_patches=15
All numeric parameter types:
  seq_len: 96 (type: <class 'int'>)
  pred_len: 6 (type: <class 'int'>)
  patch_len: 12 (type: <class 'int'>)
  stride: 6 (type: <class 'int'>)
  d_model: 256 (type: <class 'int'>)
  d_ff: 512 (type: <class 'int'>)
  e_layers: 2 (type: <class 'int'>)
  batch_size: 8 (type: <class 'int'>)
  train_epochs: 5 (type: <class 'int'>)
  learning_rate

0,1
config/ff_ratio,▁
config/lstm_dropout,▁
config/lstm_hidden_size,▁
config/lstm_layers,▁
config/model_complexity,▁
config/num_patches,▁
config/patch_coverage,▁
config/patch_ratio,▁
config/prediction_horizon,▁
config/stride_ratio,▁

0,1
config/ff_ratio,2
config/loss_type,directional_mse
config/lstm_bidirectional,True
config/lstm_dropout,0.11651
config/lstm_enabled,True
config/lstm_hidden_size,256
config/lstm_layers,4
config/model_complexity,512
config/num_patches,15
config/patch_coverage,1


Exp_Main created successfully
Starting training...
train 2617
val 385
test 771
	iters: 100, epoch: 1 | loss: 0.0515376
	speed: 0.1499s/iter; left time: 230.2984s
	iters: 100, epoch: 1 | loss: 0.0515376
	speed: 0.1499s/iter; left time: 230.2984s
	iters: 200, epoch: 1 | loss: 0.0499808
	speed: 0.1499s/iter; left time: 215.2469s
	iters: 200, epoch: 1 | loss: 0.0499808
	speed: 0.1499s/iter; left time: 215.2469s
	iters: 300, epoch: 1 | loss: 0.0512403
	speed: 0.1496s/iter; left time: 199.8832s
	iters: 300, epoch: 1 | loss: 0.0512403
	speed: 0.1496s/iter; left time: 199.8832s
Epoch: 1 cost time: 49.10138440132141
Epoch: 1 cost time: 49.10138440132141
Epoch: 1, Steps: 327 | Train Loss: 0.0546474 Vali Loss: 0.0648988 Test Loss: 0.2525138
Validation loss decreased (inf --> 0.064899).  Saving model ...
Updating learning rate to 0.0009075584190259272
Epoch: 1, Steps: 327 | Train Loss: 0.0546474 Vali Loss: 0.0648988 Test Loss: 0.2525138
Validation loss decreased (inf --> 0.064899).  Saving model .

0,1
batch,▁▁▂▃▅▆▆██▁▂▂▃▃▄▅▅▆▇▇▁▁▂▃▄▆▇▇█▁▆▇█▁▂▄▅▅▆█
batch_loss,▃▁▁▄▅▅▆▂▄▃▂▂▅▁▁▃▄▁▁█▄▄▄▂▁▂▁▇▂▂▁▅▁▂▂▁▁▃▂▃
best_sample_mse,▁
epoch,▁▁▁▁▁▁▁▃▃▃▃▃▃▃▃▅▅▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆▆▆▆█████
epoch_time,▁▂▇▅█
final_test_mae,▁
final_test_mse,▁
learning_rate,█████████████████▄▄▄▄▄▄▄▄▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁
mse_std,▁
test_loss,█▄▂▁▁

0,1
batch,320.0
batch_loss,0.04188
best_sample_mse,0.00164
epoch,5.0
epoch_time,55.23691
final_test_mae,0.23035
final_test_mse,0.09898
learning_rate,0.00011
mse_std,0.18396
test_loss,0.10196


Testing completed successfully
Error in sweep run: You must call wandb.init() before wandb.log()
Could not log error to wandb
