In [3]:
import os
import time
import warnings
import math
import argparse 
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch import optim
from torch.utils.data import DataLoader
from sklearn.preprocessing import StandardScaler 

# Silence every warning category for the rest of the session.
# NOTE(review): this also hides deprecation/runtime warnings from torch and pandas.
warnings.filterwarnings('ignore')

import sys
# Append the absolute path of the current working directory to sys.path (once),
# presumably so the project-local packages imported right below resolve when the
# notebook is launched from the repository root.
project_root = os.path.abspath('./') 
if project_root not in sys.path:
    sys.path.append(project_root)

from exp.exp_main import Exp_Main
from models import xPatch 
from data_provider.data_factory import data_provider
from data_provider.data_loader import Dataset_Custom 
from utils.tools import EarlyStopping, adjust_learning_rate, visual
from utils.metrics import metric

In [4]:
class Args:
    """Attribute bag with every setting of the baseline (non-sweep) notebook run.

    An instance is handed to ``Exp_Main(args)`` in a later cell. Attributes are
    assigned in a fixed order, which is also the order ``vars(args)`` prints them.
    """

    def __init__(self):
        # --- Run identity ---
        self.is_training = 1
        self.train_only = False
        self.model_id = 'ETTh1_Pred_Notebook_Untuned'
        self.model = 'xPatch'

        # --- Dataset ---
        # Alternative dataset used earlier: data_path='aapl_OHLCV.csv', target='Close'.
        self.data = 'custom'
        self.scale = True
        self.root_path = './data/'
        self.data_path = 'ETTh1.csv'
        self.features = 'MS'  # multiple input features, single target
        self.target = 'OT'    # column being forecast
        self.freq = 'h'
        self.checkpoints = './checkpoints/'
        self.embed = 'timeF'

        # --- Hyperparameters: forecasting task ---
        self.seq_len = 48
        self.label_len = 48
        self.pred_len = 5
        self.enc_in = 7  # must equal the number of feature columns in the CSV after 'date'

        # --- Hyperparameters: patching ---
        self.patch_len = 12
        self.stride = 6
        self.padding_patch = 'end'

        # --- Hyperparameters: moving average ---
        self.ma_type = 'ema'
        self.alpha = 0.2
        self.beta = 0.2

        # --- Hyperparameters: optimization ---
        self.num_workers = 8  # tune to the machine running the notebook
        self.itr = 1
        self.train_epochs = 3
        self.batch_size = 16
        self.patience = 15
        self.learning_rate = 0.0001
        self.des = 'Notebook_Run_MS'
        self.loss = 'mae'
        self.lradj = 'type1'  # learning-rate schedule name
        self.revin = 1

        # --- Device ---
        self.use_gpu = torch.cuda.is_available()
        self.gpu = 0
        self.use_multi_gpu = False
        self.devices = '0'
        self.test_flop = False

        # --- Weights & Biases ---
        self.use_wandb = True                        # toggle wandb logging
        self.wandb_entity = 'xplstm'                 # team name
        self.wandb_project = 'CS7643-GroupProject'   # project name
        self.experiment_notes = 'LSTM head after patching layer'  # run-specific notes

        # --- Additional xPatch parameters ---
        self.d_model = 128
        self.d_ff = 256
        self.e_layers = 3
        self.dropout = 0.1
        self.k = 3
        self.decomp = 0

        # --- LSTM configuration for the network layer ---
        # (use_lstm_attention was dropped: the LSTM lives in the Network layer instead.)
        self.use_lstm = True
        self.lstm_hidden_size = 96
        self.lstm_layers = 2
        self.lstm_dropout = 0.15
        self.lstm_bidirectional = True

# Instantiate the notebook-level configuration shared by the cells below.
args = Args()

# Create the checkpoint directory if needed. exist_ok=True replaces the former
# exists()/makedirs() pair, which could race when two runs start at the same time.
os.makedirs(args.checkpoints, exist_ok=True)

In [5]:

# Build the experiment object from the notebook-level configuration.
exp = Exp_Main(args)

# Echo the complete configuration so it is recorded next to the training output.
print(f'Args for this run: {vars(args)}')

# Train, using args.model_id as the run identifier. The return value is kept in
# best_model but is not read again in the cells shown here.
best_model = exp.train(args.model_id)


Use CPU


[34m[1mwandb[0m: Currently logged in as: [33mradeosimbio[0m ([33mxplstm[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Args for this run: {'is_training': 1, 'train_only': False, 'model_id': 'ETTh1_Pred_Notebook_Untuned', 'model': 'xPatch', 'data': 'custom', 'scale': True, 'root_path': './data/', 'data_path': 'ETTh1.csv', 'features': 'MS', 'target': 'OT', 'freq': 'h', 'checkpoints': './checkpoints/', 'embed': 'timeF', 'seq_len': 48, 'label_len': 48, 'pred_len': 5, 'enc_in': 7, 'patch_len': 12, 'stride': 6, 'padding_patch': 'end', 'ma_type': 'ema', 'alpha': 0.2, 'beta': 0.2, 'num_workers': 8, 'itr': 1, 'train_epochs': 3, 'batch_size': 16, 'patience': 15, 'learning_rate': 0.0001, 'des': 'Notebook_Run_MS', 'loss': 'mae', 'lradj': 'type1', 'revin': 1, 'use_gpu': False, 'gpu': 0, 'use_multi_gpu': False, 'devices': '0', 'test_flop': False, 'use_wandb': True, 'wandb_entity': 'xplstm', 'wandb_project': 'CS7643-GroupProject', 'experiment_notes': 'LSTM head after patching layer', 'd_model': 128, 'd_ff': 256, 'e_layers': 3, 'dropout': 0.1, 'k': 3, 'decomp': 0, 'use_lstm': True, 'lstm_hidden_size': 96, 'lstm_layers

In [6]:
# Show which run is being evaluated, then run the test pass for that run id.
# Relies on `exp` and `args` created in the earlier cells.
print(args.model_id)
exp.test(args.model_id)


ETTh1_Pred_Notebook_Untuned
test 3480
[[[-1.5036958]
  [-1.5141006]
  [-1.5102413]
  [-1.5306369]
  [-1.5346196]]

 [[-1.5060335]
  [-1.5118217]
  [-1.4949538]
  [-1.5307986]
  [-1.5290812]]

 [[-1.5358615]
  [-1.5427783]
  [-1.5160813]
  [-1.5497489]
  [-1.549051 ]]

 [[-1.5025854]
  [-1.5116308]
  [-1.4973978]
  [-1.5237072]
  [-1.5225487]]

 [[-1.5314648]
  [-1.5380667]
  [-1.5169011]
  [-1.557597 ]
  [-1.5508335]]

 [[-1.5688978]
  [-1.5859139]
  [-1.5737325]
  [-1.5991282]
  [-1.5911303]]

 [[-1.5470504]
  [-1.5612429]
  [-1.5296541]
  [-1.5492663]
  [-1.5355536]]

 [[-1.4430931]
  [-1.4568661]
  [-1.4517603]
  [-1.4594373]
  [-1.4555371]]

 [[-1.4870739]
  [-1.4986005]
  [-1.476331 ]
  [-1.5006168]
  [-1.5020455]]

 [[-1.5058038]
  [-1.5176574]
  [-1.4895283]
  [-1.5167506]
  [-1.5127211]]

 [[-1.4517092]
  [-1.4661981]
  [-1.4468231]
  [-1.4573348]
  [-1.4523464]]

 [[-1.4072155]
  [-1.4210333]
  [-1.4198924]
  [-1.429887 ]
  [-1.4184579]]

 [[-1.446471 ]
  [-1.4650234]
  [-1.46

0,1
batch,▂▂▃▃▄▅▅▅▅▆▆▇▇██▂▃▃▃▄▆▇▇▇▇█▁▂▂▂▄▄▅▆▆▇▇▇▇█
batch_loss,▇▇▆▆█▅▅▇▆▂▅▆▃▄▂▂▃▃▃▂▂▁▂▄▁▃▃▃▃▃▄▃▂▂▃▃▁▂▁▂
best_sample_mse,▁
epoch,▁▁▁▁▁▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅▅▅▅███████████████
epoch_time,█▁▂
final_test_mae,▁
final_test_mse,▁
learning_rate,███████████████████████████▁▁▁▁▁▁▁▁▁▁▁▁▁
mse_std,▁
test_loss,█▂▁

0,1
batch,740.0
batch_loss,0.09017
best_sample_mse,7e-05
epoch,3.0
epoch_time,20.24634
final_test_mae,0.08878
final_test_mse,0.01653
learning_rate,5e-05
mse_std,0.04048
test_loss,0.01653


In [5]:
import matplotlib.pyplot as plt

# Paths of the saved prediction / ground-truth arrays for this run
# (presumably written by exp.test(); confirm against the project code).
folder_path = os.path.join('./test_results/', args.model_id + '/')
predictions_path = os.path.join(folder_path, 'pred.npy')
trues_path = os.path.join(folder_path, 'true.npy')

if os.path.exists(predictions_path) and os.path.exists(trues_path):
    all_preds = np.load(predictions_path)
    all_trues = np.load(trues_path)

    # Sample to visualize, clamped so a short test set cannot raise IndexError.
    sample_idx = min(10, max(len(all_preds) - 1, 0))
    # Plot the last channel so the code matches the title. With a single saved
    # channel (as in the recorded 'MS' output) this is the same as channel 0.
    channel_idx = -1

    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(all_trues[sample_idx, :, channel_idx], label='Ground Truth')
    ax.plot(all_preds[sample_idx, :, channel_idx], label='Prediction')
    ax.set_title(f'Sample {sample_idx} Prediction vs Ground Truth (Last Channel)')
    # Axis labels come from the run configuration instead of the stale
    # 'future days' / 'Price' text left over from the stock-price setup.
    ax.set_xlabel(f"Forecast step (freq='{args.freq}')")
    ax.set_ylabel(args.target)
    ax.legend()
    ax.grid(False)
    plt.show()

else:
    print("Prediction and true value files not found. Ensure test() ran successfully.")

#### **Below:** Alternative training configuration with sweeps (hyperparameter tuning with wandb). Run this instead of the training loop above.

In [None]:
# Complete sweep cell for notebook
import wandb

def notebook_sweep():
    """Create a W&B Bayesian sweep and run three trials of it in this kernel.

    Builds the sweep configuration, registers it with ``wandb.sweep``, defines the
    per-trial function ``train_run`` and executes it through ``wandb.agent`` with
    ``count=3``.

    NOTE(review): a later cell in this notebook defines ``notebook_sweep`` again;
    whichever cell ran last is the definition that is active in the kernel.

    Returns:
        The sweep id returned by ``wandb.sweep``.
    """
    # Advanced sweep config
    sweep_config = {
        'method': 'bayes',
        # Metric the Bayesian search minimizes; failed trials log it as inf below.
        'metric': {
            'name': 'final_test_mse', 
            'goal': 'minimize'
        },
        'parameters': {
                # Learning optimization
                'learning_rate': {
                    'distribution': 'log_uniform_values',
                    'min': 0.00001,
                    'max': 0.001
                },
                'batch_size': {'values': [8, 16, 32]},  # Reduced range
                'train_epochs': {'values': [3, 5, 8]},  # Reduced range
                
                # Model architecture - xPatch specific
                'd_model': {'values': [64, 128, 256]},  # Reduced range
                'd_ff': {'values': [128, 256, 512, 768]},  # Fixed values instead of distribution
                'e_layers': {'values': [2, 3, 4]},  # Reduced range
                'dropout': {
                    'distribution': 'uniform',
                    'min': 0.1,
                    'max': 0.3
                },
                
                # Patching strategy - MORE CONSERVATIVE
                'patch_len': {'values': [8, 12, 16]},  # Reduced range, avoiding very small/large
                'stride': {'values': [4, 6, 8]},  # Reduced range
                
                # Sequence modeling - CONSERVATIVE
                'seq_len': {'values': [48, 72, 96]},  # Reduced range
                'pred_len': {'values': [3, 5, 7]},    # Reduced range
                
                # Moving average parameters
                'ma_type': {'values': ['ema', 'dema']},
                'alpha': {
                    'distribution': 'uniform',
                    'min': 0.1,
                    'max': 0.4
                },
                'beta': {
                    'distribution': 'uniform',
                    'min': 0.1,
                    'max': 0.4
                },
                
                # Advanced xPatch parameters
                'k': {'values': [2, 3, 4]},  # Reduced range
                'decomp': {'values': [0, 1]},
                
                # Learning rate scheduling
                'lradj': {'values': ['type1', 'type2']},  # Removed 'cosine' temporarily
                
                # Regularization
                'revin': {'values': [0, 1]},  # Reduced range
            
            # Loss function variants
            # NOTE(review): the Args class below has no 'loss_weight_decay' attribute,
            # so the hasattr() filter in train_run never applies this sampled value.
            'loss_weight_decay': {
                'distribution': 'uniform',
                'min': 0.5,
                'max': 2.0
            }
        },
        
        # Advanced early termination
        'early_terminate': {
            'type': 'hyperband',
            'min_iter': 2,
            'max_iter': 15,
            'eta': 3,
            's': 2
        }
    }
    
    # Create sweep
    sweep_id = wandb.sweep(sweep_config, 
                          project="CS7643-GroupProject", 
                          entity="xplstm")
    
    def train_run():
        """Run one sweep trial: build Args, apply sampled values, train and test."""
        # Define Args class inside the function so it's available
        class Args:
            def __init__(self):
                self.is_training = 1
                self.train_only = False
                # Placeholder only: model_id is replaced with a per-run name below.
                self.model_id = 'AAPL_Pred5_Notebook_Tuned'
                self.model = 'xPatch'
                self.data = 'custom'
                self.scale = True
                self.root_path = './data/' 
                self.data_path = 'ETTh1.csv'
                self.features = 'MS'
                self.target = 'OT'
                self.freq = 'h'
                self.checkpoints = './checkpoints/' 
                self.embed = 'timeF'
                self.label_len = 48
                self.enc_in = 7
                self.padding_patch = 'end'
                self.num_workers = 8
                self.itr = 1 
                self.patience = 15
                self.des = 'Sweep_Run'
                self.loss = 'mae' 
                self.revin = 1
                
                # Default values (will be overridden by sweep)
                self.seq_len = 48
                self.pred_len = 5
                self.patch_len = 12
                self.stride = 6
                self.ma_type = 'ema'
                self.alpha = 0.2
                self.beta = 0.2
                self.train_epochs = 3
                self.batch_size = 16
                self.learning_rate = 0.0001
                self.lradj = 'type1'
                self.d_model = 128
                self.d_ff = 256
                self.e_layers = 3
                self.dropout = 0.1
                self.k = 3
                self.decomp = 0
                
                # GPU settings
                self.use_gpu = True if torch.cuda.is_available() else False
                self.gpu = 0 
                self.use_multi_gpu = False 
                self.devices = '0' 
                self.test_flop = False 
                
                # Wandb settings
                self.use_wandb = True
                self.wandb_entity = 'xplstm'
                self.wandb_project = 'CS7643-GroupProject'
                self.experiment_notes = 'Sweep experiment'
        
        # Start the wandb run for this trial; the sampled values are read from wandb.config.
        wandb.init()
        
        try:
            # Create Args instance
            args = Args()
            
            # Override all sweep parameters
            # Only keys that already exist on Args are copied; others are skipped silently.
            for param, value in wandb.config.items():
                if hasattr(args, param):
                    setattr(args, param, value)
            
            # Ensure model_id is unique
            args.model_id = f"sweep_{wandb.run.name}"
            
            # Advanced parameter validation and adjustment
            # Ensure stride <= patch_len
            if args.stride > args.patch_len:
                args.stride = args.patch_len // 2
                wandb.log({"param_adjustment": f"stride adjusted to {args.stride}"})
            
            # Ensure d_ff >= d_model
            if args.d_ff < args.d_model:
                args.d_ff = args.d_model * 2
                wandb.log({"param_adjustment": f"d_ff adjusted to {args.d_ff}"})
            
            # Ensure pred_len < seq_len
            if args.pred_len >= args.seq_len:
                args.pred_len = min(args.pred_len, args.seq_len // 4)
                wandb.log({"param_adjustment": f"pred_len adjusted to {args.pred_len}"})
            
            # Log parameter combinations for analysis
            # NOTE(review): these ratios divide by patch_len, seq_len and d_model before
            # the positivity checks further down are reached.
            wandb.log({
                "config/patch_ratio": args.patch_len / args.seq_len,
                "config/stride_ratio": args.stride / args.patch_len,
                "config/model_complexity": args.d_model * args.e_layers,
                "config/ff_ratio": args.d_ff / args.d_model,
                "config/prediction_horizon": args.pred_len / args.seq_len
            })
            
            # Add error handling for configuration issues
            if args.patch_len <= 0 or args.stride <= 0:
                raise ValueError(f"Invalid patch configuration: patch_len={args.patch_len}, stride={args.stride}")
            
            if args.d_model <= 0 or args.d_ff <= 0:
                raise ValueError(f"Invalid model dimensions: d_model={args.d_model}, d_ff={args.d_ff}")
            
            if not os.path.exists(args.checkpoints):
                os.makedirs(args.checkpoints)
            
            print(f"Starting sweep run: {args.model_id}")
            print(f"Key parameters: lr={args.learning_rate:.6f}, batch={args.batch_size}, "
                  f"patch_len={args.patch_len}, d_model={args.d_model}")
            
            # Create experiment and train
            exp = Exp_Main(args)
            best_model = exp.train(args.model_id)
            exp.test(args.model_id)
            
            # Log success
            wandb.log({"run_status": "completed"})
            
        except Exception as e:
            print(f"Error in sweep run: {str(e)}")
            # Record the failure and log an infinite metric so the search penalizes it.
            wandb.log({
                "run_status": "failed",
                "error_message": str(e),
                "final_test_mse": float('inf')  # Ensure bad runs are penalized
            })
            # The exception is re-raised, so it propagates to the wandb agent
            # (the recorded output shows the agent reporting the run as errored).
            raise e
    
    print(f"Sweep URL: https://wandb.ai/xplstm/CS7643-GroupProject/sweeps/{sweep_id}")
    print("Advanced sweep configuration:")
    print(f"- {len(sweep_config['parameters'])} hyperparameters")
    print(f"- Bayesian optimization with Hyperband early termination")
    print(f"- Parameter validation and auto-adjustment")
    
    # Run the sweep agent in-process for exactly three trials.
    wandb.agent(sweep_id, function=train_run, count=3)
    
    return sweep_id

# Launch the sweep: registers it with W&B and runs the agent for three trials.
# Comment this line out to define notebook_sweep() without starting a sweep.
sweep_id = notebook_sweep()

Create sweep with ID: 721yad0v
Sweep URL: https://wandb.ai/xplstm/CS7643-GroupProject/sweeps/721yad0v
Sweep URL: https://wandb.ai/xplstm/CS7643-GroupProject/sweeps/721yad0v
Advanced sweep configuration:
- 19 hyperparameters
- Bayesian optimization with Hyperband early termination
- Parameter validation and auto-adjustment


[34m[1mwandb[0m: Agent Starting Run: ke9z36dj with config:
[34m[1mwandb[0m: 	alpha: 0.20477411451465136
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.25398474044277786
[34m[1mwandb[0m: 	d_ff: 512
[34m[1mwandb[0m: 	d_model: 128
[34m[1mwandb[0m: 	decomp: 1
[34m[1mwandb[0m: 	dropout: 0.24739758628144495
[34m[1mwandb[0m: 	e_layers: 4
[34m[1mwandb[0m: 	k: 3
[34m[1mwandb[0m: 	learning_rate: 0.0002293135929709356
[34m[1mwandb[0m: 	loss_weight_decay: 0.8358802964194618
[34m[1mwandb[0m: 	lradj: type1
[34m[1mwandb[0m: 	ma_type: dema
[34m[1mwandb[0m: 	patch_len: 16
[34m[1mwandb[0m: 	pred_len: 5
[34m[1mwandb[0m: 	revin: 1
[34m[1mwandb[0m: 	seq_len: 96
[34m[1mwandb[0m: 	stride: 8
[34m[1mwandb[0m: 	train_epochs: 3


Starting sweep run: sweep_likely-sweep-1
Key parameters: lr=0.000229, batch=32, patch_len=16, d_model=128
Use CPU
Error in sweep run: 'float' object has no attribute 'to'


0,1
config/ff_ratio,▁
config/model_complexity,▁
config/patch_ratio,▁
config/prediction_horizon,▁
config/stride_ratio,▁

0,1
config/ff_ratio,4
config/model_complexity,512
config/patch_ratio,0.16667
config/prediction_horizon,0.05208
config/stride_ratio,0.5
error_message,'float' object has n...
final_test_mse,inf
run_status,failed


[34m[1mwandb[0m: [32m[41mERROR[0m Run ke9z36dj errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/python/3.12.1/lib/python3.12/site-packages/wandb/agents/pyagent.py", line 302, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/tmp/ipykernel_9542/2736668503.py", line 213, in train_run
[34m[1mwandb[0m: [32m[41mERROR[0m     raise e
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/tmp/ipykernel_9542/2736668503.py", line 199, in train_run
[34m[1mwandb[0m: [32m[41mERROR[0m     exp = Exp_Main(args)
[34m[1mwandb[0m: [32m[41mERROR[0m           ^^^^^^^^^^^^^^
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/workspaces/Finance-Time-Series-Forcasting/exp/exp_main.py", line 22, in __init__
[34m[1mwandb[0m: [32m[41mERROR[0m     super(Exp_Main, self).__init__(args)
[34m[1mwandb[0m: [32m[41mERROR[0m   Fil

Starting sweep run: sweep_quiet-sweep-2
Key parameters: lr=0.000202, batch=32, patch_len=16, d_model=256
Use CPU


0,1
config/ff_ratio,▁
config/model_complexity,▁
config/patch_ratio,▁
config/prediction_horizon,▁
config/stride_ratio,▁

0,1
config/ff_ratio,2
config/model_complexity,512
config/patch_ratio,0.16667
config/prediction_horizon,0.07292
config/stride_ratio,0.5
param_adjustment,d_ff adjusted to 512...


train 12092
val 1736
test 3478
	iters: 100, epoch: 1 | loss: 0.3392132
	speed: 0.0138s/iter; left time: 40.3113s
	iters: 200, epoch: 1 | loss: 0.2884287
	speed: 0.0122s/iter; left time: 34.3627s
	iters: 300, epoch: 1 | loss: 0.3368687
	speed: 0.0121s/iter; left time: 32.9613s
Epoch: 1 cost time: 4.803227424621582
Epoch: 1, Steps: 377 | Train Loss: 0.3395828 Vali Loss: 0.4639564 Test Loss: 0.3872925
Validation loss decreased (inf --> 0.463956).  Saving model ...
	iters: 100, epoch: 2 | loss: 0.2141321
	speed: 0.0346s/iter; left time: 87.8965s
	iters: 200, epoch: 2 | loss: 0.1845696
	speed: 0.0125s/iter; left time: 30.5577s
	iters: 300, epoch: 2 | loss: 0.2750836
	speed: 0.0123s/iter; left time: 28.7398s
Epoch: 2 cost time: 4.917123794555664
Epoch: 2, Steps: 377 | Train Loss: 0.2116692 Vali Loss: 0.3454794 Test Loss: 0.1796886
Validation loss decreased (0.463956 --> 0.345479).  Saving model ...
Updating learning rate to 5e-05
	iters: 100, epoch: 3 | loss: 0.1627022
	speed: 0.0354s/iter; 

[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.


Exception in thread Thread-69 (_run_job):
Traceback (most recent call last):
  File "/tmp/ipykernel_9542/2736668503.py", line 200, in train_run
  File "/workspaces/Finance-Time-Series-Forcasting/exp/exp_main.py", line 166, in train
    for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(train_loader):
  File "/home/codespace/.local/lib/python3.12/site-packages/torch/utils/data/dataloader.py", line 733, in __next__
    data = self._next_data()
           ^^^^^^^^^^^^^^^^^
  File "/home/codespace/.local/lib/python3.12/site-packages/torch/utils/data/dataloader.py", line 1491, in _next_data
    idx, data = self._get_data()
                ^^^^^^^^^^^^^^^^
  File "/home/codespace/.local/lib/python3.12/site-packages/torch/utils/data/dataloader.py", line 1453, in _get_data
    success, data = self._try_get_data()
                    ^^^^^^^^^^^^^^^^^^^^
  File "/home/codespace/.local/lib/python3.12/site-packages/torch/utils/data/dataloader.py", line 1284, in _try_get_data
    d

Error in sweep run: [Errno 2] No such file or directory


In [13]:
# Complete sweep cell for notebook
import wandb

def notebook_sweep(count=3):
    """Create a W&B Bayesian sweep for xPatch and run ``count`` trials in this kernel.

    Each trial builds a fresh ``Args`` configuration, overlays the values sampled
    by the sweep, repairs or rejects inconsistent patching/model settings, then
    trains and tests through ``Exp_Main``. A failing trial is logged with
    ``final_test_mse = inf`` and finished with a non-zero exit code; the exception
    is not re-raised, so the agent proceeds to the next trial.

    Args:
        count: Number of trials the in-process agent runs (default 3, the value
            that was previously hard-coded).

    Returns:
        The sweep id returned by ``wandb.sweep``.
    """
    entity = "xplstm"
    project = "CS7643-GroupProject"

    sweep_config = {
        'method': 'bayes',
        # Metric the Bayesian search minimizes; failed trials log it as inf.
        'metric': {
            'name': 'final_test_mse',
            'goal': 'minimize'
        },
        'parameters': {
            # Learning optimization
            'learning_rate': {
                'distribution': 'log_uniform_values',
                'min': 0.00001,
                'max': 0.001
            },
            'batch_size': {'values': [8, 16, 32]},
            'train_epochs': {'values': [3, 5, 8]},

            # Model architecture - xPatch specific
            'd_model': {'values': [64, 128, 256]},
            'd_ff': {'values': [128, 256, 512, 768]},
            'e_layers': {'values': [2, 3, 4]},
            'dropout': {
                'distribution': 'uniform',
                'min': 0.1,
                'max': 0.3
            },

            # Patching strategy (conservative range)
            'patch_len': {'values': [8, 12, 16]},
            'stride': {'values': [4, 6, 8]},

            # Sequence modeling (conservative range)
            'seq_len': {'values': [48, 72, 96]},
            'pred_len': {'values': [3, 5, 7]},

            # Moving average parameters
            # NOTE(review): a recorded trial in this notebook sampled ma_type='dema'
            # and failed inside Exp_Main(...) with "'float' object has no attribute
            # 'to'". Presumably that path expects tensor alpha/beta; confirm before
            # keeping 'dema' in the search space.
            'ma_type': {'values': ['ema', 'dema']},
            'alpha': {
                'distribution': 'uniform',
                'min': 0.1,
                'max': 0.4
            },
            'beta': {
                'distribution': 'uniform',
                'min': 0.1,
                'max': 0.4
            },

            # Advanced xPatch parameters
            'k': {'values': [2, 3, 4]},
            'decomp': {'values': [0, 1]},

            # Learning rate scheduling ('cosine' removed temporarily)
            'lradj': {'values': ['type1', 'type2']},

            # Regularization
            'revin': {'values': [0, 1]},

            # Loss function variants
            # NOTE(review): Args has no 'loss_weight_decay' attribute, so this value
            # is sampled but never applied; train_run reports it as ignored.
            'loss_weight_decay': {
                'distribution': 'uniform',
                'min': 0.5,
                'max': 2.0
            }
        },

        # Hyperband early termination
        'early_terminate': {
            'type': 'hyperband',
            'min_iter': 2,
            'max_iter': 15,
            'eta': 3,
            's': 2
        }
    }

    sweep_id = wandb.sweep(sweep_config, project=project, entity=entity)

    def train_run():
        """Run one sweep trial; never raises ``Exception`` to the agent."""

        # Defined locally so the trial does not depend on notebook-level state.
        class Args:
            def __init__(self):
                self.is_training = 1
                self.train_only = False
                # Placeholder only: replaced with a per-run name below.
                self.model_id = 'AAPL_Pred5_Notebook_Tuned'
                self.model = 'xPatch'
                self.data = 'custom'
                self.scale = True
                self.root_path = './data/'
                self.data_path = 'ETTh1.csv'
                self.features = 'MS'
                self.target = 'OT'
                self.freq = 'h'
                self.checkpoints = './checkpoints/'
                self.embed = 'timeF'
                self.label_len = 48
                self.enc_in = 7
                self.padding_patch = 'end'
                self.num_workers = 8
                self.itr = 1
                self.patience = 15
                self.des = 'Sweep_Run'
                self.loss = 'mae'
                self.revin = 1

                # Defaults that the sweep overrides
                self.seq_len = 48
                self.pred_len = 5
                self.patch_len = 12
                self.stride = 6
                self.ma_type = 'ema'
                self.alpha = 0.2
                self.beta = 0.2
                self.train_epochs = 3
                self.batch_size = 16
                self.learning_rate = 0.0001
                self.lradj = 'type1'
                self.d_model = 128
                self.d_ff = 256
                self.e_layers = 3
                self.dropout = 0.1
                self.k = 3
                self.decomp = 0

                # GPU settings
                self.use_gpu = torch.cuda.is_available()
                self.gpu = 0
                self.use_multi_gpu = False
                self.devices = '0'
                self.test_flop = False

                # Wandb settings
                self.use_wandb = True
                self.wandb_entity = entity
                self.wandb_project = project
                self.experiment_notes = 'Sweep experiment'

        # Start the wandb run for this trial; sampled values arrive in wandb.config.
        wandb.init()
        failed = False

        try:
            args = Args()

            # Overlay sampled values; surface keys Args cannot consume instead of
            # dropping them silently.
            ignored = []
            for param, value in wandb.config.items():
                if hasattr(args, param):
                    setattr(args, param, value)
                else:
                    ignored.append(param)
            if ignored:
                print(f"Sweep parameters not consumed by Args (ignored): {sorted(ignored)}")

            # Unique id per trial so checkpoints/results do not collide.
            args.model_id = f"sweep_{wandb.run.name}"

            # Reject values that cannot be repaired, before any division uses them.
            if args.seq_len <= 0:
                raise ValueError(f"Invalid sequence length: seq_len={args.seq_len}")
            if args.d_model <= 0 or args.d_ff <= 0:
                raise ValueError(f"Invalid model dimensions: d_model={args.d_model}, d_ff={args.d_ff}")

            # Auto-adjustments are collected and logged once, so a later message
            # does not overwrite an earlier one under the same key.
            adjustments = []

            # 1. patch_len must fit in the input window and be at least 4.
            if args.patch_len > args.seq_len:
                args.patch_len = args.seq_len // 4
                adjustments.append(f"patch_len adjusted to {args.patch_len}")
            if args.patch_len < 4:
                args.patch_len = 4
                adjustments.append(f"patch_len minimum adjusted to {args.patch_len}")

            # 2. stride must lie in [1, patch_len].
            if args.stride > args.patch_len:
                args.stride = max(1, args.patch_len // 2)
                adjustments.append(f"stride adjusted to {args.stride}")
            if args.stride < 1:
                args.stride = 1
                adjustments.append(f"stride minimum adjusted to {args.stride}")

            # 3. d_ff must not be smaller than d_model.
            if args.d_ff < args.d_model:
                args.d_ff = args.d_model * 2
                adjustments.append(f"d_ff adjusted to {args.d_ff}")

            # 4. pred_len must be shorter than seq_len (and at least 1).
            if args.pred_len >= args.seq_len:
                args.pred_len = max(1, args.seq_len // 4)
                adjustments.append(f"pred_len adjusted to {args.pred_len}")

            if adjustments:
                wandb.log({"param_adjustment": "; ".join(adjustments)})

            # 5. The repaired configuration must yield at least one patch. This is
            # the real check: the former max(1, ...) guard could never fire.
            final_num_patches = (args.seq_len - args.patch_len) // args.stride + 1
            if final_num_patches <= 0:
                raise ValueError(f"Invalid configuration results in {final_num_patches} patches. "
                                 f"seq_len={args.seq_len}, patch_len={args.patch_len}, stride={args.stride}")

            # Derived ratios for later analysis of the sweep.
            wandb.log({
                "config/patch_ratio": args.patch_len / args.seq_len,
                "config/stride_ratio": args.stride / args.patch_len,
                "config/model_complexity": args.d_model * args.e_layers,
                "config/ff_ratio": args.d_ff / args.d_model,
                "config/prediction_horizon": args.pred_len / args.seq_len,
                "config/num_patches": final_num_patches,
                "config/patch_coverage": (args.patch_len + (final_num_patches - 1) * args.stride) / args.seq_len
            })

            os.makedirs(args.checkpoints, exist_ok=True)

            print(f"Starting sweep run: {args.model_id}")
            print(f"Key parameters: lr={args.learning_rate:.6f}, batch={args.batch_size}, "
                  f"patch_len={args.patch_len}, d_model={args.d_model}")
            print(f"Patching info: seq_len={args.seq_len}, patch_len={args.patch_len}, "
                  f"stride={args.stride}, num_patches={final_num_patches}")

            # Train and evaluate.
            exp = Exp_Main(args)
            exp.train(args.model_id)
            exp.test(args.model_id)

            if wandb.run is not None:
                wandb.log({"run_status": "completed"})

        except Exception as e:
            failed = True
            print(f"Error in sweep run: {str(e)}")
            # Best-effort failure record; logging problems must not mask the
            # original error message printed above.
            try:
                if wandb.run is not None:
                    wandb.log({
                        "run_status": "failed",
                        "error_message": str(e),
                        "final_test_mse": float('inf')  # Ensure bad runs are penalized
                    })
            except Exception:
                print("Could not log error to wandb")
            # Deliberately not re-raised: the agent should continue with the next trial.

        finally:
            # Always close the run; a non-zero exit code marks failed trials in W&B.
            if wandb.run is not None:
                wandb.finish(exit_code=1 if failed else 0)

    print(f"Sweep URL: https://wandb.ai/{entity}/{project}/sweeps/{sweep_id}")
    print("Advanced sweep configuration:")
    print(f"- {len(sweep_config['parameters'])} hyperparameters")
    print(f"- Bayesian optimization with Hyperband early termination")
    print(f"- Parameter validation and auto-adjustment")

    # Run the agent in-process for the requested number of trials.
    wandb.agent(sweep_id, function=train_run, count=count)

    return sweep_id

# Run the advanced sweep: notebook_sweep() starts a W&B agent that executes
# train_run for 3 runs and returns the sweep ID. This call is live, so the
# sweep runs every time this cell executes; comment it out to skip the sweep.
sweep_id = notebook_sweep()

Create sweep with ID: dfs0bz1y
Sweep URL: https://wandb.ai/xplstm/CS7643-GroupProject/sweeps/dfs0bz1y
Sweep URL: https://wandb.ai/xplstm/CS7643-GroupProject/sweeps/dfs0bz1y
Advanced sweep configuration:
- 19 hyperparameters
- Bayesian optimization with Hyperband early termination
- Parameter validation and auto-adjustment


[34m[1mwandb[0m: Agent Starting Run: 44kjaaml with config:
[34m[1mwandb[0m: 	alpha: 0.1329597357733036
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta: 0.29378334377376036
[34m[1mwandb[0m: 	d_ff: 128
[34m[1mwandb[0m: 	d_model: 128
[34m[1mwandb[0m: 	decomp: 1
[34m[1mwandb[0m: 	dropout: 0.19092579130934695
[34m[1mwandb[0m: 	e_layers: 3
[34m[1mwandb[0m: 	k: 3
[34m[1mwandb[0m: 	learning_rate: 4.31697164823031e-05
[34m[1mwandb[0m: 	loss_weight_decay: 0.5070863549376494
[34m[1mwandb[0m: 	lradj: type2
[34m[1mwandb[0m: 	ma_type: ema
[34m[1mwandb[0m: 	patch_len: 16
[34m[1mwandb[0m: 	pred_len: 7
[34m[1mwandb[0m: 	revin: 1
[34m[1mwandb[0m: 	seq_len: 96
[34m[1mwandb[0m: 	stride: 4
[34m[1mwandb[0m: 	train_epochs: 8


Starting sweep run: sweep_cool-sweep-1
Key parameters: lr=0.000043, batch=32, patch_len=16, d_model=128
Patching info: seq_len=96, patch_len=16, stride=4, num_patches=21
Use CPU


0,1
config/ff_ratio,▁
config/model_complexity,▁
config/num_patches,▁
config/patch_coverage,▁
config/patch_ratio,▁
config/prediction_horizon,▁
config/stride_ratio,▁

0,1
config/ff_ratio,1.0
config/model_complexity,384.0
config/num_patches,21.0
config/patch_coverage,1.0
config/patch_ratio,0.16667
config/prediction_horizon,0.07292
config/stride_ratio,0.25


train 12092
val 1736
test 3478
	iters: 100, epoch: 1 | loss: 0.1387980
	speed: 0.0227s/iter; left time: 66.2229s
	iters: 200, epoch: 1 | loss: 0.1189318
	speed: 0.0203s/iter; left time: 57.0536s
	iters: 300, epoch: 1 | loss: 0.1338258
	speed: 0.0199s/iter; left time: 53.9567s
Epoch: 1 cost time: 7.989301443099976
Epoch: 1, Steps: 377 | Train Loss: 0.1391927 Vali Loss: 0.0708973 Test Loss: 0.0510343
Validation loss decreased (inf --> 0.070897).  Saving model ...
	iters: 100, epoch: 2 | loss: 0.1333826
	speed: 0.0515s/iter; left time: 130.9004s
	iters: 200, epoch: 2 | loss: 0.0943144
	speed: 0.0199s/iter; left time: 48.5061s
	iters: 300, epoch: 2 | loss: 0.0900629
	speed: 0.0194s/iter; left time: 45.3046s
Epoch: 2 cost time: 7.715447425842285
Epoch: 2, Steps: 377 | Train Loss: 0.0993500 Vali Loss: 0.0536858 Test Loss: 0.0338554
Validation loss decreased (0.070897 --> 0.053686).  Saving model ...
Updating learning rate to 5e-05
	iters: 100, epoch: 3 | loss: 0.0865256
	speed: 0.0496s/iter;

0,1
batch,▄▅▇█▁▃█▂▂▃▅▆▆▇█▁▂▃▄▅▁▁▂▄▄██▂▄▄█▁▂▆▆▁▄▅▆▇
batch_loss,██▇▆▆▄▅▄▄▄▃▂▃▃▃▃▄▂▃▂▂▂▃▂▂▃▅▃▃▂▁▁▂▁▄▃▃▁▂▃
best_sample_mse,▁
epoch,▁▁▁▁▂▂▂▂▃▃▄▄▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇██████
epoch_time,▇▁▂█▆▅▁▁
final_test_mae,▁
final_test_mse,▁
learning_rate,▇▇▇▇▇▇▇▇▇▇▇▇██████████▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁
mse_std,▁
test_loss,█▄▂▁▁▁▁▁

0,1
batch,360.0
batch_loss,0.07855
best_sample_mse,3e-05
epoch,8.0
epoch_time,8.95793
final_test_mae,0.10142
final_test_mse,0.02132
learning_rate,1e-05
mse_std,0.0449
test_loss,0.02132


[34m[1mwandb[0m: Agent Starting Run: 331hgjyj with config:
[34m[1mwandb[0m: 	alpha: 0.25947374214422514
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	beta: 0.23461385902220108
[34m[1mwandb[0m: 	d_ff: 128
[34m[1mwandb[0m: 	d_model: 256
[34m[1mwandb[0m: 	decomp: 1
[34m[1mwandb[0m: 	dropout: 0.1419477954094698
[34m[1mwandb[0m: 	e_layers: 2
[34m[1mwandb[0m: 	k: 2
[34m[1mwandb[0m: 	learning_rate: 3.123140127628863e-05
[34m[1mwandb[0m: 	loss_weight_decay: 1.5704356395080463
[34m[1mwandb[0m: 	lradj: type2
[34m[1mwandb[0m: 	ma_type: ema
[34m[1mwandb[0m: 	patch_len: 8
[34m[1mwandb[0m: 	pred_len: 7
[34m[1mwandb[0m: 	revin: 1
[34m[1mwandb[0m: 	seq_len: 72
[34m[1mwandb[0m: 	stride: 8
[34m[1mwandb[0m: 	train_epochs: 3


Starting sweep run: sweep_glamorous-sweep-2
Key parameters: lr=0.000031, batch=8, patch_len=8, d_model=256
Patching info: seq_len=72, patch_len=8, stride=8, num_patches=9
Use CPU


0,1
config/ff_ratio,▁
config/model_complexity,▁
config/num_patches,▁
config/patch_coverage,▁
config/patch_ratio,▁
config/prediction_horizon,▁
config/stride_ratio,▁

0,1
config/ff_ratio,2
config/model_complexity,512
config/num_patches,9
config/patch_coverage,1
config/patch_ratio,0.11111
config/prediction_horizon,0.09722
config/stride_ratio,1
param_adjustment,d_ff adjusted to 512...


train 12116
val 1736
test 3478
	iters: 100, epoch: 1 | loss: 0.1917458
	speed: 0.0092s/iter; left time: 40.8517s
	iters: 200, epoch: 1 | loss: 0.1733142
	speed: 0.0075s/iter; left time: 32.3996s
	iters: 300, epoch: 1 | loss: 0.0871408
	speed: 0.0066s/iter; left time: 27.8576s
	iters: 400, epoch: 1 | loss: 0.2151660
	speed: 0.0072s/iter; left time: 29.6688s
	iters: 500, epoch: 1 | loss: 0.1230252
	speed: 0.0071s/iter; left time: 28.5714s
	iters: 600, epoch: 1 | loss: 0.1557723
	speed: 0.0071s/iter; left time: 27.8080s
	iters: 700, epoch: 1 | loss: 0.1112086
	speed: 0.0068s/iter; left time: 25.9453s
	iters: 800, epoch: 1 | loss: 0.2216544
	speed: 0.0068s/iter; left time: 25.3074s
	iters: 900, epoch: 1 | loss: 0.1283960
	speed: 0.0066s/iter; left time: 24.0575s
	iters: 1000, epoch: 1 | loss: 0.1465994
	speed: 0.0067s/iter; left time: 23.6444s
	iters: 1100, epoch: 1 | loss: 0.1248783
	speed: 0.0067s/iter; left time: 22.9196s
	iters: 1200, epoch: 1 | loss: 0.0978145
	speed: 0.0066s/iter; le

0,1
batch,▂▂▂▃▃▆▆▆▇▇█▁▂▃▃▄▅▅▅▆▆▆▇▇▇▇██▁▁▂▂▂▃▃▄▅▅▆█
batch_loss,▅▄▅▄▂▄▅▃█▆▂▃▃▃▃▄▄▅▃▂▁▃▂▃▂▃▂▃▂▃▃▂▂▂▂▃▂▁▁▂
best_sample_mse,▁
epoch,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅▅▅███████████████
epoch_time,█▂▁
final_test_mae,▁
final_test_mse,▁
learning_rate,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁██████████
mse_std,▁
test_loss,█▂▁

0,1
batch,1500.0
batch_loss,0.07978
best_sample_mse,0.00013
epoch,3.0
epoch_time,12.93161
final_test_mae,0.10597
final_test_mse,0.02259
learning_rate,5e-05
mse_std,0.04568
test_loss,0.02259


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: u7uwns7i with config:
[34m[1mwandb[0m: 	alpha: 0.17858413918976151
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta: 0.3787823853018955
[34m[1mwandb[0m: 	d_ff: 512
[34m[1mwandb[0m: 	d_model: 256
[34m[1mwandb[0m: 	decomp: 0
[34m[1mwandb[0m: 	dropout: 0.2672522657727061
[34m[1mwandb[0m: 	e_layers: 3
[34m[1mwandb[0m: 	k: 3
[34m[1mwandb[0m: 	learning_rate: 0.00014641657039420427
[34m[1mwandb[0m: 	loss_weight_decay: 0.9245550247129026
[34m[1mwandb[0m: 	lradj: type2
[34m[1mwandb[0m: 	ma_type: dema
[34m[1mwandb[0m: 	patch_len: 12
[34m[1mwandb[0m: 	pred_len: 7
[34m[1mwandb[0m: 	revin: 0
[34m[1mwandb[0m: 	seq_len: 96
[34m[1mwandb[0m: 	stride: 4
[34m[1mwandb[0m: 	train_epochs: 8


Starting sweep run: sweep_icy-sweep-3
Key parameters: lr=0.000146, batch=16, patch_len=12, d_model=256
Patching info: seq_len=96, patch_len=12, stride=4, num_patches=22
Use CPU
Error in sweep run: 'float' object has no attribute 'to'


0,1
config/ff_ratio,▁
config/model_complexity,▁
config/num_patches,▁
config/patch_coverage,▁
config/patch_ratio,▁
config/prediction_horizon,▁
config/stride_ratio,▁

0,1
config/ff_ratio,2
config/model_complexity,768
config/num_patches,22
config/patch_coverage,1
config/patch_ratio,0.125
config/prediction_horizon,0.07292
config/stride_ratio,0.33333
error_message,'float' object has n...
final_test_mse,inf
run_status,failed
