In [2]:
import argparse
import os
import torch.distributed as dist
import torch
from exp.exp_LLM4TS import Exp_Main
from exp.exp_imputation import Exp_Imputation
from exp.exp_ad import Exp_Anomaly_Detection
from exp.exp_sf import Exp_Short_Term_Forecast
from exp.exp_classification import Exp_Classification
import random
import numpy as np

In [6]:
# Experiment configuration, kept as a plain dict of option name -> value.
# Options are grouped by concern; insertion order follows the grouping below.
args_dict = dict(
    # --- General ---
    random_seed=42,                  # random seed
    is_training=1,                   # status
    model_id='test',                 # model id
    model='Autoformer',              # model name; options: Autoformer, Informer, Transformer
    task_name='long_term_forecast',  # one of: long_term_forecast, short_term_forecast,
                                     # imputation, classification, anomaly_detection

    # --- Data loader ---
    # NOTE(review): 'data' is ETTm1 while 'data_path' points at ETTh1.csv -- confirm this pairing is intended.
    data='ETTm1',                    # dataset type
    root_path='./data/ETT/',         # root path of the data file
    data_path='ETTh1.csv',           # data file
    features='M',                    # forecasting task: M  = multivariate -> multivariate,
                                     #                   S  = univariate  -> univariate,
                                     #                   MS = multivariate -> univariate
    target='OT',                     # target feature for the S or MS task
    freq='h',                        # frequency for time-feature encoding: s (secondly), t (minutely),
                                     # h (hourly), d (daily), b (business days), w (weekly), m (monthly);
                                     # finer values such as 15min or 3h are also accepted
    checkpoints='./checkpoints/',    # location of model checkpoints
    percent=100,
    mask_rate=0.25,                  # mask ratio
    anomaly_ratio=0.25,              # prior anomaly ratio (%)

    # --- aLLM4TS ---
    is_llm=0,                        # whether to use the LLM
    pretrain=1,                      # whether to use the pretrained LLM
    freeze=1,                        # whether to freeze a specific part of the LLM
    llm_layers=1,                    # number of LLM layers used
    mask_pt=0,                       # mask pretrain ratio
    llm='./HF_MODELS/gpt2',          # the LLM checkpoint
    attn_dropout=0,
    proj_dropout=0,
    res_attention=False,

    # --- SFT ---
    sft=0,                           # whether to use SFT
    sft_layers='null',               # the LLM layers that need to be trained
    history_len=0,                   # look-back window length
    fft=0,
    rand_init=0,                     # rand_init

    # --- Pretrain ---
    c_pt=0,                          # whether to continue pretraining
    pt_layers='null',                # the LLM layers that need to be trained
    pt_data='null',                  # datasets used in pretraining, separated by '_'
    pt_sft=0,                        # NOTE(review): originally described with the same text as c_pt
                                     # ("whether continue pretrain") -- confirm the intended meaning
    pt_sft_base_dir='null',          # base model directory for pt_sft
    pt_sft_model='null',             # base model for pt_sft

    # --- Forecasting task ---
    seq_len=720,                     # input sequence length
    label_len=0,                     # start token length
    pred_len=720,                    # prediction sequence length
    seasonal_patterns='Monthly',     # subset for M4

    # --- PatchTST ---
    fc_dropout=0.05,                 # fully connected dropout
    head_dropout=0.0,                # head dropout
    patch_len=16,                    # patch length
    stride=8,                        # stride
    padding_patch='end',             # None: no padding; end: padding on the end
    revin=1,                         # RevIN; 1 = True, 0 = False
    affine=0,                        # RevIN-affine; 1 = True, 0 = False
    subtract_last=0,                 # 0: subtract mean; 1: subtract last
    decomposition=0,                 # decomposition; 1 = True, 0 = False
    kernel_size=25,                  # decomposition kernel
    individual=0,                    # individual head; 1 = True, 0 = False
    notrans=False,                   # stop using the transformer

    # --- Formers ---
    embed_type=0,                    # 0: default
                                     # 1: value + temporal + positional embedding
                                     # 2: value + temporal embedding
                                     # 3: value + positional embedding
                                     # 4: value embedding
    enc_in=7,                        # encoder input size
    dec_in=7,                        # decoder input size
    c_out=7,                         # output size
    d_model=512,                     # dimension of model
    n_heads=8,                       # number of heads
    e_layers=2,                      # number of encoder layers
    d_layers=1,                      # number of decoder layers
    d_ff=8,                          # dimension of fcn
    moving_avg=25,                   # window size of moving average
    factor=1,                        # attention factor
    destill=False,                   # whether to use distilling in the encoder; per the original note,
                                     # "using this argument means not using distilling"
                                     # NOTE(review): key is spelled 'destill' -- confirm consumers read this name
    dropout=0.1,                     # dropout
    embed='timeF',                   # time-feature encoding; options: timeF, fixed, learned
    activation='gelu',               # activation
    output_attention=False,          # whether to output attention in the encoder
    do_predict=False,                # whether to predict unseen future data

    # --- Optimization ---
    num_workers=4,                   # data loader worker count
    itr=2,                           # number of experiment repetitions
    train_epochs=100,                # training epochs
    batch_size=128,                  # batch size of the training input data
    patience=100,                    # early-stopping patience
    learning_rate=0.0001,            # optimizer learning rate
    des='test',                      # experiment description
    loss='mse',                      # loss function
    lradj='type3',                   # learning-rate adjustment scheme
    pct_start=0.3,                   # pct_start
    use_amp=False,                   # use automatic mixed precision

    # --- GPU ---
    use_gpu=True,                    # use GPU
    gpu=0,                           # gpu
    use_multi_gpu=False,             # use multiple GPUs
    devices='0,1,2,3',               # device ids for multi-GPU runs
    test_flop=False,                 # test model flops
)


In [7]:
# Sanity check: show how many options args_dict defines (the recorded output is 84).
len(args_dict)

84