In [2]:
import random
import numpy as np
import torch
from exp.exp_long_term_forecasting import Exp_Long_Term_Forecast
from exp.exp_imputation import Exp_Imputation
from exp.exp_short_term_forecasting import Exp_Short_Term_Forecast
from exp.exp_anomaly_detection import Exp_Anomaly_Detection
from exp.exp_classification import Exp_Classification
from data_provider.data_creat import *
import akshare as ak
import datetime
from torch.utils.tensorboard import SummaryWriter

ModuleNotFoundError: No module named 'sktime'

In [2]:
class Args:
    """Experiment configuration, kept as plain class-level attributes."""

    # ------------------------------------------------------------------
    # Basic configuration
    # ------------------------------------------------------------------
    # Task options: [long_term_forecast, short_term_forecast, imputation,
    #                classification, anomaly_detection]
    task_name = 'long_term_forecast'
    is_training = 1
    model_id = 'Stock_96_96'
    # Model name, options: [Autoformer, Transformer, TimesNet]
    model = 'TimesNet'

    # ------------------------------------------------------------------
    # Stock data download
    # ------------------------------------------------------------------
    # Price-adjustment mode: empty means unadjusted, 'qfq' is forward-adjusted;
    # 'hfq' (backward-adjusted) is the one that should be used for
    # quantitative analysis.
    fuquan = 'hfq'
    # Bar period to fetch, one of {'daily', 'weekly', 'monthly'}
    period = 'daily'
    # Start date of the download; 0 means the company's listing date
    start_date = '20151201'
    # End date of the download; 0 means up to the latest day, -1 means yesterday
    end_date = '-1'
    # Labels of the unneeded columns to drop
    final_data_feat = ['index', 'Volume', 'Tom_Chg']
    # Number of consecutive future days of return to predict
    label_n = 7
    # Gain threshold: a rise of at least 3% is labelled 1, below 3% is labelled 0
    zhangfu = 0.03
    # If True, the label is 1 when the rise after n days exceeds `zhangfu`, else 0
    label_ch = False

    # ------------------------------------------------------------------
    # Data loading
    # ------------------------------------------------------------------
    # Dataset type, options: [ETTh1, ETTh2, ETTm1, ETTm2, custom, m4, PSM,
    #                         MSL, SMAP, SMD, SWAT, UEA]
    data = 'custom'
    root_path = './dataset/Stock/'
    data_path = 'Stock.csv'
    # Forecasting mode -- M: multivariate predicts multivariate,
    # S: univariate predicts univariate, MS: multivariate predicts univariate
    features = 'MS'
    # Target column name: the target feature in S or MS tasks
    target = 'OT'
    # Sampling granularity, options: [s: second, t: minute, h: hour, d: day,
    #                                 b: business day, w: week, m: month]
    freq = 'd'
    # Location of the model checkpoints
    checkpoints = './checkpoints/'

    # ------------------------------------------------------------------
    # Forecasting task
    # ------------------------------------------------------------------
    # Input sequence length
    seq_len = 96
    # Start-token length
    label_len = 48
    # Prediction sequence length
    pred_len = 1
    # Seasonal pattern (for the M4 dataset)
    seasonal_patterns = 'Monthly'
    # Whether to invert the output data
    inverse = False

    # ------------------------------------------------------------------
    # Imputation task
    # ------------------------------------------------------------------
    # Data-missing (mask) rate in the imputation task
    mask_rate = 0.25

    # ------------------------------------------------------------------
    # Anomaly-detection task
    # ------------------------------------------------------------------
    # Proportion of anomalous points in anomaly detection
    anomaly_ratio = 0.25

    # ------------------------------------------------------------------
    # Model definition
    # ------------------------------------------------------------------
    # Fourier transform in TimesBlock: top-k periods by frequency ranking
    top_k = 5
    # Number of convolution kernels in Inception
    num_kernels = 6
    # Number of encoder input features
    enc_in = 14
    # Number of decoder input features
    dec_in = 14
    # Number of output channels
    c_out = 14
    # Number of hidden units in the linear layers
    d_model = 32
    # Number of hidden units in the FFN layers
    d_ff = 32
    # Multi-head attention
    n_heads = 8
    # Number of encoder layers
    e_layers = 2
    # Number of decoder layers
    d_layers = 1
    # Sliding-window length
    moving_avg = 25
    # Sampling factor applied to Q
    factor = 3
    # Whether to apply the down-sampling (pooling) operation
    distil = True
    # Dropout rate
    dropout = 0.1
    # Time-feature embedding mode, options: [timeF, fixed, learned]
    embed = 'timeF'
    # Activation function type
    activation = 'gelu'
    # Whether to output attention
    output_attention = False

    # ------------------------------------------------------------------
    # Optimisation
    # ------------------------------------------------------------------
    # Number of parallel workers
    num_workers = 10
    # Number of experiment repetitions
    itr = 1
    # Number of training epochs
    train_epochs = 10
    # Batch size
    batch_size = 32
    # Early-stopping patience (tolerated count)
    patience = 3
    # Learning rate
    learning_rate = 0.0001
    # Experiment description
    des = 'test'
    # Loss function
    loss = 'MSE'
    # Learning-rate decay strategy
    lradj = 'type1'
    # Use mixed-precision training
    use_amp = False

    # ------------------------------------------------------------------
    # GPU
    # ------------------------------------------------------------------
    # Use the GPU
    use_gpu = False
    gpu = 0
    # Use multiple GPUs
    use_multi_gpu = False
    # Device ids for multi-GPU runs
    devices = '0,1,2,3'

    # ------------------------------------------------------------------
    # De-stationary projector parameters
    # ------------------------------------------------------------------
    # Hidden-layer dimensions of the projector (list)
    p_hidden_dims = [128, 128]
    # Number of hidden layers in the projector
    p_hidden_layers = 2


# Instantiate the configuration object
args = Args()

In [3]:
# Download and preprocess every stock code in stock_list.
# download_data / add_data / add_label / sub_data are not defined in this
# notebook; presumably they come from the wildcard import of
# data_provider.data_creat -- confirm.
# NOTE(review): each iteration rebinds su_data and args.des, so with more than
# one code in the list only the last stock's frame is still around when the
# later cell writes su_data to CSV -- confirm that is intended before
# extending the list.
stock_list = ['601857'] # for testing
for i in stock_list:
    NUM = i
    # Download the raw data
    raw_data = download_data(NUM, args)
    # Assemble the data and add the extra parameters
    ad_data = add_data(raw_data.copy(), args)
    # Add the prediction label
    ot_data = add_label(ad_data.copy(), args)
    # Drop the invalid data
    su_data = sub_data(ot_data.copy(), args)
    # The stock code becomes the experiment description, which is one of the
    # fields of the run's `setting` name built in the training cell.
    args.des = NUM

获取数据时间为： 20151201 - 20231214
原始数据形状： (1957, 9)
添加数据以后形状： (1957, 15)
添加label以后数据形状: (1957, 17)
删除指定行、列后数据形状:  (1925, 15)


In [4]:
# Save the preprocessed frame to the CSV file named by the configuration.
import os  # imported here so this cell does not depend on edits to the import cell

# os.path.join yields the same path as the previous plain concatenation for
# the configured root_path (which ends in '/'), and also works if the trailing
# slash is ever dropped.
csv_path = os.path.join(args.root_path, args.data_path)
# DataFrame.to_csv does not create missing parent directories and raises
# OSError instead, so make sure the target directory exists (fresh checkouts
# have no ./dataset/Stock/ yet). `or '.'` covers an empty root_path.
os.makedirs(os.path.dirname(csv_path) or '.', exist_ok=True)
su_data.to_csv(csv_path, index=False)

In [5]:
# Seed every random-number source used in this notebook (Python's `random`,
# PyTorch, NumPy -- in that order) so results are reproducible.
fix_seed = 2021
for _seed_rng in (random.seed, torch.manual_seed, np.random.seed):
    _seed_rng(fix_seed)

In [6]:
def _build_setting(args, ii):
    """Build the identifier string for one experiment run.

    The string joins the task, model, dataset and main hyper-parameters read
    from ``args`` with the repetition index ``ii``. It is what gets passed to
    ``exp.train`` / ``exp.test`` as the run name. Both the training path and
    the test-only path call this helper, so the two can no longer drift apart
    (presumably test-only mode relies on the name matching the one used
    during training -- confirm against the Exp classes).

    Args:
        args: configuration object exposing the attributes referenced below.
        ii: index of the repetition.

    Returns:
        str: the underscore-separated setting name.
    """
    return '{}_{}_{}_{}_ft{}_sl{}_ll{}_pl{}_dm{}_nh{}_el{}_dl{}_df{}_fc{}_eb{}_dt{}_{}_{}'.format(
        args.task_name,
        args.model_id,
        args.model,
        args.data,
        args.features,
        args.seq_len,
        args.label_len,
        args.pred_len,
        args.d_model,
        args.n_heads,
        args.e_layers,
        args.d_layers,
        args.d_ff,
        args.factor,
        args.embed,
        args.distil,
        args.des, ii)


# Check and configure the GPU: use it only when it was requested AND CUDA is
# actually available.
args.use_gpu = bool(torch.cuda.is_available() and args.use_gpu)
if args.use_gpu and args.use_multi_gpu:
    # Turn a string such as '0, 1,2' into [0, 1, 2]; the first id becomes args.gpu.
    args.devices = args.devices.replace(' ', '')
    device_ids = args.devices.split(',')
    args.device_ids = [int(id_) for id_ in device_ids]
    args.gpu = args.device_ids[0]

# Pick the experiment class that matches the task name.
_EXP_BY_TASK = {
    'long_term_forecast': Exp_Long_Term_Forecast,
    'short_term_forecast': Exp_Short_Term_Forecast,
    'imputation': Exp_Imputation,
    'anomaly_detection': Exp_Anomaly_Detection,
    'classification': Exp_Classification,
}
# Unrecognised task names fall back to long-term forecasting (the default).
Exp = _EXP_BY_TASK.get(args.task_name, Exp_Long_Term_Forecast)

# Train and test
if args.is_training:
    for ii in range(args.itr):
        # Name under which this repetition is recorded
        setting = _build_setting(args, ii)

        exp = Exp(args)  # set up the experiment
        print('>>>>>>>开始训练 : {}>>>>>>>>>>>>>>>>>>>>>>>>>>'.format(setting))
        exp.train(setting)

        print('>>>>>>>测试 : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting))
        exp.test(setting)
        if args.use_gpu:
            torch.cuda.empty_cache()
else:
    # Test-only mode: a single run with index 0.
    ii = 0
    setting = _build_setting(args, ii)

    exp = Exp(args)  # set up the experiment
    print('>>>>>>>测试 : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting))
    exp.test(setting, test=1)
    if args.use_gpu:
        torch.cuda.empty_cache()

Use CPU
>>>>>>>开始训练 : long_term_forecast_Stock_96_96_TimesNet_custom_ftMS_sl96_ll48_pl1_dm32_nh8_el2_dl1_df32_fc3_ebtimeF_dtTrue_601857_0>>>>>>>>>>>>>>>>>>>>>>>>>>
train 1251
val 193
test 385
Epoch: 1 cost time: 58.26604437828064
Epoch: 1, Steps: 39 | Train Loss: 1.2268151 Vali Loss: 3.0360119 Test Loss: 1.6772472
Validation loss decreased (inf --> 3.036012).  Saving model ...
Updating learning rate to 0.0001
Epoch: 2 cost time: 55.617011070251465
Epoch: 2, Steps: 39 | Train Loss: 0.9422728 Vali Loss: 3.0409286 Test Loss: 1.5702196
EarlyStopping counter: 1 out of 3
Updating learning rate to 5e-05


In [None]:
# Load (or reload) the TensorBoard notebook extension, then open the dashboard
# pointed at the ./runs/ log directory.
# NOTE(review): nothing in the visible cells writes to ./runs/ (SummaryWriter
# is imported but not used here); presumably the Exp classes log there -- confirm.
%reload_ext tensorboard
%tensorboard --logdir=./runs/