In [10]:
import torch
from torch import nn
from torchinfo import summary

# Baseline multilayer perceptron
class MLP(nn.Module):
    """Fully connected regressor: ``input_size -> 64 -> 32 -> 1``.

    ReLU follows the first two linear layers; the last layer is left linear,
    so the network emits a single unbounded value per input row.
    """

    def __init__(self, input_size):
        super().__init__()
        # Layers are created in forward order, which also fixes the order in
        # which their parameters are initialised and listed in the state dict.
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)

    def forward(self, x):
        """Map a tensor whose last dimension is ``input_size`` to one value per row."""
        hidden = self.fc1(x).relu()
        hidden = self.fc2(hidden).relu()
        return self.fc3(hidden)

# Bidirectional LSTM regressor
class BiLSTM(nn.Module):
    """Stacked bidirectional LSTM followed by a linear head with one output.

    The LSTM is built with ``batch_first=True``, so inputs are laid out as
    (batch, sequence, feature). Only the last time step of the LSTM output is
    passed to the linear head.
    """

    def __init__(self, input_size, hidden_size, num_layers):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(
            input_size,
            hidden_size,
            num_layers,
            batch_first=True,
            bidirectional=True,
        )
        # Forward and backward outputs are concatenated, hence 2 * hidden_size.
        self.fc = nn.Linear(hidden_size * 2, 1)

    def forward(self, x):
        """Return one value per batch element, computed from the last time step."""
        # One (h, c) slice per layer and per direction, zero-filled on x's device.
        state_shape = (self.num_layers * 2, x.size(0), self.hidden_size)
        h0 = torch.zeros(state_shape, device=x.device)
        c0 = torch.zeros(state_shape, device=x.device)
        sequence_out, _ = self.lstm(x, (h0, c0))
        last_step = sequence_out[:, -1, :]
        return self.fc(last_step)

# Parameter count: MLP
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Feature columns; only the number of entries is used in this cell (as the model input width).
features = ['Open', 'High', 'Low', 'Close', 'Volume', 'Return', 'MA5', 'MA10', 'MA50', 'MA200', 
            'BBL_20_2.0', 'BBM_20_2.0', 'BBU_20_2.0', 'BBB_20_2.0', 'BBP_20_2.0', 'STOCHk_14_3_3', 
            'STOCHd_14_3_3', 'ATR', 'VWAP', 'CMO', 'RSI', 'MACD_12_26_9', 'MACDh_12_26_9', 
            'MACDs_12_26_9', 'Volatility']
mlp_model = MLP(input_size=len(features)).to(device)


# Parameter count: BiLSTM
bilstm_model = BiLSTM(input_size=len(features), hidden_size=64, num_layers=2).to(device)

# Parameter count: KAN
# NOTE(review): `KAN` is neither defined nor imported in this cell, so it must
# already exist in the kernel namespace -- confirm where it comes from.
kan_model = KAN(
    layers_hidden=[len(features), 64, 128, 64, 1],
    grid_size=5,
    spline_order=3,
    scale_noise=0.1,
    scale_base=1.0,
    scale_spline=1.0,
    base_activation=torch.nn.ReLU,
    grid_eps=0.02,
    grid_range=[-1, 1],
).to(device)

# The three summaries form one tuple expression, so the cell displays them together.
# Batch size 32 is used for all; the BiLSTM input carries an extra sequence axis of length 1.
summary(mlp_model, input_size=(32, len(features))),summary(bilstm_model, input_size=(32, 1, len(features))),summary(kan_model, input_size=(32, len(features)))


 Layer (type:depth-idx)                   Output Shape              Param #
 MLP                                      [32, 1]                   --
 ├─Linear: 1-1                            [32, 64]                  1,664
 ├─Linear: 1-2                            [32, 32]                  2,080
 ├─Linear: 1-3                            [32, 1]                   33
 Total params: 3,777
 Trainable params: 3,777
 Non-trainable params: 0
 Total mult-adds (M): 0.12
 Input size (MB): 0.00
 Forward/backward pass size (MB): 0.02
 Params size (MB): 0.02
 Estimated Total Size (MB): 0.04
 Layer (type:depth-idx)                   Output Shape              Param #
 BiLSTM                                   [32, 1]                   --
 ├─LSTM: 1-1                              [32, 1, 128]              145,920
 ├─Linear: 1-2                            [32, 1]                   129
 Total params: 146,049
 Trainable params: 146,049
 Non-trainable params: 0
 Total mult-adds (M): 4.67
 Input size (MB): 0.

---
# Baseline: MLP

In [1]:
import os
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from torch import nn, optim
from sklearn.metrics import r2_score, mean_absolute_percentage_error
import plotly.graph_objs as go
import plotly.offline as py
from tqdm import tqdm

# Load the data
# NOTE(review): absolute, machine-specific path -- the cell only runs where this file exists.
file_path = '/teamspace/studios/this_studio/ping_an_data_with_pandas_ta_features.csv'
data = pd.read_csv(file_path)

# Fill missing values: back-fill first, then forward-fill whatever is still missing.
# NOTE(review): this runs on the full frame before the split below, and bfill copies
# later observations into earlier rows -- confirm this is acceptable for evaluation.
data = data.bfill().ffill()

# Feature columns fed to the models
features = ['Open', 'High', 'Low', 'Close', 'Volume', 'Return', 'MA5', 'MA10', 'MA50', 'MA200', 
            'BBL_20_2.0', 'BBM_20_2.0', 'BBU_20_2.0', 'BBB_20_2.0', 'BBP_20_2.0', 'STOCHk_14_3_3', 
            'STOCHd_14_3_3', 'ATR', 'VWAP', 'CMO', 'RSI', 'MACD_12_26_9', 'MACDh_12_26_9', 
            'MACDs_12_26_9', 'Volatility']

# Split by row position: first 70% train, next 20% validation, remainder test
# (presumably the rows are in time order -- verify against the CSV).
train_size = int(len(data) * 0.7)
val_size = int(len(data) * 0.2)
test_size = len(data) - train_size - val_size

train_data = data[:train_size]
val_data = data[train_size:train_size + val_size]
test_data = data[train_size + val_size:]

class StockDataset(Dataset):
    """Standardised feature rows paired with the same row's ``Close`` value.

    Feature columns are selected with the module-level ``features`` list.

    Args:
        data: DataFrame containing every column in ``features`` plus ``Close``.
        mean: Optional per-feature mean used for standardisation. When either
            ``mean`` or ``std`` is ``None`` both statistics are computed from
            ``data`` itself.
        std: Optional per-feature standard deviation, see ``mean``.

    Attributes:
        mean, std: The statistics in use, stored unmodified so they can be
            passed on to other datasets (e.g. validation or test).
        features: float32 array of standardised features.
        targets: float32 array of ``Close`` values.
    """

    def __init__(self, data, mean=None, std=None):
        self.data = data
        self.features = data[features].values.astype(np.float32)
        self.targets = data['Close'].values.astype(np.float32)

        if mean is None or std is None:
            self.mean = self.features.mean(axis=0)
            self.std = self.features.std(axis=0)
        else:
            self.mean = mean
            self.std = std

        # A constant column has std == 0; dividing by it yields NaN/inf, which
        # would propagate into the loss. Divide such columns by 1 instead.
        scale = np.asarray(self.std, dtype=np.float32)
        scale = np.where(scale == 0, np.float32(1.0), scale)
        # Keep float32 regardless of the dtype of externally supplied statistics.
        self.features = ((self.features - self.mean) / scale).astype(np.float32)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(feature_vector, target)`` tensors for row ``idx``."""
        feature = self.features[idx]
        target = self.targets[idx]
        return torch.tensor(feature), torch.tensor(target)

# 定义MLP模型
class MLP(nn.Module):
    def __init__(self, input_size):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Root directory that holds one sub-folder per MLP training run
logs_dir = "MLP_logs"
# exist_ok=True creates the folder only when it is missing, without a separate
# exists() check that could race with another process creating it.
os.makedirs(logs_dir, exist_ok=True)

# Index for the next training run
def get_next_log_index(logs_dir):
    """Return the index to use for the next ``logs_<n>`` run directory.

    Only sub-directories of ``logs_dir`` named ``logs_<digits>`` are counted.
    Other entries (plain files, or folders such as Jupyter's
    ``.ipynb_checkpoints``) are ignored rather than fed to ``int()``.

    Args:
        logs_dir: Existing directory that contains the run folders.

    Returns:
        0 when no run folder exists yet, otherwise the highest index plus one.
    """
    prefix = 'logs_'
    run_indices = []
    for name in os.listdir(logs_dir):
        suffix = name[len(prefix):]
        if not (name.startswith(prefix) and suffix.isdecimal()):
            continue
        if os.path.isdir(os.path.join(logs_dir, name)):
            run_indices.append(int(suffix))
    # default=-1 makes the empty case yield 0.
    return max(run_indices, default=-1) + 1

# One training epoch
def train(model, loader, criterion, optimizer, device):
    """Run one optimisation pass over ``loader`` and return the mean per-sample loss.

    Args:
        model: Module to optimise; it is switched to training mode.
        loader: Iterable of ``(features, targets)`` batches exposing ``.dataset``.
        criterion: Loss callable applied to ``(outputs, targets.unsqueeze(1))``.
        optimizer: Optimiser stepped once per batch.
        device: Device the batches are moved to.

    Returns:
        Sum of ``batch_loss * batch_size`` divided by ``len(loader.dataset)``.
    """
    model.train()
    total_loss = 0.0
    for batch_x, batch_y in loader:
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)

        optimizer.zero_grad()
        # unsqueeze(1) gives the targets a trailing axis to line up with the model output.
        batch_loss = criterion(model(batch_x), batch_y.unsqueeze(1))
        batch_loss.backward()
        optimizer.step()

        # Weight by batch size so a smaller final batch does not skew the average
        # (assumes the criterion returns a batch mean, as nn.MSELoss does by default).
        total_loss += batch_loss.item() * batch_x.size(0)
    return total_loss / len(loader.dataset)

# Evaluation pass
def validate(model, loader, criterion, device):
    """Evaluate ``model`` on ``loader`` without updating any weights.

    Args:
        model: Module to evaluate; it is switched to eval mode.
        loader: Iterable of ``(features, targets)`` batches exposing ``.dataset``.
        criterion: Loss callable applied to ``(outputs, targets.unsqueeze(1))``.
        device: Device the batches are moved to.

    Returns:
        Tuple ``(mean_loss, r2, mape)``: the size-weighted mean loss over the
        dataset, then ``r2_score`` and ``mean_absolute_percentage_error``
        computed over all collected targets and predictions.
    """
    model.eval()
    total_loss = 0.0
    y_true, y_pred = [], []
    with torch.no_grad():
        for batch_x, batch_y in loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            batch_out = model(batch_x)
            batch_loss = criterion(batch_out, batch_y.unsqueeze(1))
            total_loss += batch_loss.item() * batch_x.size(0)
            # Collect on the CPU as NumPy values for the sklearn metrics below.
            y_true.extend(batch_y.cpu().numpy())
            y_pred.extend(batch_out.cpu().numpy())
    mean_loss = total_loss / len(loader.dataset)
    r2 = r2_score(y_true, y_pred)
    mape = mean_absolute_percentage_error(y_true, y_pred)
    return mean_loss, r2, mape

# Training / validation loop
def train_and_validate(model, train_loader, val_loader, num_epochs, device):
    """Train for ``num_epochs`` epochs and keep the checkpoint with the best validation R2.

    A new ``logs_<n>`` sub-folder is created under the module-level
    ``logs_dir`` for this run. Every time the validation R2 improves, the
    model's state dict is saved there. After the last epoch the best
    checkpoint is loaded back into ``model``.

    Args:
        model: Module to train (updated in place).
        train_loader: Loader passed to ``train``.
        val_loader: Loader passed to ``validate``.
        num_epochs: Number of epochs to run.
        device: Device used for training and evaluation.

    Returns:
        Tuple ``(train_losses, val_losses, val_r2s, val_mapes, best_model_path)``.
        The first four are per-epoch lists. ``best_model_path`` is ``None``
        if no checkpoint was ever saved.
    """
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    train_losses = []
    val_losses = []
    val_r2s = []
    val_mapes = []
    best_val_r2 = float('-inf')
    best_model_path = None

    # Create a fresh sub-folder for this run. A separate local name is required:
    # assigning to `logs_dir` here would make it local to the function, and the
    # read on the line below would then raise UnboundLocalError.
    log_index = get_next_log_index(logs_dir)
    run_dir = os.path.join(logs_dir, f'logs_{log_index}')
    os.makedirs(run_dir)

    for epoch in tqdm(range(num_epochs)):
        train_loss = train(model, train_loader, criterion, optimizer, device)
        val_loss, val_r2, val_mape = validate(model, val_loader, criterion, device)
        train_losses.append(train_loss)
        val_losses.append(val_loss)
        val_r2s.append(val_r2)
        val_mapes.append(val_mape)

        # Save the model if validation R2 is the best we've seen so far
        if val_r2 > best_val_r2:
            best_val_r2 = val_r2
            best_model_path = os.path.join(run_dir, f'best_model_epoch_{epoch+1}_r2_{val_r2:.4f}.pth')
            torch.save(model.state_dict(), best_model_path)

        print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val R2: {val_r2:.4f}, Val MAPE: {val_mape:.4f}')

    # Load the best model before returning
    if best_model_path:
        model.load_state_dict(torch.load(best_model_path))
        print(f'Loaded best model from {best_model_path}')
    return train_losses, val_losses, val_r2s, val_mapes, best_model_path

# Rolling prediction with retraining after every step
def rolling_predict_with_retraining(model, train_data, val_data, test_data, window_size, device, num_epochs=5):
    """Produce one prediction per test row, fine-tuning the model after each step.

    For every test position ``i`` the function builds a window from the last
    ``window_size`` rows of the accumulated data, records the model output for
    the window's final row, then retrains on that window for ``num_epochs``
    epochs.

    Besides its arguments, the function reads the module-level names
    ``train_dataset`` (for normalisation statistics) and ``batch_size``.

    Args:
        model: Trained module; it is updated in place by the retraining steps.
        train_data: Training DataFrame.
        val_data: Validation DataFrame, appended to ``train_data`` as history.
        test_data: DataFrame to roll over.
        window_size: Number of most recent rows kept in each window.
        device: Device used for inference and retraining.
        num_epochs: Retraining epochs per step.

    Returns:
        List of floats, one per row of ``test_data``.
    """
    model.eval()
    predictions = []
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    updated_train_data = pd.concat([train_data, val_data])

    for i in tqdm(range(len(test_data)), desc="Rolling Prediction"):
        # Build the rolling window.
        # NOTE(review): `updated_train_data` already holds a copy of test rows [0, i)
        # (with predicted Close), so concatenating `test_data[:i]` adds those rows a
        # second time -- confirm this duplication is intended.
        window_data = pd.concat([updated_train_data, test_data[:i]], axis=0).tail(window_size)
        window_dataset = StockDataset(window_data, mean=train_dataset.mean, std=train_dataset.std)
        window_loader = DataLoader(window_dataset, batch_size=len(window_dataset), shuffle=False)

        # train() below leaves the model in training mode; switch back so every
        # prediction -- not only the first -- runs in eval mode.
        model.eval()
        with torch.no_grad():
            # The loader yields the whole window as one batch; keep the last row's output.
            for features, _ in window_loader:
                features = features.to(device)
                prediction = model(features)
                predictions.append(prediction[-1].item())

        # Append the test row with its Close replaced by the prediction
        new_row = test_data.iloc[i].copy()
        new_row['Close'] = predictions[-1]
        updated_train_data = pd.concat([updated_train_data, new_row.to_frame().T])

        # Retrain on the current window
        train_loader = DataLoader(window_dataset, batch_size=batch_size, shuffle=False)
        for epoch in range(num_epochs):
            train(model, train_loader, criterion, optimizer, device)

    return predictions

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
batch_size = 32

# Build a fresh MLP, then train and validate it
model = MLP(input_size=len(features)).to(device)
num_epochs = 100

# Validation reuses the training set's normalisation statistics.
train_dataset = StockDataset(train_data)
val_dataset = StockDataset(val_data, train_dataset.mean, train_dataset.std)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

train_losses, val_losses, val_r2s, val_mapes, best_model_path = train_and_validate(model, train_loader, val_loader, num_epochs, device)

# Rolling prediction over the test set
window_size = len(pd.concat([train_data, val_data]))  # initial window = all training + validation rows
model.load_state_dict(torch.load(best_model_path))  # make sure the best validation checkpoint is the one in use
print(f'Using model from {best_model_path} for rolling prediction.')
test_predictions = rolling_predict_with_retraining(model, train_data, val_data, test_data, window_size, device)


  1%|          | 1/100 [00:00<00:27,  3.62it/s]

Epoch 1/100, Train Loss: 1255.0331, Val Loss: 4214.2652, Val R2: -14.2543, Val MAPE: 0.9203


  2%|▏         | 2/100 [00:00<00:24,  4.03it/s]

Epoch 2/100, Train Loss: 623.2124, Val Loss: 608.9630, Val R2: -1.2043, Val MAPE: 0.3833


  3%|▎         | 3/100 [00:00<00:22,  4.31it/s]

Epoch 3/100, Train Loss: 126.0715, Val Loss: 121.3278, Val R2: 0.5608, Val MAPE: 0.1576


  4%|▍         | 4/100 [00:00<00:21,  4.48it/s]

Epoch 4/100, Train Loss: 92.5350, Val Loss: 122.5131, Val R2: 0.5565, Val MAPE: 0.1530


  5%|▌         | 5/100 [00:01<00:20,  4.56it/s]

Epoch 5/100, Train Loss: 68.8127, Val Loss: 86.9854, Val R2: 0.6851, Val MAPE: 0.1317


  6%|▌         | 6/100 [00:01<00:20,  4.61it/s]

Epoch 6/100, Train Loss: 58.4171, Val Loss: 79.5171, Val R2: 0.7122, Val MAPE: 0.1229


  7%|▋         | 7/100 [00:01<00:20,  4.56it/s]

Epoch 7/100, Train Loss: 47.3669, Val Loss: 58.5730, Val R2: 0.7880, Val MAPE: 0.1061


  8%|▊         | 8/100 [00:01<00:20,  4.57it/s]

Epoch 8/100, Train Loss: 39.5541, Val Loss: 47.6239, Val R2: 0.8276, Val MAPE: 0.0949


  9%|▉         | 9/100 [00:02<00:19,  4.62it/s]

Epoch 9/100, Train Loss: 32.7804, Val Loss: 38.2884, Val R2: 0.8614, Val MAPE: 0.0844


 10%|█         | 10/100 [00:02<00:19,  4.63it/s]

Epoch 10/100, Train Loss: 27.4611, Val Loss: 31.0928, Val R2: 0.8875, Val MAPE: 0.0758


 11%|█         | 11/100 [00:02<00:19,  4.62it/s]

Epoch 11/100, Train Loss: 23.3309, Val Loss: 25.2215, Val R2: 0.9087, Val MAPE: 0.0682


 12%|█▏        | 12/100 [00:02<00:19,  4.54it/s]

Epoch 12/100, Train Loss: 20.1472, Val Loss: 20.8644, Val R2: 0.9245, Val MAPE: 0.0621


 13%|█▎        | 13/100 [00:02<00:18,  4.60it/s]

Epoch 13/100, Train Loss: 17.6201, Val Loss: 17.9138, Val R2: 0.9352, Val MAPE: 0.0573


 14%|█▍        | 14/100 [00:03<00:19,  4.43it/s]

Epoch 14/100, Train Loss: 15.5698, Val Loss: 15.7919, Val R2: 0.9428, Val MAPE: 0.0536


 15%|█▌        | 15/100 [00:03<00:18,  4.51it/s]

Epoch 15/100, Train Loss: 13.8908, Val Loss: 14.3309, Val R2: 0.9481, Val MAPE: 0.0506


 16%|█▌        | 16/100 [00:03<00:18,  4.60it/s]

Epoch 16/100, Train Loss: 12.4953, Val Loss: 13.2334, Val R2: 0.9521, Val MAPE: 0.0481


 17%|█▋        | 17/100 [00:03<00:18,  4.56it/s]

Epoch 17/100, Train Loss: 11.2864, Val Loss: 12.2573, Val R2: 0.9556, Val MAPE: 0.0458


 18%|█▊        | 18/100 [00:03<00:17,  4.60it/s]

Epoch 18/100, Train Loss: 10.2030, Val Loss: 11.4438, Val R2: 0.9586, Val MAPE: 0.0438


 19%|█▉        | 19/100 [00:04<00:18,  4.45it/s]

Epoch 19/100, Train Loss: 9.2281, Val Loss: 10.9041, Val R2: 0.9605, Val MAPE: 0.0422


 20%|██        | 20/100 [00:04<00:20,  3.97it/s]

Epoch 20/100, Train Loss: 8.3508, Val Loss: 10.4702, Val R2: 0.9621, Val MAPE: 0.0409


 21%|██        | 21/100 [00:04<00:21,  3.61it/s]

Epoch 21/100, Train Loss: 7.5628, Val Loss: 10.1766, Val R2: 0.9632, Val MAPE: 0.0398


 22%|██▏       | 22/100 [00:05<00:22,  3.47it/s]

Epoch 22/100, Train Loss: 6.8827, Val Loss: 9.9852, Val R2: 0.9639, Val MAPE: 0.0389


 23%|██▎       | 23/100 [00:05<00:28,  2.70it/s]

Epoch 23/100, Train Loss: 6.3422, Val Loss: 10.0272, Val R2: 0.9637, Val MAPE: 0.0384


 24%|██▍       | 24/100 [00:06<00:31,  2.38it/s]

Epoch 24/100, Train Loss: 5.9737, Val Loss: 10.1782, Val R2: 0.9632, Val MAPE: 0.0384


 25%|██▌       | 25/100 [00:06<00:32,  2.29it/s]

Epoch 25/100, Train Loss: 5.8286, Val Loss: 10.8861, Val R2: 0.9606, Val MAPE: 0.0392


 26%|██▌       | 26/100 [00:07<00:34,  2.14it/s]

Epoch 26/100, Train Loss: 6.1721, Val Loss: 13.2589, Val R2: 0.9520, Val MAPE: 0.0423


 27%|██▋       | 27/100 [00:07<00:32,  2.22it/s]

Epoch 27/100, Train Loss: 7.2080, Val Loss: 20.1877, Val R2: 0.9269, Val MAPE: 0.0503


 28%|██▊       | 28/100 [00:08<00:31,  2.27it/s]

Epoch 28/100, Train Loss: 9.2730, Val Loss: 35.1705, Val R2: 0.8727, Val MAPE: 0.0649


 29%|██▉       | 29/100 [00:08<00:28,  2.52it/s]

Epoch 29/100, Train Loss: 12.1679, Val Loss: 69.6720, Val R2: 0.7478, Val MAPE: 0.0917


 30%|███       | 30/100 [00:08<00:25,  2.79it/s]

Epoch 30/100, Train Loss: 14.6592, Val Loss: 103.3369, Val R2: 0.6260, Val MAPE: 0.1148


 31%|███       | 31/100 [00:08<00:22,  3.01it/s]

Epoch 31/100, Train Loss: 13.1488, Val Loss: 103.7098, Val R2: 0.6246, Val MAPE: 0.1155


 32%|███▏      | 32/100 [00:09<00:22,  3.07it/s]

Epoch 32/100, Train Loss: 9.0863, Val Loss: 72.3389, Val R2: 0.7382, Val MAPE: 0.0957


 33%|███▎      | 33/100 [00:09<00:21,  3.14it/s]

Epoch 33/100, Train Loss: 5.4593, Val Loss: 42.3661, Val R2: 0.8466, Val MAPE: 0.0723


 34%|███▍      | 34/100 [00:09<00:19,  3.32it/s]

Epoch 34/100, Train Loss: 3.9023, Val Loss: 25.8563, Val R2: 0.9064, Val MAPE: 0.0567


 35%|███▌      | 35/100 [00:10<00:19,  3.41it/s]

Epoch 35/100, Train Loss: 3.0576, Val Loss: 16.2361, Val R2: 0.9412, Val MAPE: 0.0445


 36%|███▌      | 36/100 [00:10<00:18,  3.51it/s]

Epoch 36/100, Train Loss: 2.9717, Val Loss: 15.8913, Val R2: 0.9425, Val MAPE: 0.0444


 37%|███▋      | 37/100 [00:10<00:17,  3.62it/s]

Epoch 37/100, Train Loss: 3.2860, Val Loss: 9.5822, Val R2: 0.9653, Val MAPE: 0.0365


 38%|███▊      | 38/100 [00:10<00:16,  3.76it/s]

Epoch 38/100, Train Loss: 3.7190, Val Loss: 7.7704, Val R2: 0.9719, Val MAPE: 0.0355


 39%|███▉      | 39/100 [00:11<00:15,  3.92it/s]

Epoch 39/100, Train Loss: 4.3948, Val Loss: 5.5011, Val R2: 0.9801, Val MAPE: 0.0316


 40%|████      | 40/100 [00:11<00:14,  4.04it/s]

Epoch 40/100, Train Loss: 5.8833, Val Loss: 6.1164, Val R2: 0.9779, Val MAPE: 0.0329


 41%|████      | 41/100 [00:11<00:14,  4.13it/s]

Epoch 41/100, Train Loss: 6.8012, Val Loss: 8.4328, Val R2: 0.9695, Val MAPE: 0.0385


 42%|████▏     | 42/100 [00:11<00:14,  4.13it/s]

Epoch 42/100, Train Loss: 8.5441, Val Loss: 14.4499, Val R2: 0.9477, Val MAPE: 0.0531


 43%|████▎     | 43/100 [00:12<00:13,  4.25it/s]

Epoch 43/100, Train Loss: 8.3872, Val Loss: 16.7356, Val R2: 0.9394, Val MAPE: 0.0576


 44%|████▍     | 44/100 [00:12<00:12,  4.31it/s]

Epoch 44/100, Train Loss: 8.3953, Val Loss: 18.5235, Val R2: 0.9330, Val MAPE: 0.0621


 45%|████▌     | 45/100 [00:12<00:12,  4.37it/s]

Epoch 45/100, Train Loss: 5.6895, Val Loss: 9.8899, Val R2: 0.9642, Val MAPE: 0.0434


 46%|████▌     | 46/100 [00:12<00:12,  4.40it/s]

Epoch 46/100, Train Loss: 4.1497, Val Loss: 7.6314, Val R2: 0.9724, Val MAPE: 0.0381


 47%|████▋     | 47/100 [00:12<00:11,  4.44it/s]

Epoch 47/100, Train Loss: 3.5016, Val Loss: 5.9793, Val R2: 0.9784, Val MAPE: 0.0337


 48%|████▊     | 48/100 [00:13<00:11,  4.40it/s]

Epoch 48/100, Train Loss: 2.3560, Val Loss: 3.6797, Val R2: 0.9867, Val MAPE: 0.0258


 49%|████▉     | 49/100 [00:13<00:11,  4.48it/s]

Epoch 49/100, Train Loss: 1.5528, Val Loss: 2.3731, Val R2: 0.9914, Val MAPE: 0.0201


 50%|█████     | 50/100 [00:13<00:11,  4.52it/s]

Epoch 50/100, Train Loss: 1.1478, Val Loss: 2.3136, Val R2: 0.9916, Val MAPE: 0.0193


 51%|█████     | 51/100 [00:13<00:10,  4.57it/s]

Epoch 51/100, Train Loss: 1.0213, Val Loss: 2.8682, Val R2: 0.9896, Val MAPE: 0.0215


 52%|█████▏    | 52/100 [00:14<00:10,  4.59it/s]

Epoch 52/100, Train Loss: 1.1831, Val Loss: 4.4259, Val R2: 0.9840, Val MAPE: 0.0267


 53%|█████▎    | 53/100 [00:14<00:10,  4.61it/s]

Epoch 53/100, Train Loss: 1.5468, Val Loss: 7.5193, Val R2: 0.9728, Val MAPE: 0.0344


 54%|█████▍    | 54/100 [00:14<00:10,  4.53it/s]

Epoch 54/100, Train Loss: 2.2997, Val Loss: 11.5890, Val R2: 0.9581, Val MAPE: 0.0423


 55%|█████▌    | 55/100 [00:14<00:10,  4.22it/s]

Epoch 55/100, Train Loss: 3.2555, Val Loss: 15.8332, Val R2: 0.9427, Val MAPE: 0.0489


 56%|█████▌    | 56/100 [00:15<00:11,  3.97it/s]

Epoch 56/100, Train Loss: 4.3873, Val Loss: 23.2593, Val R2: 0.9158, Val MAPE: 0.0585


 57%|█████▋    | 57/100 [00:15<00:11,  3.81it/s]

Epoch 57/100, Train Loss: 5.8855, Val Loss: 32.7387, Val R2: 0.8815, Val MAPE: 0.0690


 58%|█████▊    | 58/100 [00:15<00:11,  3.54it/s]

Epoch 58/100, Train Loss: 7.2614, Val Loss: 49.6646, Val R2: 0.8202, Val MAPE: 0.0863


 59%|█████▉    | 59/100 [00:15<00:12,  3.29it/s]

Epoch 59/100, Train Loss: 8.1505, Val Loss: 64.3667, Val R2: 0.7670, Val MAPE: 0.0987


 60%|██████    | 60/100 [00:16<00:12,  3.33it/s]

Epoch 60/100, Train Loss: 7.1635, Val Loss: 70.0719, Val R2: 0.7464, Val MAPE: 0.1031


 61%|██████    | 61/100 [00:16<00:11,  3.40it/s]

Epoch 61/100, Train Loss: 5.3378, Val Loss: 56.8474, Val R2: 0.7942, Val MAPE: 0.0930


 62%|██████▏   | 62/100 [00:16<00:11,  3.44it/s]

Epoch 62/100, Train Loss: 3.2449, Val Loss: 42.5636, Val R2: 0.8459, Val MAPE: 0.0791


 63%|██████▎   | 63/100 [00:17<00:10,  3.45it/s]

Epoch 63/100, Train Loss: 1.8549, Val Loss: 26.1854, Val R2: 0.9052, Val MAPE: 0.0601


 64%|██████▍   | 64/100 [00:17<00:10,  3.41it/s]

Epoch 64/100, Train Loss: 1.3305, Val Loss: 15.0713, Val R2: 0.9454, Val MAPE: 0.0426


 65%|██████▌   | 65/100 [00:17<00:09,  3.51it/s]

Epoch 65/100, Train Loss: 1.4403, Val Loss: 12.5150, Val R2: 0.9547, Val MAPE: 0.0399


 66%|██████▌   | 66/100 [00:17<00:09,  3.47it/s]

Epoch 66/100, Train Loss: 2.0574, Val Loss: 6.1564, Val R2: 0.9777, Val MAPE: 0.0324


 67%|██████▋   | 67/100 [00:18<00:09,  3.54it/s]

Epoch 67/100, Train Loss: 2.8133, Val Loss: 4.8588, Val R2: 0.9824, Val MAPE: 0.0312


 68%|██████▊   | 68/100 [00:18<00:08,  3.58it/s]

Epoch 68/100, Train Loss: 4.1064, Val Loss: 4.6347, Val R2: 0.9832, Val MAPE: 0.0304


 69%|██████▉   | 69/100 [00:18<00:08,  3.52it/s]

Epoch 69/100, Train Loss: 6.3175, Val Loss: 7.6099, Val R2: 0.9725, Val MAPE: 0.0375


 70%|███████   | 70/100 [00:19<00:08,  3.45it/s]

Epoch 70/100, Train Loss: 7.7149, Val Loss: 15.2398, Val R2: 0.9448, Val MAPE: 0.0556


 71%|███████   | 71/100 [00:19<00:08,  3.50it/s]

Epoch 71/100, Train Loss: 13.2341, Val Loss: 35.2230, Val R2: 0.8725, Val MAPE: 0.0865


 72%|███████▏  | 72/100 [00:19<00:08,  3.39it/s]

Epoch 72/100, Train Loss: 11.4648, Val Loss: 29.6828, Val R2: 0.8926, Val MAPE: 0.0779


 73%|███████▎  | 73/100 [00:20<00:08,  3.34it/s]

Epoch 73/100, Train Loss: 10.0270, Val Loss: 31.0720, Val R2: 0.8875, Val MAPE: 0.0810


 74%|███████▍  | 74/100 [00:20<00:07,  3.31it/s]

Epoch 74/100, Train Loss: 6.8502, Val Loss: 14.6548, Val R2: 0.9470, Val MAPE: 0.0557


 75%|███████▌  | 75/100 [00:20<00:07,  3.34it/s]

Epoch 75/100, Train Loss: 3.3073, Val Loss: 4.7223, Val R2: 0.9829, Val MAPE: 0.0301


 76%|███████▌  | 76/100 [00:20<00:07,  3.37it/s]

Epoch 76/100, Train Loss: 1.7429, Val Loss: 1.7689, Val R2: 0.9936, Val MAPE: 0.0174


 77%|███████▋  | 77/100 [00:21<00:06,  3.29it/s]

Epoch 77/100, Train Loss: 1.2479, Val Loss: 3.4867, Val R2: 0.9874, Val MAPE: 0.0236


 78%|███████▊  | 78/100 [00:21<00:06,  3.16it/s]

Epoch 78/100, Train Loss: 1.3195, Val Loss: 7.0631, Val R2: 0.9744, Val MAPE: 0.0338


 79%|███████▉  | 79/100 [00:21<00:06,  3.13it/s]

Epoch 79/100, Train Loss: 1.5581, Val Loss: 13.0425, Val R2: 0.9528, Val MAPE: 0.0458


 80%|████████  | 80/100 [00:22<00:06,  3.26it/s]

Epoch 80/100, Train Loss: 1.9624, Val Loss: 18.0053, Val R2: 0.9348, Val MAPE: 0.0544


 81%|████████  | 81/100 [00:22<00:05,  3.38it/s]

Epoch 81/100, Train Loss: 2.0517, Val Loss: 21.4584, Val R2: 0.9223, Val MAPE: 0.0592


 82%|████████▏ | 82/100 [00:22<00:05,  3.34it/s]

Epoch 82/100, Train Loss: 2.0108, Val Loss: 22.2542, Val R2: 0.9194, Val MAPE: 0.0601


 83%|████████▎ | 83/100 [00:23<00:04,  3.40it/s]

Epoch 83/100, Train Loss: 1.7903, Val Loss: 23.1935, Val R2: 0.9160, Val MAPE: 0.0606


 84%|████████▍ | 84/100 [00:23<00:04,  3.28it/s]

Epoch 84/100, Train Loss: 1.8607, Val Loss: 24.8267, Val R2: 0.9101, Val MAPE: 0.0626


 85%|████████▌ | 85/100 [00:23<00:04,  3.40it/s]

Epoch 85/100, Train Loss: 1.7132, Val Loss: 25.1004, Val R2: 0.9091, Val MAPE: 0.0623


 86%|████████▌ | 86/100 [00:23<00:04,  3.39it/s]

Epoch 86/100, Train Loss: 1.5250, Val Loss: 24.9581, Val R2: 0.9097, Val MAPE: 0.0619


 87%|████████▋ | 87/100 [00:24<00:03,  3.43it/s]

Epoch 87/100, Train Loss: 1.4007, Val Loss: 24.9369, Val R2: 0.9097, Val MAPE: 0.0615


 88%|████████▊ | 88/100 [00:24<00:03,  3.40it/s]

Epoch 88/100, Train Loss: 1.2040, Val Loss: 24.1792, Val R2: 0.9125, Val MAPE: 0.0602


 89%|████████▉ | 89/100 [00:24<00:03,  3.54it/s]

Epoch 89/100, Train Loss: 0.9854, Val Loss: 22.8504, Val R2: 0.9173, Val MAPE: 0.0582


 90%|█████████ | 90/100 [00:25<00:02,  3.68it/s]

Epoch 90/100, Train Loss: 0.7837, Val Loss: 21.2978, Val R2: 0.9229, Val MAPE: 0.0556


 91%|█████████ | 91/100 [00:25<00:02,  3.73it/s]

Epoch 91/100, Train Loss: 0.5963, Val Loss: 19.0706, Val R2: 0.9310, Val MAPE: 0.0519


 92%|█████████▏| 92/100 [00:25<00:02,  3.74it/s]

Epoch 92/100, Train Loss: 0.4783, Val Loss: 16.1916, Val R2: 0.9414, Val MAPE: 0.0465


 93%|█████████▎| 93/100 [00:25<00:01,  3.82it/s]

Epoch 93/100, Train Loss: 0.5546, Val Loss: 13.0713, Val R2: 0.9527, Val MAPE: 0.0396


 94%|█████████▍| 94/100 [00:26<00:01,  3.81it/s]

Epoch 94/100, Train Loss: 1.2269, Val Loss: 8.9486, Val R2: 0.9676, Val MAPE: 0.0356


 95%|█████████▌| 95/100 [00:26<00:01,  3.74it/s]

Epoch 95/100, Train Loss: 2.2591, Val Loss: 14.5195, Val R2: 0.9474, Val MAPE: 0.0457


 96%|█████████▌| 96/100 [00:26<00:01,  3.80it/s]

Epoch 96/100, Train Loss: 2.4524, Val Loss: 4.5727, Val R2: 0.9834, Val MAPE: 0.0295


 97%|█████████▋| 97/100 [00:26<00:00,  3.66it/s]

Epoch 97/100, Train Loss: 9.1370, Val Loss: 11.2178, Val R2: 0.9594, Val MAPE: 0.0469


 98%|█████████▊| 98/100 [00:27<00:00,  3.67it/s]

Epoch 98/100, Train Loss: 6.2809, Val Loss: 9.5662, Val R2: 0.9654, Val MAPE: 0.0413


 99%|█████████▉| 99/100 [00:27<00:00,  3.74it/s]

Epoch 99/100, Train Loss: 11.1523, Val Loss: 28.0995, Val R2: 0.8983, Val MAPE: 0.0764


100%|██████████| 100/100 [00:27<00:00,  3.61it/s]


Epoch 100/100, Train Loss: 14.8531, Val Loss: 53.2378, Val R2: 0.8073, Val MAPE: 0.1054
Loaded best model from MLP_logs/logs_2/best_model_epoch_76_r2_0.9936.pth
Using model from MLP_logs/logs_2/best_model_epoch_76_r2_0.9936.pth for rolling prediction.


Rolling Prediction: 100%|██████████| 423/423 [09:01<00:00,  1.28s/it]


In [10]:
# Test-set R2 and MAPE
test_actuals = test_data['Close'].values
test_r2 = r2_score(test_actuals, test_predictions)
test_mape = mean_absolute_percentage_error(test_actuals, test_predictions)

print(f'Test R2: {test_r2:.4f}')
print(f'Test MAPE: {test_mape:.4f}')

# Save actual and predicted values to CSV
predictions_df = pd.DataFrame({
    'Actual': test_actuals,
    'Predicted': test_predictions
})
# `logs_dir` here is the module-level root folder, so the file lands next to the
# run sub-folders rather than inside one of them.
predictions_csv_path = os.path.join(logs_dir, 'predictions.csv')
predictions_df.to_csv(predictions_csv_path, index=False)
print(f'Saved predictions to {predictions_csv_path}')

# Plot loss, R2 and MAPE per epoch with Plotly
epochs = list(range(1, num_epochs + 1))

fig_loss = go.Figure()
fig_loss.add_trace(go.Scatter(x=epochs, y=train_losses, mode='lines', name='Train Loss'))
fig_loss.add_trace(go.Scatter(x=epochs, y=val_losses, mode='lines', name='Validation Loss'))
fig_loss.update_layout(title='Train and Validation Loss', xaxis=dict(title='Epoch'), yaxis=dict(title='Loss'))
fig_loss.show()

fig_r2 = go.Figure()
fig_r2.add_trace(go.Scatter(x=epochs, y=val_r2s, mode='lines', name='Validation R2'))
fig_r2.update_layout(title='Validation R2 Over Epochs', xaxis=dict(title='Epoch'), yaxis=dict(title='R2'))
fig_r2.show()

fig_mape = go.Figure()
fig_mape.add_trace(go.Scatter(x=epochs, y=val_mapes, mode='lines', name='Validation MAPE'))
fig_mape.update_layout(title='Validation MAPE Over Epochs', xaxis=dict(title='Epoch'), yaxis=dict(title='MAPE'))
fig_mape.show()

# Plot rolling predictions against the actual values with Plotly
test_trace_actual = go.Scatter(x=test_data.index, y=test_actuals, mode='lines', name='Test Actual Close Price')
test_trace_predicted = go.Scatter(x=test_data.index, y=test_predictions, mode='lines', name='Test Predicted Close Price')

layout = go.Layout(title='Rolling Prediction vs Actual Close Price', xaxis=dict(title='Time'), yaxis=dict(title='Close Price'))

fig = go.Figure(data=[test_trace_actual, test_trace_predicted], layout=layout)
fig.show()


Test R2: 0.9611
Test MAPE: 0.0131
Saved predictions to MLP_logs/predictions.csv


---
# Baseline: BiLSTM

In [1]:
import os
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from torch import nn, optim
from sklearn.metrics import r2_score, mean_absolute_percentage_error
import plotly.graph_objs as go
import plotly.offline as py
from tqdm import tqdm

# Load the data
# NOTE(review): absolute, machine-specific path -- the cell only runs where this file exists.
file_path = '/teamspace/studios/this_studio/visualization/ping_an_data_with_pandas_ta_features.csv'
data = pd.read_csv(file_path)

# Fill missing values: back-fill first, then forward-fill whatever is still missing.
# NOTE(review): this runs on the full frame before the split below, and bfill copies
# later observations into earlier rows -- confirm this is acceptable for evaluation.
data = data.bfill().ffill()

# Feature columns fed to the models
features = ['Open', 'High', 'Low', 'Close', 'Volume', 'Return', 'MA5', 'MA10', 'MA50', 'MA200', 
            'BBL_20_2.0', 'BBM_20_2.0', 'BBU_20_2.0', 'BBB_20_2.0', 'BBP_20_2.0', 'STOCHk_14_3_3', 
            'STOCHd_14_3_3', 'ATR', 'VWAP', 'CMO', 'RSI', 'MACD_12_26_9', 'MACDh_12_26_9', 
            'MACDs_12_26_9', 'Volatility']

# Split by row position: first 70% train, next 20% validation, remainder test
# (presumably the rows are in time order -- verify against the CSV).
train_size = int(len(data) * 0.7)
val_size = int(len(data) * 0.2)
test_size = len(data) - train_size - val_size

train_data = data[:train_size]
val_data = data[train_size:train_size + val_size]
test_data = data[train_size + val_size:]

class StockDataset(Dataset):
    """Standardised feature rows, each shaped as a length-1 sequence, paired with ``Close``.

    Feature columns are selected with the module-level ``features`` list.

    Args:
        data: DataFrame containing every column in ``features`` plus ``Close``.
        mean: Optional per-feature mean used for standardisation. When either
            ``mean`` or ``std`` is ``None`` both statistics are computed from
            ``data`` itself.
        std: Optional per-feature standard deviation, see ``mean``.

    Attributes:
        mean, std: The statistics in use, stored unmodified so they can be
            passed on to other datasets (e.g. validation or test).
        features: float32 array of standardised features.
        targets: float32 array of ``Close`` values.
    """

    def __init__(self, data, mean=None, std=None):
        self.data = data
        self.features = data[features].values.astype(np.float32)
        self.targets = data['Close'].values.astype(np.float32)

        if mean is None or std is None:
            self.mean = self.features.mean(axis=0)
            self.std = self.features.std(axis=0)
        else:
            self.mean = mean
            self.std = std

        # A constant column has std == 0; dividing by it yields NaN/inf, which
        # would propagate into the loss. Divide such columns by 1 instead.
        scale = np.asarray(self.std, dtype=np.float32)
        scale = np.where(scale == 0, np.float32(1.0), scale)
        # Keep float32 regardless of the dtype of externally supplied statistics.
        self.features = ((self.features - self.mean) / scale).astype(np.float32)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(feature_sequence, target)`` tensors for row ``idx``."""
        feature = self.features[idx]
        target = self.targets[idx]
        # unsqueeze(0) adds a leading time-step axis of length 1.
        return torch.tensor(feature).unsqueeze(0), torch.tensor(target)

# Bidirectional LSTM regressor
class BiLSTM(nn.Module):
    """Stacked bidirectional LSTM followed by a linear head with one output.

    The LSTM is built with ``batch_first=True``, so inputs are laid out as
    (batch, sequence, feature). Only the last time step of the LSTM output is
    passed to the linear head.
    """

    def __init__(self, input_size, hidden_size, num_layers):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(
            input_size,
            hidden_size,
            num_layers,
            batch_first=True,
            bidirectional=True,
        )
        # Forward and backward outputs are concatenated, hence 2 * hidden_size.
        self.fc = nn.Linear(hidden_size * 2, 1)

    def forward(self, x):
        """Return one value per batch element, computed from the last time step."""
        # One (h, c) slice per layer and per direction, zero-filled on x's device.
        state_shape = (self.num_layers * 2, x.size(0), self.hidden_size)
        h0 = torch.zeros(state_shape, device=x.device)
        c0 = torch.zeros(state_shape, device=x.device)
        sequence_out, _ = self.lstm(x, (h0, c0))
        last_step = sequence_out[:, -1, :]
        return self.fc(last_step)

# Root directory that holds one sub-folder per BiLSTM training run
logs_dir = "BiLSTM_logs"
# exist_ok=True creates the folder only when it is missing, without a separate
# exists() check that could race with another process creating it.
os.makedirs(logs_dir, exist_ok=True)

# Index for the next training run
def get_next_log_index(logs_dir):
    """Return the index to use for the next ``logs_<n>`` run directory.

    Only sub-directories of ``logs_dir`` named ``logs_<digits>`` are counted.
    Other entries (plain files, or folders such as Jupyter's
    ``.ipynb_checkpoints``) are ignored rather than fed to ``int()``.

    Args:
        logs_dir: Existing directory that contains the run folders.

    Returns:
        0 when no run folder exists yet, otherwise the highest index plus one.
    """
    prefix = 'logs_'
    run_indices = []
    for name in os.listdir(logs_dir):
        suffix = name[len(prefix):]
        if not (name.startswith(prefix) and suffix.isdecimal()):
            continue
        if os.path.isdir(os.path.join(logs_dir, name)):
            run_indices.append(int(suffix))
    # default=-1 makes the empty case yield 0.
    return max(run_indices, default=-1) + 1

# One training epoch
def train(model, loader, criterion, optimizer, device):
    """Run one optimisation pass over ``loader`` and return the mean per-sample loss.

    Args:
        model: Module to optimise; it is switched to training mode.
        loader: Iterable of ``(features, targets)`` batches exposing ``.dataset``.
        criterion: Loss callable applied to ``(outputs, targets.unsqueeze(1))``.
        optimizer: Optimiser stepped once per batch.
        device: Device the batches are moved to.

    Returns:
        Sum of ``batch_loss * batch_size`` divided by ``len(loader.dataset)``.
    """
    model.train()
    total_loss = 0.0
    for batch_x, batch_y in loader:
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)

        optimizer.zero_grad()
        # unsqueeze(1) gives the targets a trailing axis to line up with the model output.
        batch_loss = criterion(model(batch_x), batch_y.unsqueeze(1))
        batch_loss.backward()
        optimizer.step()

        # Weight by batch size so a smaller final batch does not skew the average
        # (assumes the criterion returns a batch mean, as nn.MSELoss does by default).
        total_loss += batch_loss.item() * batch_x.size(0)
    return total_loss / len(loader.dataset)

# Evaluation pass
def validate(model, loader, criterion, device):
    """Evaluate ``model`` on ``loader`` without updating any weights.

    Args:
        model: Module to evaluate; it is switched to eval mode.
        loader: Iterable of ``(features, targets)`` batches exposing ``.dataset``.
        criterion: Loss callable applied to ``(outputs, targets.unsqueeze(1))``.
        device: Device the batches are moved to.

    Returns:
        Tuple ``(mean_loss, r2, mape)``: the size-weighted mean loss over the
        dataset, then ``r2_score`` and ``mean_absolute_percentage_error``
        computed over all collected targets and predictions.
    """
    model.eval()
    total_loss = 0.0
    y_true, y_pred = [], []
    with torch.no_grad():
        for batch_x, batch_y in loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            batch_out = model(batch_x)
            batch_loss = criterion(batch_out, batch_y.unsqueeze(1))
            total_loss += batch_loss.item() * batch_x.size(0)
            # Collect on the CPU as NumPy values for the sklearn metrics below.
            y_true.extend(batch_y.cpu().numpy())
            y_pred.extend(batch_out.cpu().numpy())
    mean_loss = total_loss / len(loader.dataset)
    r2 = r2_score(y_true, y_pred)
    mape = mean_absolute_percentage_error(y_true, y_pred)
    return mean_loss, r2, mape

# Training / validation loop
def train_and_validate(model, train_loader, val_loader, num_epochs, device):
    """Train for ``num_epochs`` epochs and keep the checkpoint with the best validation R2.

    A new ``logs_<n>`` sub-folder is created under the module-level
    ``logs_dir`` for this run. Every time the validation R2 improves, the
    model's state dict is saved there. After the last epoch the best
    checkpoint is loaded back into ``model``.

    Args:
        model: Module to train (updated in place).
        train_loader: Loader passed to ``train``.
        val_loader: Loader passed to ``validate``.
        num_epochs: Number of epochs to run.
        device: Device used for training and evaluation.

    Returns:
        Tuple ``(train_losses, val_losses, val_r2s, val_mapes, best_model_path)``.
        The first four are per-epoch lists. ``best_model_path`` is ``None``
        if no checkpoint was ever saved.
    """
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    train_losses = []
    val_losses = []
    val_r2s = []
    val_mapes = []
    best_val_r2 = float('-inf')
    best_model_path = None

    # Create a fresh sub-folder for this run. A separate local name is required:
    # assigning to `logs_dir` here would make it local to the function, and the
    # read on the line below would then raise UnboundLocalError.
    log_index = get_next_log_index(logs_dir)
    run_dir = os.path.join(logs_dir, f'logs_{log_index}')
    os.makedirs(run_dir)

    for epoch in tqdm(range(num_epochs)):
        train_loss = train(model, train_loader, criterion, optimizer, device)
        val_loss, val_r2, val_mape = validate(model, val_loader, criterion, device)
        train_losses.append(train_loss)
        val_losses.append(val_loss)
        val_r2s.append(val_r2)
        val_mapes.append(val_mape)

        # Save the model if validation R2 is the best we've seen so far
        if val_r2 > best_val_r2:
            best_val_r2 = val_r2
            best_model_path = os.path.join(run_dir, f'best_model_epoch_{epoch+1}_r2_{val_r2:.4f}.pth')
            torch.save(model.state_dict(), best_model_path)

        print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val R2: {val_r2:.4f}, Val MAPE: {val_mape:.4f}')

    # Load the best model before returning
    if best_model_path:
        model.load_state_dict(torch.load(best_model_path))
        print(f'Loaded best model from {best_model_path}')
    return train_losses, val_losses, val_r2s, val_mapes, best_model_path

# 滚动预测函数（带再训练）
def rolling_predict_with_retraining(model, train_data, val_data, test_data, window_size, device, num_epochs=5):
    """Step through `test_data` one row at a time, predicting and then fine-tuning the model.

    For step ``i`` the function:
      1. concatenates the accumulated history with ``test_data[:i]`` and keeps the
         last `window_size` rows;
      2. runs the model on that window and records the output for the window's
         final row as the prediction for step ``i``;
      3. appends a copy of test row ``i`` whose 'Close' is replaced by that
         prediction to the accumulated history;
      4. retrains the model on the window for `num_epochs` passes (MSE loss, Adam).

    Relies on notebook-level names: `StockDataset`, `train`, `train_dataset`
    (source of the normalisation mean/std) and `batch_size`.

    Args:
        model: Trained network; it is further trained in place.
        train_data, val_data: DataFrames that seed the history.
        test_data: DataFrame to predict row by row.
        window_size: Maximum number of most recent rows kept in each window.
        device: Device the feature batches are moved to.
        num_epochs: Retraining passes per step.

    Returns:
        list[float]: one prediction per row of `test_data`.
    """
    model.eval()
    predictions = []
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    updated_train_data = pd.concat([train_data, val_data])

    for i in tqdm(range(len(test_data)), desc="Rolling Prediction"):
        # Build the rolling window.
        # NOTE(review): `updated_train_data` already holds copies of test rows
        # 0..i-1 (with predicted 'Close'), so the window contains those rows
        # alongside the originals from `test_data[:i]` - confirm this is intended.
        window_data = pd.concat([updated_train_data, test_data[:i]], axis=0).tail(window_size)
        window_dataset = StockDataset(window_data, mean=train_dataset.mean, std=train_dataset.std)
        window_loader = DataLoader(window_dataset, batch_size=len(window_dataset), shuffle=False)

        # Predict: the loader yields the whole window as a single batch and only
        # the output for its last row is kept.
        with torch.no_grad():
            for features, _ in window_loader:
                features = features.to(device)
                prediction = model(features)
                predictions.append(prediction[-1].item())

        # Feed the prediction back into the history (simulating live use).
        new_row = test_data.iloc[i].copy()
        new_row['Close'] = predictions[-1]
        updated_train_data = pd.concat([updated_train_data, new_row.to_frame().T])

        # Retrain on the current window.
        train_loader = DataLoader(window_dataset, batch_size=batch_size, shuffle=False)
        for _ in range(num_epochs):
            train(model, train_loader, criterion, optimizer, device)

        # Retraining may leave the model in training mode (assumes `train` calls
        # model.train() - TODO confirm). Switch back so the next prediction, and
        # the caller, see inference behaviour for layers such as dropout.
        model.eval()

    return predictions


In [5]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Mini-batch size for the loaders below; rolling_predict_with_retraining also
# reads this notebook-level name.
batch_size = 32
# Build a fresh BiLSTM (no checkpoint is loaded here) and run training/validation.
input_size = len(features)
hidden_size = 64
num_layers = 2
model = BiLSTM(input_size, hidden_size, num_layers).to(device)
num_epochs = 100

# The validation set is normalised with the mean/std exposed by the training
# dataset. Both loaders keep the rows in their original order (shuffle=False).
train_dataset = StockDataset(train_data)
val_dataset = StockDataset(val_data, train_dataset.mean, train_dataset.std)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# Returns the per-epoch histories plus the path of the best checkpoint.
train_losses, val_losses, val_r2s, val_mapes, best_model_path = train_and_validate(model, train_loader, val_loader, num_epochs, device)


  1%|          | 1/100 [00:00<00:23,  4.26it/s]

Epoch 1/100, Train Loss: 1284.1266, Val Loss: 4512.8602, Val R2: -15.3351, Val MAPE: 0.9478


  2%|▏         | 2/100 [00:00<00:22,  4.42it/s]

Epoch 2/100, Train Loss: 622.7401, Val Loss: 2330.2331, Val R2: -7.4347, Val MAPE: 0.6432


  3%|▎         | 3/100 [00:00<00:24,  4.02it/s]

Epoch 3/100, Train Loss: 292.9544, Val Loss: 1829.6230, Val R2: -5.6227, Val MAPE: 0.5453


  4%|▍         | 4/100 [00:00<00:23,  4.07it/s]

Epoch 4/100, Train Loss: 209.8302, Val Loss: 1516.6442, Val R2: -4.4898, Val MAPE: 0.4793


  5%|▌         | 5/100 [00:01<00:23,  4.04it/s]

Epoch 5/100, Train Loss: 152.4254, Val Loss: 1275.3304, Val R2: -3.6163, Val MAPE: 0.4230


  6%|▌         | 6/100 [00:01<00:22,  4.11it/s]

Epoch 6/100, Train Loss: 119.5254, Val Loss: 1097.2765, Val R2: -2.9718, Val MAPE: 0.3776


  7%|▋         | 7/100 [00:01<00:23,  4.01it/s]

Epoch 7/100, Train Loss: 98.5384, Val Loss: 959.7357, Val R2: -2.4739, Val MAPE: 0.3399


  8%|▊         | 8/100 [00:01<00:23,  3.89it/s]

Epoch 8/100, Train Loss: 79.8914, Val Loss: 847.2914, Val R2: -2.0669, Val MAPE: 0.3101


  9%|▉         | 9/100 [00:02<00:22,  4.05it/s]

Epoch 9/100, Train Loss: 66.2015, Val Loss: 750.3034, Val R2: -1.7159, Val MAPE: 0.2861


 10%|█         | 10/100 [00:02<00:23,  3.90it/s]

Epoch 10/100, Train Loss: 53.5175, Val Loss: 663.6794, Val R2: -1.4023, Val MAPE: 0.2651


 11%|█         | 11/100 [00:02<00:22,  3.99it/s]

Epoch 11/100, Train Loss: 42.8908, Val Loss: 588.0453, Val R2: -1.1285, Val MAPE: 0.2478


 12%|█▏        | 12/100 [00:02<00:21,  4.01it/s]

Epoch 12/100, Train Loss: 34.4283, Val Loss: 523.0247, Val R2: -0.8932, Val MAPE: 0.2358


 13%|█▎        | 13/100 [00:03<00:21,  4.06it/s]

Epoch 13/100, Train Loss: 27.8395, Val Loss: 467.9750, Val R2: -0.6939, Val MAPE: 0.2273


 14%|█▍        | 14/100 [00:03<00:20,  4.18it/s]

Epoch 14/100, Train Loss: 22.6181, Val Loss: 421.3414, Val R2: -0.5251, Val MAPE: 0.2202


 15%|█▌        | 15/100 [00:03<00:19,  4.31it/s]

Epoch 15/100, Train Loss: 18.5279, Val Loss: 381.6387, Val R2: -0.3814, Val MAPE: 0.2137


 16%|█▌        | 16/100 [00:03<00:19,  4.34it/s]

Epoch 16/100, Train Loss: 15.3393, Val Loss: 347.5828, Val R2: -0.2581, Val MAPE: 0.2070


 17%|█▋        | 17/100 [00:04<00:19,  4.25it/s]

Epoch 17/100, Train Loss: 12.8885, Val Loss: 318.0383, Val R2: -0.1512, Val MAPE: 0.2000


 18%|█▊        | 18/100 [00:04<00:18,  4.38it/s]

Epoch 18/100, Train Loss: 11.0270, Val Loss: 291.9163, Val R2: -0.0566, Val MAPE: 0.1923


 19%|█▉        | 19/100 [00:04<00:17,  4.50it/s]

Epoch 19/100, Train Loss: 9.6590, Val Loss: 268.1608, Val R2: 0.0293, Val MAPE: 0.1832


 21%|██        | 21/100 [00:04<00:17,  4.64it/s]

Epoch 20/100, Train Loss: 8.8365, Val Loss: 246.3737, Val R2: 0.1082, Val MAPE: 0.1726
Epoch 21/100, Train Loss: 8.4482, Val Loss: 226.5535, Val R2: 0.1799, Val MAPE: 0.1607


 22%|██▏       | 22/100 [00:05<00:16,  4.72it/s]

Epoch 22/100, Train Loss: 7.4367, Val Loss: 208.5752, Val R2: 0.2450, Val MAPE: 0.1470


 23%|██▎       | 23/100 [00:05<00:16,  4.71it/s]

Epoch 23/100, Train Loss: 6.1888, Val Loss: 192.9311, Val R2: 0.3017, Val MAPE: 0.1333


 24%|██▍       | 24/100 [00:05<00:16,  4.73it/s]

Epoch 24/100, Train Loss: 6.7803, Val Loss: 179.9941, Val R2: 0.3485, Val MAPE: 0.1258


 25%|██▌       | 25/100 [00:05<00:16,  4.63it/s]

Epoch 25/100, Train Loss: 6.4506, Val Loss: 168.6132, Val R2: 0.3897, Val MAPE: 0.1204


 26%|██▌       | 26/100 [00:06<00:15,  4.68it/s]

Epoch 26/100, Train Loss: 4.8344, Val Loss: 158.5860, Val R2: 0.4260, Val MAPE: 0.1164


 27%|██▋       | 27/100 [00:06<00:15,  4.72it/s]

Epoch 27/100, Train Loss: 3.8530, Val Loss: 151.4057, Val R2: 0.4520, Val MAPE: 0.1156


 28%|██▊       | 28/100 [00:06<00:15,  4.77it/s]

Epoch 28/100, Train Loss: 3.2785, Val Loss: 145.4932, Val R2: 0.4734, Val MAPE: 0.1137


 29%|██▉       | 29/100 [00:06<00:15,  4.71it/s]

Epoch 29/100, Train Loss: 2.7315, Val Loss: 134.2358, Val R2: 0.5141, Val MAPE: 0.1128


 30%|███       | 30/100 [00:06<00:15,  4.46it/s]

Epoch 30/100, Train Loss: 3.4712, Val Loss: 122.9340, Val R2: 0.5550, Val MAPE: 0.1155


 31%|███       | 31/100 [00:07<00:15,  4.44it/s]

Epoch 31/100, Train Loss: 4.5240, Val Loss: 118.2046, Val R2: 0.5721, Val MAPE: 0.1169


 32%|███▏      | 32/100 [00:07<00:15,  4.42it/s]

Epoch 32/100, Train Loss: 3.4085, Val Loss: 112.2446, Val R2: 0.5937, Val MAPE: 0.1062


 34%|███▍      | 34/100 [00:07<00:14,  4.64it/s]

Epoch 33/100, Train Loss: 2.1264, Val Loss: 107.4138, Val R2: 0.6112, Val MAPE: 0.0996
Epoch 34/100, Train Loss: 2.1454, Val Loss: 100.7411, Val R2: 0.6353, Val MAPE: 0.0934


 36%|███▌      | 36/100 [00:08<00:13,  4.90it/s]

Epoch 35/100, Train Loss: 2.3236, Val Loss: 94.6279, Val R2: 0.6575, Val MAPE: 0.0898
Epoch 36/100, Train Loss: 1.9485, Val Loss: 89.1290, Val R2: 0.6774, Val MAPE: 0.0873


 38%|███▊      | 38/100 [00:08<00:12,  5.05it/s]

Epoch 37/100, Train Loss: 1.3720, Val Loss: 84.0593, Val R2: 0.6957, Val MAPE: 0.0847
Epoch 38/100, Train Loss: 1.1546, Val Loss: 79.5073, Val R2: 0.7122, Val MAPE: 0.0821


 40%|████      | 40/100 [00:08<00:11,  5.11it/s]

Epoch 39/100, Train Loss: 1.4239, Val Loss: 74.7682, Val R2: 0.7294, Val MAPE: 0.0776
Epoch 40/100, Train Loss: 1.2236, Val Loss: 70.1311, Val R2: 0.7461, Val MAPE: 0.0741


 42%|████▏     | 42/100 [00:09<00:11,  5.15it/s]

Epoch 41/100, Train Loss: 0.9157, Val Loss: 66.3139, Val R2: 0.7600, Val MAPE: 0.0724
Epoch 42/100, Train Loss: 0.7434, Val Loss: 62.7505, Val R2: 0.7729, Val MAPE: 0.0687


 44%|████▍     | 44/100 [00:09<00:10,  5.15it/s]

Epoch 43/100, Train Loss: 0.8286, Val Loss: 59.9180, Val R2: 0.7831, Val MAPE: 0.0658
Epoch 44/100, Train Loss: 1.0911, Val Loss: 57.7992, Val R2: 0.7908, Val MAPE: 0.0650


 46%|████▌     | 46/100 [00:10<00:10,  5.19it/s]

Epoch 45/100, Train Loss: 1.5372, Val Loss: 57.1482, Val R2: 0.7931, Val MAPE: 0.0664
Epoch 46/100, Train Loss: 2.2929, Val Loss: 60.2427, Val R2: 0.7819, Val MAPE: 0.0725


 47%|████▋     | 47/100 [00:10<00:10,  5.18it/s]

Epoch 47/100, Train Loss: 3.0853, Val Loss: 80.8748, Val R2: 0.7073, Val MAPE: 0.0969


 48%|████▊     | 48/100 [00:10<00:10,  4.93it/s]

Epoch 48/100, Train Loss: 3.7184, Val Loss: 124.7228, Val R2: 0.5485, Val MAPE: 0.1236


 50%|█████     | 50/100 [00:10<00:10,  4.86it/s]

Epoch 49/100, Train Loss: 4.4152, Val Loss: 106.0152, Val R2: 0.6163, Val MAPE: 0.1016
Epoch 50/100, Train Loss: 5.1152, Val Loss: 71.6060, Val R2: 0.7408, Val MAPE: 0.1003


 52%|█████▏    | 52/100 [00:11<00:09,  4.97it/s]

Epoch 51/100, Train Loss: 5.1720, Val Loss: 62.7151, Val R2: 0.7730, Val MAPE: 0.0933
Epoch 52/100, Train Loss: 3.2289, Val Loss: 63.2740, Val R2: 0.7710, Val MAPE: 0.0883


 53%|█████▎    | 53/100 [00:11<00:09,  4.97it/s]

Epoch 53/100, Train Loss: 1.7522, Val Loss: 61.4477, Val R2: 0.7776, Val MAPE: 0.0825


 55%|█████▌    | 55/100 [00:11<00:08,  5.03it/s]

Epoch 54/100, Train Loss: 1.2755, Val Loss: 57.5266, Val R2: 0.7918, Val MAPE: 0.0763
Epoch 55/100, Train Loss: 0.7817, Val Loss: 53.4092, Val R2: 0.8067, Val MAPE: 0.0718


 56%|█████▌    | 56/100 [00:12<00:08,  4.97it/s]

Epoch 56/100, Train Loss: 0.5566, Val Loss: 50.3886, Val R2: 0.8176, Val MAPE: 0.0692


 58%|█████▊    | 58/100 [00:12<00:08,  5.02it/s]

Epoch 57/100, Train Loss: 0.4668, Val Loss: 49.4344, Val R2: 0.8211, Val MAPE: 0.0677
Epoch 58/100, Train Loss: 0.3611, Val Loss: 49.6469, Val R2: 0.8203, Val MAPE: 0.0668


 60%|██████    | 60/100 [00:12<00:07,  5.02it/s]

Epoch 59/100, Train Loss: 0.3509, Val Loss: 48.4229, Val R2: 0.8247, Val MAPE: 0.0655
Epoch 60/100, Train Loss: 0.3308, Val Loss: 46.2847, Val R2: 0.8325, Val MAPE: 0.0633


 61%|██████    | 61/100 [00:13<00:07,  5.01it/s]

Epoch 61/100, Train Loss: 0.2842, Val Loss: 44.6945, Val R2: 0.8382, Val MAPE: 0.0620


 62%|██████▏   | 62/100 [00:13<00:07,  4.98it/s]

Epoch 62/100, Train Loss: 0.2749, Val Loss: 44.5571, Val R2: 0.8387, Val MAPE: 0.0617


 63%|██████▎   | 63/100 [00:13<00:07,  4.96it/s]

Epoch 63/100, Train Loss: 0.2724, Val Loss: 45.1506, Val R2: 0.8366, Val MAPE: 0.0621


 64%|██████▍   | 64/100 [00:13<00:07,  4.96it/s]

Epoch 64/100, Train Loss: 0.3247, Val Loss: 46.5818, Val R2: 0.8314, Val MAPE: 0.0629


 65%|██████▌   | 65/100 [00:13<00:07,  4.96it/s]

Epoch 65/100, Train Loss: 0.3455, Val Loss: 47.4080, Val R2: 0.8284, Val MAPE: 0.0642


 67%|██████▋   | 67/100 [00:14<00:06,  4.97it/s]

Epoch 66/100, Train Loss: 0.5106, Val Loss: 50.1963, Val R2: 0.8183, Val MAPE: 0.0657
Epoch 67/100, Train Loss: 0.5306, Val Loss: 52.6876, Val R2: 0.8093, Val MAPE: 0.0722


 69%|██████▉   | 69/100 [00:14<00:06,  5.01it/s]

Epoch 68/100, Train Loss: 1.1947, Val Loss: 57.0105, Val R2: 0.7936, Val MAPE: 0.0723
Epoch 69/100, Train Loss: 1.7199, Val Loss: 63.5289, Val R2: 0.7700, Val MAPE: 0.0777


 71%|███████   | 71/100 [00:15<00:05,  5.11it/s]

Epoch 70/100, Train Loss: 2.1392, Val Loss: 45.1050, Val R2: 0.8367, Val MAPE: 0.0714
Epoch 71/100, Train Loss: 3.8487, Val Loss: 41.2496, Val R2: 0.8507, Val MAPE: 0.0776


 72%|███████▏  | 72/100 [00:15<00:05,  5.01it/s]

Epoch 72/100, Train Loss: 4.7892, Val Loss: 55.9484, Val R2: 0.7975, Val MAPE: 0.1021


 73%|███████▎  | 73/100 [00:15<00:05,  4.82it/s]

Epoch 73/100, Train Loss: 5.2597, Val Loss: 41.3541, Val R2: 0.8503, Val MAPE: 0.0716


 75%|███████▌  | 75/100 [00:15<00:05,  4.81it/s]

Epoch 74/100, Train Loss: 4.3783, Val Loss: 39.9731, Val R2: 0.8553, Val MAPE: 0.0670
Epoch 75/100, Train Loss: 1.7431, Val Loss: 36.8557, Val R2: 0.8666, Val MAPE: 0.0577


 77%|███████▋  | 77/100 [00:16<00:04,  4.97it/s]

Epoch 76/100, Train Loss: 1.2683, Val Loss: 38.3355, Val R2: 0.8612, Val MAPE: 0.0585
Epoch 77/100, Train Loss: 0.9751, Val Loss: 42.5418, Val R2: 0.8460, Val MAPE: 0.0622


 79%|███████▉  | 79/100 [00:16<00:04,  5.04it/s]

Epoch 78/100, Train Loss: 0.9171, Val Loss: 48.4788, Val R2: 0.8245, Val MAPE: 0.0679
Epoch 79/100, Train Loss: 0.7656, Val Loss: 56.5245, Val R2: 0.7954, Val MAPE: 0.0778


 81%|████████  | 81/100 [00:17<00:03,  5.13it/s]

Epoch 80/100, Train Loss: 0.6045, Val Loss: 57.7494, Val R2: 0.7910, Val MAPE: 0.0767
Epoch 81/100, Train Loss: 0.5644, Val Loss: 48.0881, Val R2: 0.8259, Val MAPE: 0.0656


 83%|████████▎ | 83/100 [00:17<00:03,  5.16it/s]

Epoch 82/100, Train Loss: 0.8225, Val Loss: 36.6436, Val R2: 0.8674, Val MAPE: 0.0635
Epoch 83/100, Train Loss: 1.6022, Val Loss: 32.5043, Val R2: 0.8823, Val MAPE: 0.0645


 85%|████████▌ | 85/100 [00:17<00:02,  5.12it/s]

Epoch 84/100, Train Loss: 2.0117, Val Loss: 34.8717, Val R2: 0.8738, Val MAPE: 0.0707
Epoch 85/100, Train Loss: 1.7958, Val Loss: 35.3387, Val R2: 0.8721, Val MAPE: 0.0692


 87%|████████▋ | 87/100 [00:18<00:02,  5.12it/s]

Epoch 86/100, Train Loss: 1.0719, Val Loss: 32.5554, Val R2: 0.8822, Val MAPE: 0.0608
Epoch 87/100, Train Loss: 0.5587, Val Loss: 31.5471, Val R2: 0.8858, Val MAPE: 0.0518


 89%|████████▉ | 89/100 [00:18<00:02,  5.14it/s]

Epoch 88/100, Train Loss: 1.0087, Val Loss: 38.4735, Val R2: 0.8607, Val MAPE: 0.0581
Epoch 89/100, Train Loss: 1.5177, Val Loss: 48.3235, Val R2: 0.8251, Val MAPE: 0.0720


 91%|█████████ | 91/100 [00:19<00:01,  5.16it/s]

Epoch 90/100, Train Loss: 1.5484, Val Loss: 58.0328, Val R2: 0.7899, Val MAPE: 0.0822
Epoch 91/100, Train Loss: 1.2669, Val Loss: 68.4803, Val R2: 0.7521, Val MAPE: 0.0903


 93%|█████████▎| 93/100 [00:19<00:01,  5.20it/s]

Epoch 92/100, Train Loss: 0.8431, Val Loss: 64.0139, Val R2: 0.7683, Val MAPE: 0.0767
Epoch 93/100, Train Loss: 1.0021, Val Loss: 45.7644, Val R2: 0.8343, Val MAPE: 0.0737


 95%|█████████▌| 95/100 [00:19<00:00,  5.18it/s]

Epoch 94/100, Train Loss: 1.6294, Val Loss: 38.1752, Val R2: 0.8618, Val MAPE: 0.0723
Epoch 95/100, Train Loss: 1.6826, Val Loss: 36.8506, Val R2: 0.8666, Val MAPE: 0.0673


 97%|█████████▋| 97/100 [00:20<00:00,  5.22it/s]

Epoch 96/100, Train Loss: 1.0594, Val Loss: 36.5191, Val R2: 0.8678, Val MAPE: 0.0641
Epoch 97/100, Train Loss: 0.4733, Val Loss: 37.7204, Val R2: 0.8635, Val MAPE: 0.0592


 98%|█████████▊| 98/100 [00:20<00:00,  4.91it/s]

Epoch 98/100, Train Loss: 0.6784, Val Loss: 39.9937, Val R2: 0.8552, Val MAPE: 0.0582


100%|██████████| 100/100 [00:20<00:00,  4.79it/s]

Epoch 99/100, Train Loss: 0.7456, Val Loss: 45.1229, Val R2: 0.8367, Val MAPE: 0.0643
Epoch 100/100, Train Loss: 0.6658, Val Loss: 49.9401, Val R2: 0.8192, Val MAPE: 0.0697
Loaded best model from BiLSTM_logs/logs_2/best_model_epoch_87_r2_0.8858.pth





In [8]:
# Run the rolling prediction over the test set.
window_size = len(pd.concat([train_data, val_data]))  # initial window spans all training + validation rows
model.load_state_dict(torch.load(best_model_path))  # make sure the best-on-validation checkpoint is in use
print(f'Using model from {best_model_path} for rolling prediction.')
# num_epochs is left at the function's default for the per-step retraining.
test_predictions = rolling_predict_with_retraining(model, train_data, val_data, test_data, window_size, device)


Using model from BiLSTM_logs/logs_2/best_model_epoch_87_r2_0.8858.pth for rolling prediction.


Rolling Prediction: 100%|██████████| 423/423 [08:13<00:00,  1.17s/it]


In [9]:

# Score the rolling forecast against the true test-set closes (R2 and MAPE).
test_actuals = test_data['Close'].values
test_r2 = r2_score(test_actuals, test_predictions)
test_mape = mean_absolute_percentage_error(test_actuals, test_predictions)

print(f'Test R2: {test_r2:.4f}')
print(f'Test MAPE: {test_mape:.4f}')

# Write actual vs. predicted closes to a two-column CSV under `logs_dir`.
predictions_csv_path = os.path.join(logs_dir, 'predictions.csv')
predictions_df = pd.DataFrame({'Actual': test_actuals, 'Predicted': test_predictions})
predictions_df.to_csv(predictions_csv_path, index=False)
print(f'Saved predictions to {predictions_csv_path}')

# Plot the training history (loss, R2, MAPE) with Plotly; the x axis is the
# 1-based epoch number.
epochs = list(range(1, num_epochs + 1))

fig_loss = go.Figure(
    data=[
        go.Scatter(x=epochs, y=train_losses, mode='lines', name='Train Loss'),
        go.Scatter(x=epochs, y=val_losses, mode='lines', name='Validation Loss'),
    ],
    layout=go.Layout(
        title='Train and Validation Loss',
        xaxis=dict(title='Epoch'),
        yaxis=dict(title='Loss'),
    ),
)
fig_loss.show()

fig_r2 = go.Figure(
    data=[go.Scatter(x=epochs, y=val_r2s, mode='lines', name='Validation R2')],
    layout=go.Layout(
        title='Validation R2 Over Epochs',
        xaxis=dict(title='Epoch'),
        yaxis=dict(title='R2'),
    ),
)
fig_r2.show()

fig_mape = go.Figure(
    data=[go.Scatter(x=epochs, y=val_mapes, mode='lines', name='Validation MAPE')],
    layout=go.Layout(
        title='Validation MAPE Over Epochs',
        xaxis=dict(title='Epoch'),
        yaxis=dict(title='MAPE'),
    ),
)
fig_mape.show()

# Plot the rolling predictions against the actual closes over the test index.
test_trace_actual = go.Scatter(x=test_data.index, y=test_actuals, mode='lines', name='Test Actual Close Price')
test_trace_predicted = go.Scatter(x=test_data.index, y=test_predictions, mode='lines', name='Test Predicted Close Price')

layout = go.Layout(title='Rolling Prediction vs Actual Close Price', xaxis=dict(title='Time'), yaxis=dict(title='Close Price'))

fig = go.Figure(layout=layout)
fig.add_traces([test_trace_actual, test_trace_predicted])
fig.show()


Test R2: 0.9467
Test MAPE: 0.0161
Saved predictions to BiLSTM_logs/predictions.csv


---
# KAN

In [19]:
import os
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from torch import nn, optim
from sklearn.metrics import r2_score, mean_absolute_percentage_error
import plotly.graph_objs as go
import plotly.offline as py
from tqdm import tqdm
from KAN import KAN

# Read the CSV and fill gaps: backward fill first, then forward fill for
# anything still missing.
file_path = '/teamspace/studios/this_studio/ping_an_data_with_pandas_ta_features.csv'
data = pd.read_csv(file_path).bfill().ffill()

# Input columns fed to the models.
features = [
    'Open', 'High', 'Low', 'Close', 'Volume',
    'Return', 'MA5', 'MA10', 'MA50', 'MA200',
    'BBL_20_2.0', 'BBM_20_2.0', 'BBU_20_2.0', 'BBB_20_2.0', 'BBP_20_2.0',
    'STOCHk_14_3_3', 'STOCHd_14_3_3',
    'ATR', 'VWAP', 'CMO', 'RSI',
    'MACD_12_26_9', 'MACDh_12_26_9', 'MACDs_12_26_9',
    'Volatility',
]

# Split by row position, in order: 70% train, 20% validation, the rest test.
train_size = int(len(data) * 0.7)
val_size = int(len(data) * 0.2)
test_size = len(data) - train_size - val_size

train_data, val_data, test_data = (
    data.iloc[:train_size],
    data.iloc[train_size:train_size + val_size],
    data.iloc[train_size + val_size:],
)

class StockDataset(Dataset):
    """Row-wise dataset of standardised feature vectors paired with a raw target value.

    Each item is ``(feature_tensor, target_tensor)`` for one DataFrame row.
    Features are standardised column by column; the target is left unscaled.

    NOTE(review): the notebook-level `features` list contains 'Close', which is
    also the default target column - confirm that this is intended.
    """

    def __init__(self, data, mean=None, std=None, feature_cols=None, target_col='Close'):
        """Extract, standardise and store the features and targets of `data`.

        Args:
            data: DataFrame holding the feature columns and the target column.
            mean, std: Per-column statistics used for standardisation. If either
                is None, both are computed from `data`. Pass the training set's
                values to normalise other splits consistently.
            feature_cols: Column names used as model inputs. Defaults to the
                notebook-level `features` list.
            target_col: Name of the target column.
        """
        if feature_cols is None:
            feature_cols = features

        self.data = data
        self.features = data[list(feature_cols)].values.astype(np.float32)
        self.targets = data[target_col].values.astype(np.float32)

        if mean is None or std is None:
            self.mean = self.features.mean(axis=0)
            self.std = self.features.std(axis=0)
        else:
            self.mean = mean
            self.std = std

        # A constant column has std == 0 and dividing by it would fill that
        # column with NaN/inf. Divide by 1 there instead, so the column becomes
        # (value - mean). `self.std` itself is stored unmodified.
        std_arr = np.asarray(self.std)
        safe_std = np.where(std_arr == 0, np.ones_like(std_arr), std_arr)
        self.features = (self.features - self.mean) / safe_std

    def __len__(self):
        """Return the number of rows in the underlying DataFrame."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(features, target)`` for row `idx` as tensors."""
        feature = self.features[idx]
        target = self.targets[idx]
        return torch.tensor(feature), torch.tensor(target)

# Root folder for this model's training runs. exist_ok=True makes the cell safe
# to re-run and avoids a separate existence check before the create.
logs_dir = "KAN_logs"
os.makedirs(logs_dir, exist_ok=True)

# 获取下一个训练编号
def get_next_log_index(logs_dir):
    """Return the index to use for the next ``logs_<N>`` run folder in `logs_dir`.

    Only sub-directories named exactly ``logs_<integer>`` are counted. Anything
    else (e.g. ``.ipynb_checkpoints``, plain files, folders without a numeric
    suffix) is ignored rather than crashing the integer parse.

    Args:
        logs_dir: Existing directory that holds the run folders.

    Returns:
        int: 0 when no run folder exists yet, otherwise the highest index + 1.
    """
    import re  # local import: this module is not otherwise used in the notebook

    indices = []
    for name in os.listdir(logs_dir):
        match = re.fullmatch(r'logs_(\d+)', name)
        if match and os.path.isdir(os.path.join(logs_dir, name)):
            indices.append(int(match.group(1)))
    if not indices:
        return 0
    return max(indices) + 1

# 训练函数
def train(model, loader, criterion, optimizer, device):
    """Run one optimisation pass over `loader` and return the mean per-sample loss.

    The model is switched to training mode. Targets get a trailing dimension
    added (``unsqueeze(1)``) before being compared with the model output. Each
    batch loss is weighted by its batch size and the total is divided by
    ``len(loader.dataset)``.
    """
    model.train()
    loss_sum = 0.0
    for inputs, labels in loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        batch_loss = criterion(model(inputs), labels.unsqueeze(1))
        batch_loss.backward()
        optimizer.step()

        loss_sum += batch_loss.item() * inputs.size(0)
    return loss_sum / len(loader.dataset)

# 验证函数
def validate(model, loader, criterion, device):
    """Evaluate `model` on `loader` without gradient tracking.

    Returns:
        Tuple ``(mean_loss, r2, mape)``: the batch-size-weighted mean loss over
        ``len(loader.dataset)`` samples, followed by `r2_score` and
        `mean_absolute_percentage_error` computed over all collected
        targets and predictions.
    """
    model.eval()
    loss_sum = 0.0
    y_true, y_pred = [], []
    with torch.no_grad():
        for inputs, labels in loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            preds = model(inputs)
            batch_loss = criterion(preds, labels.unsqueeze(1))
            loss_sum += batch_loss.item() * inputs.size(0)
            # Move to CPU/NumPy so the sklearn metrics can consume the values.
            y_true.extend(labels.cpu().numpy())
            y_pred.extend(preds.cpu().numpy())
    mean_loss = loss_sum / len(loader.dataset)
    r2 = r2_score(y_true, y_pred)
    mape = mean_absolute_percentage_error(y_true, y_pred)
    return mean_loss, r2, mape

# 训练和验证过程
def train_and_validate(model, train_loader, val_loader, num_epochs, device):
    """Train `model` for `num_epochs` epochs, checkpointing whenever validation R2 improves.

    Uses Huber loss and AdamW (lr=3e-4, weight_decay=1e-4). Checkpoints go into
    a new ``logs_<N>`` folder under the notebook-level `logs_dir`. The best
    checkpoint, if any was written, is loaded back into `model` before returning.

    Returns:
        Tuple ``(train_losses, val_losses, val_r2s, val_mapes, best_model_path)``.
        `best_model_path` is None when no checkpoint was written.
    """
    criterion = nn.HuberLoss()
    optimizer = optim.AdamW(model.parameters(), lr=3e-4, weight_decay=1e-4)

    # One numbered folder per call.
    log_dir = os.path.join(logs_dir, f'logs_{get_next_log_index(logs_dir)}')
    os.makedirs(log_dir)

    train_losses, val_losses, val_r2s, val_mapes = [], [], [], []
    histories = (train_losses, val_losses, val_r2s, val_mapes)
    best_val_r2 = float('-inf')
    best_model_path = None

    for epoch in tqdm(range(num_epochs)):
        train_loss = train(model, train_loader, criterion, optimizer, device)
        val_loss, val_r2, val_mape = validate(model, val_loader, criterion, device)
        for history, value in zip(histories, (train_loss, val_loss, val_r2, val_mape)):
            history.append(value)

        # New best validation R2: write a checkpoint named after epoch and score.
        if val_r2 > best_val_r2:
            best_val_r2 = val_r2
            best_model_path = os.path.join(log_dir, f'best_model_epoch_{epoch+1}_r2_{val_r2:.4f}.pth')
            torch.save(model.state_dict(), best_model_path)

        print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val R2: {val_r2:.4f}, Val MAPE: {val_mape:.4f}')

    # Hand the caller the best weights instead of the final epoch's.
    if best_model_path:
        best_state = torch.load(best_model_path)
        model.load_state_dict(best_state)
        print(f'Loaded best model from {best_model_path}')
    return train_losses, val_losses, val_r2s, val_mapes, best_model_path

# 滚动预测函数（带再训练）
def rolling_predict_with_retraining(model, train_data, val_data, test_data, window_size, device, num_epochs=20):
    """Step through `test_data` one row at a time, predicting and then fine-tuning the model.

    For step ``i`` the function:
      1. concatenates the accumulated history with ``test_data[:i]`` and keeps the
         last `window_size` rows;
      2. runs the model on that window and records the output for the window's
         final row as the prediction for step ``i``;
      3. appends a copy of test row ``i`` whose 'Close' is replaced by that
         prediction to the accumulated history;
      4. retrains the model on the window for `num_epochs` passes (Huber loss, AdamW).

    Relies on notebook-level names: `StockDataset`, `train`, `train_dataset`
    (source of the normalisation mean/std) and `batch_size`.

    Args:
        model: Trained network; it is further trained in place.
        train_data, val_data: DataFrames that seed the history.
        test_data: DataFrame to predict row by row.
        window_size: Maximum number of most recent rows kept in each window.
        device: Device the feature batches are moved to.
        num_epochs: Retraining passes per step.

    Returns:
        list[float]: one prediction per row of `test_data`.
    """
    model.eval()
    predictions = []
    criterion = nn.HuberLoss()
    optimizer = optim.AdamW(model.parameters(), lr=3e-4)
    updated_train_data = pd.concat([train_data, val_data])

    for i in tqdm(range(len(test_data)), desc="Rolling Prediction"):
        # Build the rolling window.
        # NOTE(review): `updated_train_data` already holds copies of test rows
        # 0..i-1 (with predicted 'Close'), so the window contains those rows
        # alongside the originals from `test_data[:i]` - confirm this is intended.
        window_data = pd.concat([updated_train_data, test_data[:i]], axis=0).tail(window_size)
        window_dataset = StockDataset(window_data, mean=train_dataset.mean, std=train_dataset.std)
        window_loader = DataLoader(window_dataset, batch_size=len(window_dataset), shuffle=False)

        # Predict: the loader yields the whole window as a single batch and only
        # the output for its last row is kept.
        with torch.no_grad():
            for features, _ in window_loader:
                features = features.to(device)
                prediction = model(features)
                predictions.append(prediction[-1].item())

        # Feed the prediction back into the history (simulating live use).
        new_row = test_data.iloc[i].copy()
        new_row['Close'] = predictions[-1]
        updated_train_data = pd.concat([updated_train_data, new_row.to_frame().T])

        # Retrain on the current window.
        train_loader = DataLoader(window_dataset, batch_size=batch_size, shuffle=False)
        for _ in range(num_epochs):
            train(model, train_loader, criterion, optimizer, device)

        # train() switches the model to training mode. Switch back so the next
        # prediction, and the caller, see inference behaviour for layers such as
        # dropout.
        model.eval()

    return predictions


In [20]:

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Mini-batch size for the loaders below; rolling_predict_with_retraining also
# reads this notebook-level name.
batch_size = 128

# Instantiate the KAN model. `layers_hidden` starts at the number of input
# features and ends at a single output.
model = KAN(
    layers_hidden=[len(features), 64, 128, 64, 1],
    grid_size=5,
    spline_order=3,
    scale_noise=0.1,
    scale_base=1.0,
    scale_spline=1.0,
    base_activation=torch.nn.ReLU,
    grid_eps=0.02,
    grid_range=[-1, 1],
).to(device)
num_epochs = 70

# The validation set is normalised with the training set's mean/std. Both
# loaders keep the rows in their original order (shuffle=False).
train_dataset = StockDataset(train_data)
val_dataset = StockDataset(val_data, train_dataset.mean, train_dataset.std)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# Returns the per-epoch histories plus the path of the best checkpoint; the
# best weights are already loaded into `model` on return.
train_losses, val_losses, val_r2s, val_mapes, best_model_path = train_and_validate(model, train_loader, val_loader, num_epochs, device)


  3%|▎         | 2/70 [00:00<00:12,  5.50it/s]

Epoch 1/70, Train Loss: 32.2924, Val Loss: 67.9315, Val R2: -16.9374, Val MAPE: 0.9945
Epoch 2/70, Train Loss: 31.9455, Val Loss: 67.1214, Val R2: -16.5131, Val MAPE: 0.9828


  6%|▌         | 4/70 [00:00<00:12,  5.36it/s]

Epoch 3/70, Train Loss: 31.0671, Val Loss: 64.9243, Val R2: -15.3861, Val MAPE: 0.9511
Epoch 4/70, Train Loss: 28.7002, Val Loss: 58.9786, Val R2: -12.5207, Val MAPE: 0.8657


  9%|▊         | 6/70 [00:01<00:11,  5.43it/s]

Epoch 5/70, Train Loss: 22.5959, Val Loss: 43.6989, Val R2: -6.4253, Val MAPE: 0.6469
Epoch 6/70, Train Loss: 10.7808, Val Loss: 15.4852, Val R2: -0.0242, Val MAPE: 0.2442


 11%|█▏        | 8/70 [00:01<00:11,  5.47it/s]

Epoch 7/70, Train Loss: 6.8789, Val Loss: 6.5120, Val R2: 0.7627, Val MAPE: 0.1155
Epoch 8/70, Train Loss: 6.1533, Val Loss: 7.6207, Val R2: 0.7238, Val MAPE: 0.1331


 14%|█▍        | 10/70 [00:01<00:11,  5.44it/s]

Epoch 9/70, Train Loss: 4.7887, Val Loss: 4.2210, Val R2: 0.8823, Val MAPE: 0.0817
Epoch 10/70, Train Loss: 4.1791, Val Loss: 3.4179, Val R2: 0.9121, Val MAPE: 0.0672


 17%|█▋        | 12/70 [00:02<00:10,  5.46it/s]

Epoch 11/70, Train Loss: 3.4845, Val Loss: 3.7429, Val R2: 0.9106, Val MAPE: 0.0667
Epoch 12/70, Train Loss: 3.0218, Val Loss: 3.7233, Val R2: 0.9147, Val MAPE: 0.0635


 20%|██        | 14/70 [00:02<00:10,  5.48it/s]

Epoch 13/70, Train Loss: 2.6099, Val Loss: 3.5515, Val R2: 0.9220, Val MAPE: 0.0592
Epoch 14/70, Train Loss: 2.2409, Val Loss: 3.3835, Val R2: 0.9274, Val MAPE: 0.0547


 23%|██▎       | 16/70 [00:02<00:09,  5.48it/s]

Epoch 15/70, Train Loss: 1.8742, Val Loss: 3.2269, Val R2: 0.9324, Val MAPE: 0.0512
Epoch 16/70, Train Loss: 1.4844, Val Loss: 3.0527, Val R2: 0.9378, Val MAPE: 0.0479


 26%|██▌       | 18/70 [00:03<00:09,  5.51it/s]

Epoch 17/70, Train Loss: 1.1715, Val Loss: 3.1392, Val R2: 0.9340, Val MAPE: 0.0483
Epoch 18/70, Train Loss: 0.9765, Val Loss: 3.0721, Val R2: 0.9366, Val MAPE: 0.0470


 29%|██▊       | 20/70 [00:03<00:09,  5.47it/s]

Epoch 19/70, Train Loss: 0.7580, Val Loss: 2.7517, Val R2: 0.9472, Val MAPE: 0.0427
Epoch 20/70, Train Loss: 0.6216, Val Loss: 2.6239, Val R2: 0.9507, Val MAPE: 0.0407


 30%|███       | 21/70 [00:03<00:08,  5.47it/s]

Epoch 21/70, Train Loss: 0.5576, Val Loss: 2.0038, Val R2: 0.9677, Val MAPE: 0.0330


 31%|███▏      | 22/70 [00:04<00:09,  5.27it/s]

Epoch 22/70, Train Loss: 0.5513, Val Loss: 1.4863, Val R2: 0.9807, Val MAPE: 0.0274


 34%|███▍      | 24/70 [00:04<00:09,  4.88it/s]

Epoch 23/70, Train Loss: 0.5173, Val Loss: 1.4038, Val R2: 0.9821, Val MAPE: 0.0259
Epoch 24/70, Train Loss: 0.4552, Val Loss: 2.3718, Val R2: 0.9585, Val MAPE: 0.0372


 37%|███▋      | 26/70 [00:04<00:08,  5.19it/s]

Epoch 25/70, Train Loss: 0.9837, Val Loss: 3.5214, Val R2: 0.9279, Val MAPE: 0.0557
Epoch 26/70, Train Loss: 1.1179, Val Loss: 2.5111, Val R2: 0.9526, Val MAPE: 0.0388


 40%|████      | 28/70 [00:05<00:07,  5.36it/s]

Epoch 27/70, Train Loss: 0.9086, Val Loss: 1.0539, Val R2: 0.9885, Val MAPE: 0.0218
Epoch 28/70, Train Loss: 0.5413, Val Loss: 1.3615, Val R2: 0.9831, Val MAPE: 0.0255


 43%|████▎     | 30/70 [00:05<00:07,  5.45it/s]

Epoch 29/70, Train Loss: 0.3830, Val Loss: 1.6949, Val R2: 0.9751, Val MAPE: 0.0285
Epoch 30/70, Train Loss: 0.7462, Val Loss: 3.2331, Val R2: 0.9369, Val MAPE: 0.0515


 46%|████▌     | 32/70 [00:05<00:06,  5.50it/s]

Epoch 31/70, Train Loss: 1.1069, Val Loss: 2.8794, Val R2: 0.9421, Val MAPE: 0.0435
Epoch 32/70, Train Loss: 0.7515, Val Loss: 1.1224, Val R2: 0.9874, Val MAPE: 0.0226


 49%|████▊     | 34/70 [00:06<00:06,  5.48it/s]

Epoch 33/70, Train Loss: 0.4822, Val Loss: 1.3700, Val R2: 0.9827, Val MAPE: 0.0253
Epoch 34/70, Train Loss: 0.3425, Val Loss: 2.0052, Val R2: 0.9676, Val MAPE: 0.0322


 51%|█████▏    | 36/70 [00:06<00:06,  5.46it/s]

Epoch 35/70, Train Loss: 0.7512, Val Loss: 3.1244, Val R2: 0.9403, Val MAPE: 0.0499
Epoch 36/70, Train Loss: 1.0688, Val Loss: 2.3951, Val R2: 0.9564, Val MAPE: 0.0377


 54%|█████▍    | 38/70 [00:07<00:05,  5.46it/s]

Epoch 37/70, Train Loss: 1.2099, Val Loss: 1.3743, Val R2: 0.9828, Val MAPE: 0.0257
Epoch 38/70, Train Loss: 1.3304, Val Loss: 3.1100, Val R2: 0.9437, Val MAPE: 0.0510


 57%|█████▋    | 40/70 [00:07<00:05,  5.47it/s]

Epoch 39/70, Train Loss: 2.3620, Val Loss: 1.6827, Val R2: 0.9766, Val MAPE: 0.0301
Epoch 40/70, Train Loss: 1.1605, Val Loss: 3.3890, Val R2: 0.9237, Val MAPE: 0.0511


 60%|██████    | 42/70 [00:07<00:05,  5.45it/s]

Epoch 41/70, Train Loss: 1.3776, Val Loss: 3.0684, Val R2: 0.9400, Val MAPE: 0.0479
Epoch 42/70, Train Loss: 1.5819, Val Loss: 2.7750, Val R2: 0.9465, Val MAPE: 0.0426


 63%|██████▎   | 44/70 [00:08<00:04,  5.43it/s]

Epoch 43/70, Train Loss: 0.3525, Val Loss: 1.5242, Val R2: 0.9797, Val MAPE: 0.0273
Epoch 44/70, Train Loss: 0.3052, Val Loss: 1.6895, Val R2: 0.9756, Val MAPE: 0.0284


 66%|██████▌   | 46/70 [00:08<00:04,  5.45it/s]

Epoch 45/70, Train Loss: 0.3502, Val Loss: 2.7081, Val R2: 0.9496, Val MAPE: 0.0420
Epoch 46/70, Train Loss: 0.5237, Val Loss: 3.1597, Val R2: 0.9376, Val MAPE: 0.0496


 69%|██████▊   | 48/70 [00:08<00:04,  5.46it/s]

Epoch 47/70, Train Loss: 0.4639, Val Loss: 2.1298, Val R2: 0.9644, Val MAPE: 0.0342
Epoch 48/70, Train Loss: 0.7076, Val Loss: 1.2477, Val R2: 0.9857, Val MAPE: 0.0252


 70%|███████   | 49/70 [00:09<00:04,  5.10it/s]

Epoch 49/70, Train Loss: 0.9132, Val Loss: 2.5312, Val R2: 0.9562, Val MAPE: 0.0406


 73%|███████▎  | 51/70 [00:09<00:03,  5.01it/s]

Epoch 50/70, Train Loss: 1.6217, Val Loss: 0.6362, Val R2: 0.9942, Val MAPE: 0.0155
Epoch 51/70, Train Loss: 0.8528, Val Loss: 1.4903, Val R2: 0.9812, Val MAPE: 0.0288


 76%|███████▌  | 53/70 [00:09<00:03,  5.23it/s]

Epoch 52/70, Train Loss: 1.7347, Val Loss: 2.5497, Val R2: 0.9543, Val MAPE: 0.0401
Epoch 53/70, Train Loss: 0.8899, Val Loss: 1.2681, Val R2: 0.9845, Val MAPE: 0.0230


 79%|███████▊  | 55/70 [00:10<00:02,  5.26it/s]

Epoch 54/70, Train Loss: 0.8136, Val Loss: 1.5010, Val R2: 0.9803, Val MAPE: 0.0278
Epoch 55/70, Train Loss: 1.4532, Val Loss: 1.8776, Val R2: 0.9711, Val MAPE: 0.0315


 81%|████████▏ | 57/70 [00:10<00:02,  5.31it/s]

Epoch 56/70, Train Loss: 1.0057, Val Loss: 2.2419, Val R2: 0.9643, Val MAPE: 0.0370
Epoch 57/70, Train Loss: 1.6077, Val Loss: 1.8309, Val R2: 0.9726, Val MAPE: 0.0312


 84%|████████▍ | 59/70 [00:11<00:02,  5.34it/s]

Epoch 58/70, Train Loss: 0.7859, Val Loss: 1.8781, Val R2: 0.9723, Val MAPE: 0.0327
Epoch 59/70, Train Loss: 1.1974, Val Loss: 2.9975, Val R2: 0.9433, Val MAPE: 0.0474


 87%|████████▋ | 61/70 [00:11<00:01,  5.36it/s]

Epoch 60/70, Train Loss: 1.3748, Val Loss: 2.6726, Val R2: 0.9503, Val MAPE: 0.0413
Epoch 61/70, Train Loss: 0.5350, Val Loss: 1.6199, Val R2: 0.9782, Val MAPE: 0.0296


 90%|█████████ | 63/70 [00:11<00:01,  5.35it/s]

Epoch 62/70, Train Loss: 0.9209, Val Loss: 2.2471, Val R2: 0.9635, Val MAPE: 0.0361
Epoch 63/70, Train Loss: 1.1767, Val Loss: 1.0370, Val R2: 0.9882, Val MAPE: 0.0199


 93%|█████████▎| 65/70 [00:12<00:00,  5.39it/s]

Epoch 64/70, Train Loss: 0.7174, Val Loss: 1.4714, Val R2: 0.9814, Val MAPE: 0.0281
Epoch 65/70, Train Loss: 1.2286, Val Loss: 1.3782, Val R2: 0.9822, Val MAPE: 0.0248


 96%|█████████▌| 67/70 [00:12<00:00,  5.30it/s]

Epoch 66/70, Train Loss: 0.6920, Val Loss: 1.3396, Val R2: 0.9831, Val MAPE: 0.0238
Epoch 67/70, Train Loss: 1.0084, Val Loss: 1.5922, Val R2: 0.9785, Val MAPE: 0.0289


 99%|█████████▊| 69/70 [00:12<00:00,  5.32it/s]

Epoch 68/70, Train Loss: 1.1355, Val Loss: 2.0777, Val R2: 0.9666, Val MAPE: 0.0342
Epoch 69/70, Train Loss: 1.0162, Val Loss: 2.2341, Val R2: 0.9639, Val MAPE: 0.0361


100%|██████████| 70/70 [00:13<00:00,  5.35it/s]

Epoch 70/70, Train Loss: 1.4310, Val Loss: 1.7259, Val R2: 0.9756, Val MAPE: 0.0302
Loaded best model from KAN_logs/logs_16/best_model_epoch_50_r2_0.9942.pth





In [21]:

# Run the rolling prediction over the test set.
window_size = len(pd.concat([train_data, val_data]))  # initial window spans all training + validation rows
model.load_state_dict(torch.load(best_model_path))  # make sure the best-on-validation checkpoint is in use
print(f'Using model from {best_model_path} for rolling prediction.')
# num_epochs is left at the function's default (20 retraining passes per step).
test_predictions = rolling_predict_with_retraining(model, train_data, val_data, test_data, window_size, device)

# Compute R2 and MAPE of the rolling predictions against the actual test closes.
test_actuals = test_data['Close'].values
test_r2 = r2_score(test_actuals, test_predictions)
test_mape = mean_absolute_percentage_error(test_actuals, test_predictions)

print(f'Test R2: {test_r2:.4f}')
print(f'Test MAPE: {test_mape:.4f}')


Using model from KAN_logs/logs_16/best_model_epoch_50_r2_0.9942.pth for rolling prediction.


Rolling Prediction: 100%|██████████| 423/423 [29:16<00:00,  4.15s/it]

Test R2: 0.9200
Test MAPE: 0.0172





In [22]:

# Write the test-set actuals and the rolling predictions side by side to a CSV
# file inside logs_dir (row index is not written).
predictions_csv_path = os.path.join(logs_dir, 'predictions.csv')
predictions_df = pd.DataFrame(
    data={
        'Actual': test_actuals,
        'Predicted': test_predictions,
    }
)
predictions_df.to_csv(predictions_csv_path, index=False)
print(f'Saved predictions to {predictions_csv_path}')

# Plot the training history with Plotly: loss, R2 and MAPE per epoch.
# The x axis is the 1-based epoch number, 1..num_epochs.
epochs = [epoch_idx + 1 for epoch_idx in range(num_epochs)]

# Training loss and validation loss on a shared figure.
fig_loss = go.Figure(
    data=[
        go.Scatter(x=epochs, y=train_losses, mode='lines', name='Train Loss'),
        go.Scatter(x=epochs, y=val_losses, mode='lines', name='Validation Loss'),
    ],
    layout=go.Layout(
        title='Train and Validation Loss',
        xaxis={'title': 'Epoch'},
        yaxis={'title': 'Loss'},
    ),
)
fig_loss.show()

# Validation R2 per epoch.
fig_r2 = go.Figure(
    data=[go.Scatter(x=epochs, y=val_r2s, mode='lines', name='Validation R2')],
    layout=go.Layout(
        title='Validation R2 Over Epochs',
        xaxis={'title': 'Epoch'},
        yaxis={'title': 'R2'},
    ),
)
fig_r2.show()

# Validation MAPE per epoch.
fig_mape = go.Figure(
    data=[go.Scatter(x=epochs, y=val_mapes, mode='lines', name='Validation MAPE')],
    layout=go.Layout(
        title='Validation MAPE Over Epochs',
        xaxis={'title': 'Epoch'},
        yaxis={'title': 'MAPE'},
    ),
)
fig_mape.show()

# Plot the rolling predictions against the actual closing prices with Plotly,
# using the test-set index as the x axis.
layout = go.Layout(
    title='Rolling Prediction vs Actual Close Price',
    xaxis={'title': 'Time'},
    yaxis={'title': 'Close Price'},
)

test_trace_actual = go.Scatter(
    x=test_data.index,
    y=test_actuals,
    mode='lines',
    name='Test Actual Close Price',
)
test_trace_predicted = go.Scatter(
    x=test_data.index,
    y=test_predictions,
    mode='lines',
    name='Test Predicted Close Price',
)

# Start from the layout, then attach the traces in the same order as before
# (actual first, predicted second).
fig = go.Figure(layout=layout)
fig.add_trace(test_trace_actual)
fig.add_trace(test_trace_predicted)
fig.show()


Saved predictions to KAN_logs/predictions.csv
