# Transformer Apply for QQQ ETF

## 数据预处理：

In [6]:
import sys
sys.path.append('/workspaces/AI-Trader')
import pandas as pd
import torch
from sklearn.preprocessing import MinMaxScaler

# 读取数据
data = pd.read_csv('/workspaces/AI-Trader/data/raw/qqq_history.csv')

# 选择需要的列（如 'Close' 作为目标变量）
data = data[['Date', 'Close']]

# 转换日期格式并排序
data['Date'] = pd.to_datetime(data['Date'])
data = data.sort_values('Date')

# 归一化处理
scaler = MinMaxScaler(feature_range=(0, 1))
data['Close'] = scaler.fit_transform(data[['Close']])

# 创建时间序列数据
def create_sequences(data, seq_len):
    sequences = []
    targets = []
    for i in range(len(data) - seq_len):
        seq = data[i:i + seq_len]
        target = data[i + seq_len]
        sequences.append(seq)
        targets.append(target)
     # 添加特征维度
    sequences = torch.tensor(sequences, dtype=torch.float32).unsqueeze(-1)  # (batch_size, seq_len, 1)
    targets = torch.tensor(targets, dtype=torch.float32)
    return torch.tensor(sequences, dtype=torch.float32), torch.tensor(targets, dtype=torch.float32)

seq_len = 30  # 使用过去 30 天的数据预测未来
X, y = create_sequences(data['Close'].values, seq_len)

  data['Date'] = pd.to_datetime(data['Date'])
  return torch.tensor(sequences, dtype=torch.float32), torch.tensor(targets, dtype=torch.float32)


## 模型训练

In [11]:
# 数据划分
train_size = int(0.7 * len(X))
val_size = int(0.15 * len(X))
test_size = len(X) - train_size - val_size

X_train, X_val, X_test = X[:train_size], X[train_size:train_size + val_size], X[train_size + val_size:]
y_train, y_val, y_test = y[:train_size], y[train_size:train_size + val_size], y[train_size + val_size:]

# 初始化模型
from models.ml.transformer import TimeSeriesTransformer
# 假设 TimeSeriesTransformer 已经定义并导入
feature_dim = 1  # 每天只有一个特征（收盘价）
model = TimeSeriesTransformer(feature_dim=feature_dim, seq_len=seq_len, num_heads=1, hidden_dim=64)

# 训练模型并记录损失和方向准确率
import matplotlib.pyplot as plt

train_losses, val_losses = [], []
train_accuracies, val_accuracies = [], []

for epoch in range(50):  # 假设 50 个 epoch
    train_loss, train_acc = model.train_epoch(X_train, y_train)  # 假设 train_epoch 返回损失和方向准确率
    val_loss, val_acc = model.validate(X_val, y_val)  # 假设 validate 返回损失和方向准确率

    train_losses.append(train_loss)
    val_losses.append(val_loss)
    train_accuracies.append(train_acc)
    val_accuracies.append(val_acc)

    print(f"Epoch [{epoch + 1}/50], Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Train Acc: {train_acc:.2f}, Val Acc: {val_acc:.2f}")
# 绘制损失曲线
print("Draw the loss curve")
plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
plt.plot(train_losses, label='Train Loss')
plt.plot(val_losses, label='Val Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.title('Loss Curve')

# 绘制方向准确率曲线
print("Draw the direction accuracy curve")
plt.subplot(1, 2, 2)
plt.plot(train_accuracies, label='Train Accuracy')
plt.plot(val_accuracies, label='Val Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.title('Accuracy Curve')
plt.show()

# 测试集评估
from sklearn.metrics import mean_squared_error, mean_absolute_error

predictions = model.predict(X_test)
mse = mean_squared_error(y_test, predictions)
mae = mean_absolute_error(y_test, predictions)

# 计算方向准确率
direction_accuracy = (np.sign(predictions) == np.sign(y_test)).mean()

print(f"Test MSE: {mse:.4f}, Test MAE: {mae:.4f}, Direction Accuracy: {direction_accuracy:.2f}")



AttributeError: 'TimeSeriesTransformer' object has no attribute 'train_epoch'

## 预测与策略生成

In [3]:

# 预测
predictions = model.predict(X_test)

# 反归一化
predictions = scaler.inverse_transform(predictions.detach().numpy())
y_test = scaler.inverse_transform(y_test.numpy().reshape(-1, 1))

In [4]:
# 生成交易信号：根据预测值生成买入、卖出或持有信号：
import numpy as np

# 简单策略：如果预测价格高于当前价格，则买入；否则卖出
signals = []
for i in range(len(predictions)):
    if predictions[i] > y_test[i]:
        signals.append('Buy')
    else:
        signals.append('Sell')

# 将信号与日期对应
results = pd.DataFrame({
    'Date': data['Date'].iloc[train_size + seq_len:].values,
    'Actual': y_test.flatten(),
    'Predicted': predictions.flatten(),
    'Signal': signals
})
print(results.head())

                        Date      Actual   Predicted Signal
0  2020-01-27 00:00:00-05:00  211.204575 -288.426941   Sell
1  2020-01-28 00:00:00-05:00  214.448685 -288.426941   Sell
2  2020-01-29 00:00:00-05:00  214.797256 -288.426941   Sell
3  2020-01-30 00:00:00-05:00  215.562286 -288.426941   Sell
4  2020-01-31 00:00:00-05:00  212.143906 -288.426941   Sell


## 评估策略

In [5]:
from sklearn.metrics import mean_squared_error

mse = mean_squared_error(y_test, predictions)
rmse = np.sqrt(mse)
print(f'MSE: {mse:.4f}, RMSE: {rmse:.4f}')

MSE: 413447.2500, RMSE: 642.9986
