In [None]:
## install finrl library
!pip install git+https://github.com/AI4Finance-Foundation/FinRL.git

# 导入所需库

In [48]:
# 导入必要的库
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import glob
from stable_baselines3 import A2C, DDPG, PPO, TD3, SAC
import torch
import time

from finrl.agents.stablebaselines3.models import DRLAgent
from finrl.config import INDICATORS, TRAINED_MODEL_DIR, RESULTS_DIR
from finrl.main import check_and_make_directories
from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv
from finrl.meta.preprocessor.yahoodownloader import YahooDownloader

# Matplotlib defaults for this notebook, applied in a single update:
#   - SimHei as the sans-serif font so Chinese labels can be rendered,
#   - unicode minus disabled so the minus sign is displayed correctly,
#   - default figure size for all plots.
plt.rcParams.update(
    {
        "font.sans-serif": ["SimHei"],
        "axes.unicode_minus": False,
        "figure.figsize": (15, 7),
    }
)

# 是否启用GPU

In [49]:
# Probe for a CUDA device and report what was found
print("检查GPU可用性...")
use_cuda = torch.cuda.is_available()
if not use_cuda:
    # No GPU visible to torch: announce the CPU fallback
    print("✗ 未发现可用的GPU，将使用CPU进行回测")
else:
    # Report how many devices exist and the name of device 0
    cuda_device_count = torch.cuda.device_count()
    cuda_device_name = torch.cuda.get_device_name(0)
    print(f"✓ 发现 {cuda_device_count} 个可用的GPU设备")
    print(f"✓ 当前使用: {cuda_device_name}")

# Make sure the trained-model and results directories exist
check_and_make_directories([TRAINED_MODEL_DIR, "results"])

检查GPU可用性...
✓ 发现 1 个可用的GPU设备
✓ 当前使用: NVIDIA GeForce RTX 3060 Laptop GPU


# 读取数据

In [50]:
# Part 2. Backtest preparation

# Load the pre-processed train / trade splits (paths kept unchanged)
train = pd.read_csv("data/processed_data/train_data_small.csv")
trade = pd.read_csv("data/processed_data/test_data_small.csv")

# Index format, kept consistent with the example code: the first CSV column
# becomes the index and the index name is blanked out.
train, trade = (split.set_index(split.columns[0]) for split in (train, trade))
train.index.names = [""]
trade.index.names = [""]

# Report the number of rows in each split
print(f"训练数据共 {train.shape[0]} 条记录")
print(f"测试数据共 {trade.shape[0]} 条记录")

训练数据共 126000 条记录
测试数据共 23500 条记录


# 回测模型选择

In [55]:
# Select which trained agents take part in the backtest
if_using_a2c = True
if_using_ddpg = True
if_using_ppo = False
if_using_td3 = False
if_using_sac = False

# Device policy: A2C stays pinned to the CPU (as originally configured); the
# other agents prefer the GPU but fall back to the CPU when torch cannot see a
# CUDA device, instead of failing on a hard-coded device="cuda".
_accelerated_device = "cuda" if torch.cuda.is_available() else "cpu"


def _load_agent(algo_cls, file_stem, enabled, device):
    """Load one saved agent from TRAINED_MODEL_DIR, or return None when disabled.

    Args:
        algo_cls: Algorithm class exposing a ``load(path, device=...)`` method.
        file_stem: File name of the saved agent inside TRAINED_MODEL_DIR.
        enabled: Whether this agent is selected for the backtest.
        device: Device string passed through to ``algo_cls.load``.

    Returns:
        The loaded model, or None if ``enabled`` is false.
    """
    if not enabled:
        return None
    return algo_cls.load(TRAINED_MODEL_DIR + "/" + file_stem, device=device)


# Load the selected agents; names are unchanged for the cells that follow
trained_a2c = _load_agent(A2C, "agent_a2c", if_using_a2c, "cpu")
trained_ddpg = _load_agent(DDPG, "agent_ddpg", if_using_ddpg, _accelerated_device)
trained_ppo = _load_agent(PPO, "agent_ppo", if_using_ppo, _accelerated_device)
trained_td3 = _load_agent(TD3, "agent_td3", if_using_td3, _accelerated_device)
trained_sac = _load_agent(SAC, "agent_sac", if_using_sac, _accelerated_device)

# 设置回测参数

In [56]:
# Trading-environment dimensions derived from the trade split:
# one slot plus two per stock, plus one per (indicator, stock) pair.
stock_dimension = len(trade["tic"].unique())
state_space = 1 + stock_dimension * (2 + len(INDICATORS))
print(f"股票维度: {stock_dimension}, 状态空间: {state_space}")

# Transaction costs (buy and sell share the same list object) and the
# initial holdings of zero shares per stock.
sell_cost_list = [0.001] * stock_dimension
buy_cost_list = sell_cost_list
num_stock_shares = [0 for _ in range(stock_dimension)]

# Keyword arguments forwarded to StockTradingEnv
env_kwargs = dict(
    hmax=100,
    initial_amount=1000000,
    num_stock_shares=num_stock_shares,
    buy_cost_pct=buy_cost_list,
    sell_cost_pct=sell_cost_list,
    state_space=state_space,
    stock_dim=stock_dimension,
    tech_indicator_list=INDICATORS,
    action_space=stock_dimension,
    reward_scaling=1e-4,
)

# Build the backtest environment on the trade split
e_trade_gym = StockTradingEnv(
    df=trade,
    risk_indicator_col="vix",
    turbulence_threshold=70,
    **env_kwargs,
)

股票维度: 500, 状态空间: 5001


# 开始回测

In [57]:
# Run the backtests, in the same order as the example code:
# A2C, DDPG, PPO, TD3, SAC.


def _backtest_agent(label, enabled, model):
    """Backtest one agent on ``e_trade_gym`` and report its final account value.

    Args:
        label: Upper-case agent name used in the progress messages.
        enabled: Whether this agent is selected for the backtest.
        model: The loaded agent passed to ``DRLAgent.DRL_prediction``.

    Returns:
        A ``(df_account_value, df_actions)`` pair as returned by
        ``DRLAgent.DRL_prediction``, or ``(None, None)`` when disabled.
    """
    if not enabled:
        return None, None
    print(f"正在回测{label}模型...")
    df_account_value, df_actions = DRLAgent.DRL_prediction(
        model=model, environment=e_trade_gym
    )
    print(
        f"{label}回测完成，最终资产: ${df_account_value['account_value'].iloc[-1]:,.2f}"
    )
    return df_account_value, df_actions


# A2C backtest
df_account_value_a2c, df_actions_a2c = _backtest_agent(
    "A2C", if_using_a2c, trained_a2c
)

# DDPG backtest
df_account_value_ddpg, df_actions_ddpg = _backtest_agent(
    "DDPG", if_using_ddpg, trained_ddpg
)

# PPO backtest
df_account_value_ppo, df_actions_ppo = _backtest_agent(
    "PPO", if_using_ppo, trained_ppo
)

# TD3 backtest
df_account_value_td3, df_actions_td3 = _backtest_agent(
    "TD3", if_using_td3, trained_td3
)

# SAC backtest
df_account_value_sac, df_actions_sac = _backtest_agent(
    "SAC", if_using_sac, trained_sac
)

正在回测A2C模型...
hit end!
A2C回测完成，最终资产: $954,516.33
正在回测DDPG模型...
hit end!
DDPG回测完成，最终资产: $959,499.14


# 添加参考基准

In [58]:
# Part 3: 均值方差优化(MVO)


# 处理数据为MVO格式
def process_df_for_mvo(df):
    """Reshape a long price table into a wide close-price matrix.

    Args:
        df: DataFrame with at least the columns ``date``, ``tic`` and ``close``.

    Returns:
        DataFrame indexed by ``date`` with one column per ``tic`` holding the
        corresponding ``close`` values.
    """
    pivot_spec = {"index": "date", "columns": "tic", "values": "close"}
    wide_close = df.pivot(**pivot_spec)
    return wide_close


# 计算股票收益率函数
def StockReturnsComputing(StockPrice, Rows, Columns):
    """Compute period-over-period percentage returns for a price matrix.

    Vectorized replacement for the original element-by-element double loop.

    Args:
        StockPrice: 2-D array-like of prices; rows are consecutive
            observations, columns are assets.
        Rows: Number of price rows to use.
        Columns: Number of asset columns to use.

    Returns:
        Float array of shape ``(max(Rows - 1, 0), Columns)`` where entry
        ``[i, j]`` is ``(price[i + 1, j] - price[i, j]) / price[i, j] * 100``.
    """
    prices = np.asarray(StockPrice, dtype=float)[:Rows, :Columns]
    earlier = prices[:-1]
    # Same operation order as the scalar formula: difference, divide, scale
    return (prices[1:] - earlier) / earlier * 100


# Prepare the MVO inputs: wide close-price tables for both splits
StockData = process_df_for_mvo(train)
TradeData = process_df_for_mvo(trade)

# Returns over the training window, then their per-asset mean and covariance
arStockPrices = np.asarray(StockData)
Rows, Cols = arStockPrices.shape
arReturns = StockReturnsComputing(arStockPrices, Rows, Cols)
meanReturns = arReturns.mean(axis=0)
covReturns = np.cov(arReturns, rowvar=False)

# Output precision for printed NumPy arrays
np.set_printoptions(precision=3, suppress=True)

# Optimal weights via PyPortfolioOpt
from pypfopt.efficient_frontier import EfficientFrontier

ef_mean = EfficientFrontier(meanReturns, covReturns, weight_bounds=(0, 0.5))
raw_weights_mean = ef_mean.max_sharpe()  # maximize the Sharpe ratio
cleaned_weights_mean = ef_mean.clean_weights()
# Cleaned weights are looked up by integer position 0..n-1 and scaled to a
# 1,000,000 budget.
mvo_weights = 1000000 * np.array(
    [cleaned_weights_mean[k] for k in range(len(cleaned_weights_mean))]
)

# Share counts: budget per asset times the reciprocal of the last training
# price (note that LastPrice holds 1 / price, not the price itself).
LastPrice = 1 / StockData.tail(1).to_numpy()[0]
Initial_Portfolio = mvo_weights * LastPrice

# Portfolio value on each trade date.
# NOTE(review): the product is positional, so this assumes TradeData has the
# same ticker columns in the same order as StockData; confirm for your data.
Portfolio_Assets = TradeData.dot(Initial_Portfolio)
MVO_result = pd.DataFrame(Portfolio_Assets, columns=["Mean Var"])

In [59]:
# Part 4: S&P 500 index as the benchmark

# Backtest date ranges.
# NOTE(review): these dates are hard-coded rather than derived from the
# train / trade data; confirm TRADE_* matches the dates in the trade split,
# otherwise the benchmark will not line up with the agents' results.
TRAIN_START_DATE = "2000-01-01"
TRAIN_END_DATE = "2020-01-01"
TRADE_START_DATE = "2025-01-01"
TRADE_END_DATE = "2025-03-14"

# Download the S&P 500 index (^GSPC) for the trade window
df_spx = YahooDownloader(
    start_date=TRADE_START_DATE, end_date=TRADE_END_DATE, ticker_list=["^GSPC"]
).fetch_data()

# Keep date and close only, then rescale close so that the first day equals
# the same 1,000,000 starting capital used elsewhere in the notebook.
df_spx = df_spx[["date", "close"]]
fst_day = df_spx["close"].iloc[0]
# Rescale the column directly instead of outer-merging two columns of the
# same frame on its index: same date-indexed "close" frame, but it cannot
# duplicate rows if the index is ever non-unique.
spx = df_spx.assign(close=df_spx["close"].div(fst_day).mul(1000000)).set_index(
    "date"
)

[*********************100%***********************]  1 of 1 completed

Shape of DataFrame:  (48, 8)





# 整合回测结果

In [60]:
# Part 5: combine the backtest results and plot them


def _index_by_first_column(enabled, df_account_value):
    """Return the account-value frame indexed by its first column.

    Args:
        enabled: Whether the corresponding agent was selected.
        df_account_value: Account-value DataFrame from the backtest, or None.

    Returns:
        ``df_account_value`` with its first column set as the index, or None
        when the agent is disabled or produced no result.
    """
    if not enabled or df_account_value is None:
        return None
    return df_account_value.set_index(df_account_value.columns[0])


# Per-agent results; names are unchanged for any later cells
df_result_a2c = _index_by_first_column(if_using_a2c, df_account_value_a2c)
df_result_ddpg = _index_by_first_column(if_using_ddpg, df_account_value_ddpg)
df_result_ppo = _index_by_first_column(if_using_ppo, df_account_value_ppo)
df_result_td3 = _index_by_first_column(if_using_td3, df_account_value_td3)
df_result_sac = _index_by_first_column(if_using_sac, df_account_value_sac)

# Collect the account-value series of every agent that actually ran, in the
# same column order as the example code.
_agent_results = {
    "a2c": df_result_a2c,
    "ddpg": df_result_ddpg,
    "ppo": df_result_ppo,
    "td3": df_result_td3,
    "sac": df_result_sac,
}
result_data = {
    agent: frame["account_value"]
    for agent, frame in _agent_results.items()
    if frame is not None
}

# Add the benchmarks
result_data["mvo"] = MVO_result["Mean Var"]
result_data["spx"] = spx["close"]  # S&P 500 instead of DJIA

# Build the combined result frame (series are aligned on their indexes)
result = pd.DataFrame(result_data)

# Comparison plot. Drawing on an explicit Axes avoids the stray empty figure
# that plt.figure() followed by result.plot() used to leave behind, because
# DataFrame.plot() opens its own figure when no ax is given.
plt.rcParams["figure.figsize"] = (15, 5)
fig, ax = plt.subplots()
result.plot(ax=ax)
ax.set_title("回测结果对比")
ax.set_xlabel("日期")
ax.set_ylabel("账户价值")
ax.legend()
ax.grid(True, linestyle="--", alpha=0.5)
fig.savefig("results/backtest_comparison.png", dpi=300, bbox_inches="tight")
plt.show()

  plt.show()
