In [2]:
import pandas as pd
import numpy as np

# =============================
# 1. 数据加载
# =============================
def load_data(file_path):
    """Read the raw CSV and normalise its column types.

    The ``datetime`` column is parsed into timestamps (entries that cannot be
    parsed become ``NaT``) and is promoted to the index. Every column in the
    fixed numeric list is coerced with ``pd.to_numeric``; unparseable values
    become ``NaN``.

    Args:
        file_path: Anything ``pd.read_csv`` accepts.

    Returns:
        The typed DataFrame indexed by ``datetime``.

    Raises:
        KeyError: if ``datetime`` or any of the numeric columns is missing.
    """
    frame = pd.read_csv(file_path)

    # Parse the timestamps, then use them as the index.
    parsed_times = pd.to_datetime(frame['datetime'], errors='coerce')
    frame = frame.assign(datetime=parsed_times).set_index('datetime')

    # Coerce each numeric column individually.
    numeric_cols = [
        'price', 'volume', 'turnover', 'ask_order', 'bid_order',
        'num', 'count', 'exchtime', 'localtime'
    ]
    coerced = {
        column: pd.to_numeric(frame[column], errors='coerce')
        for column in numeric_cols
    }
    frame = frame.assign(**coerced)

    return frame

# =============================
# 2. 数据清洗
# =============================
def process_data(df):
    """Clean the raw frame and return a new, independent DataFrame.

    Steps, in order: forward-fill gaps, fill what is still missing with 0,
    drop duplicate rows, keep only rows with non-negative ``price`` and
    ``volume``, add ``price_tick`` (price divided by the maximum price), and,
    when a ``time`` column exists, parse it with the ``%H%M%S%f`` format
    (unparseable values become ``NaT``).

    The input frame is not modified.

    Args:
        df: Frame containing at least ``price`` and ``volume``.

    Returns:
        The cleaned copy.
    """
    # ``ffill()`` replaces the deprecated ``fillna(method='ffill')``; every step
    # returns a new object, so the caller's frame is left untouched.
    cleaned = df.ffill().fillna(0).drop_duplicates()

    # Drop rows with an invalid (negative) price or volume. The explicit copy
    # makes the column assignments below land on an owned frame instead of a
    # view of the filtered one.
    valid_rows = (cleaned['price'] >= 0) & (cleaned['volume'] >= 0)
    cleaned = cleaned.loc[valid_rows].copy()

    # Simple normalised price. If every remaining price is 0 this is 0/0 = NaN.
    cleaned['price_tick'] = cleaned['price'] / cleaned['price'].max()

    # The ``time`` column is optional.
    if 'time' in cleaned.columns:
        cleaned['time'] = pd.to_datetime(cleaned['time'], format='%H%M%S%f', errors='coerce')

    return cleaned

# =============================
# 3. 特征工程
# =============================
def generate_features(df):
    """Build the market / order-flow feature columns and the return labels.

    Works on a copy, so the caller's frame (typically the filtered frame coming
    out of ``process_data``) is neither mutated nor written through a slice.

    Args:
        df: Frame with ``price``, ``volume``, ``turnover``, ``ask_order``,
            ``bid_order`` and ``flag`` columns.

    Returns:
        A new DataFrame with the added columns. Every ``inf`` and ``NaN`` in the
        result is replaced by 0, which includes the trailing rows of the
        forward-shifted columns and the leading rows of the rolling windows.

    Raises:
        KeyError: if a required column is missing.
    """
    feats = df.copy()
    price = feats['price']
    volume = feats['volume']
    turnover = feats['turnover']

    # NOTE(review): ``mid_price`` and ``price_change`` use ``shift(-1)`` /
    # ``shift(-20)``, i.e. prices from later rows. Confirm this is intended
    # before feeding them to a predictive model.
    feats['mid_price'] = (price.shift(-1) + price) / 2
    feats['price_change'] = price.shift(-20) - price
    feats['volume_change'] = volume.diff()

    # Rolling means over the last 20 / 120 rows.
    feats['rolling_mean_20'] = price.rolling(window=20).mean()
    feats['rolling_mean_120'] = price.rolling(window=120).mean()

    order_gap = feats['ask_order'] - feats['bid_order']
    feats['ask_bid_spread'] = order_gap
    # The 1e-6 term keeps the denominator away from exactly zero.
    feats['order_flow_imbalance'] = order_gap / (feats['ask_order'] + feats['bid_order'] + 1e-6)

    feats['turnover_change'] = turnover.diff()
    feats['volume_ratio_20'] = volume / (volume.rolling(20).mean() + 1e-6)
    feats['turnover_ratio_20'] = turnover / (turnover.rolling(20).mean() + 1e-6)

    feats['volatility_20'] = price.rolling(20).std()

    # Labels: relative price move 20 / 120 rows ahead.
    feats['future_return_20'] = price.shift(-20) / price - 1
    feats['future_return_120'] = price.shift(-120) / price - 1

    # Integer code per distinct ``flag`` value, in order of first appearance.
    feats['flag_encoded'] = pd.factorize(feats['flag'])[0]

    # Replace inf with NaN, then NaN with 0.
    feats = feats.replace([np.inf, -np.inf], np.nan).fillna(0)

    return feats

# =============================
# 4. 数据保存
# =============================
def save_data(df, file_path):
    """Write ``df`` to ``file_path`` as CSV, keeping the index in the output."""
    keep_index = True
    df.to_csv(file_path, index=keep_index)

In [3]:
import pandas as pd
import numpy as np

# =============================
# 1. 计算 PnL (Episodic Profit and Loss)
# =============================
def calculate_pnl(df):
    """Return the summed per-row PnL and store its ingredients on ``df``.

    Writes two columns into ``df``: ``side`` (+1 where ``order_type == 1``,
    otherwise -1) and ``pnl`` (side x next-row price change x volume). The last
    row has no next price, so its ``pnl`` is NaN and is skipped by the sum.
    """
    is_type_one = df['order_type'] == 1
    df['side'] = np.where(is_type_one, 1, -1)

    next_price_move = df['price'].shift(-1) - df['price']
    df['pnl'] = df['side'] * next_price_move * df['volume']

    total = df['pnl'].sum()
    return total

# =============================
# 2. 计算 MAP (Mean Absolute Position)
# =============================
def calculate_map(df):
    """Return the mean absolute value of the ``inventory`` column.

    When ``df`` has no ``inventory`` column, one is first created on ``df`` as
    the cumulative sum of ``volume``.
    """
    inventory_missing = 'inventory' not in df.columns
    if inventory_missing:
        df['inventory'] = df['volume'].cumsum()

    absolute_position = df['inventory'].abs()
    return absolute_position.mean()

# =============================
# 3. 计算 Adverse Selection Ratio
# =============================
def calculate_adverse_selection_ratio(df):
    """Return the fraction of rows that match the adverse-selection rule.

    A row counts as adverse when ``order_type == 1`` and ``price`` is above
    ``mid_price``, or when ``order_type == -1`` and ``price`` is below
    ``mid_price``. Returns 0 for an empty frame.
    """
    type_one_above_mid = (df['order_type'] == 1) & (df['price'] > df['mid_price'])
    type_minus_one_below_mid = (df['order_type'] == -1) & (df['price'] < df['mid_price'])

    adverse_orders = len(df[type_one_above_mid | type_minus_one_below_mid])
    total_orders = len(df)

    if total_orders > 0:
        return adverse_orders / total_orders
    return 0

# =============================
# 4. 计算 RPT (Return Per Trade)
# =============================
def calculate_rpt(df):
    """Return total PnL divided by the number of rows (0 for an empty frame).

    ``calculate_pnl`` is always invoked, so its column writes on ``df`` happen
    even when the frame is empty.
    """
    trade_count = df.shape[0]
    pnl_sum = calculate_pnl(df)
    if trade_count > 0:
        return pnl_sum / trade_count
    return 0

# =============================
# 5. 计算 PnL-to-MAP Ratio
# =============================
def calculate_pnl_map_ratio(df):
    """Return total PnL divided by the mean absolute position.

    Returns 0 when the mean absolute position is exactly 0.
    """
    total_pnl = calculate_pnl(df)
    mean_abs_position = calculate_map(df)
    if mean_abs_position != 0:
        return total_pnl / mean_abs_position
    return 0

# =============================
# 6. 评估策略主函数
# =============================
def evaluate_strategy(df):
    """Compute every strategy metric for ``df`` and return them keyed by label.

    The metric functions run in the order listed; several of them add helper
    columns to ``df`` as a side effect.
    """
    metric_functions = (
        ("Total PnL", calculate_pnl),
        ("Mean Absolute Position (MAP)", calculate_map),
        ("Adverse Selection Ratio", calculate_adverse_selection_ratio),
        ("Return Per Trade (RPT)", calculate_rpt),
        ("PnL-to-MAP Ratio", calculate_pnl_map_ratio),
    )

    metrics = {}
    for label, compute in metric_functions:
        metrics[label] = compute(df)

    return metrics

In [4]:
import pandas as pd
import numpy as np

class MarketReplaySimulator:
    """Replays a tick frame against synthetic quotes placed around ``mid_price``.

    Attributes:
        df: The frame being replayed; ``market_replay`` writes an ``inventory``
            column into it.
        half_spread: Distance of the synthetic ask / bid from ``mid_price``.
        executed_orders: List of fill records produced by the last replay.
        state_dim / action_dim: Dimensions that the training code reads from the
            simulator to size the RL networks.
    """

    def __init__(self, df, half_spread=0.01, state_dim=None, action_dim=1):
        """
        Args:
            df: Frame with ``price``, ``mid_price`` and ``volume`` columns.
            half_spread: Quote offset from the mid price (0.01 as before).
            state_dim: Size of the RL state vector. NOTE(review): the file never
                defines what the state is; the fallback used here (number of
                numeric columns in ``df`` at construction time) is an assumption
                to confirm.
            action_dim: Size of the RL action vector. NOTE(review): 1 is an
                assumed default - confirm.
        """
        self.df = df
        self.half_spread = half_spread
        self.executed_orders = []

        # Callers build ``TD3_Agent(state_dim=simulator.state_dim, ...)``, so
        # both attributes must exist on the simulator.
        if state_dim is None:
            state_dim = int(df.select_dtypes(include='number').shape[1])
        self.state_dim = state_dim
        self.action_dim = action_dim

    def market_replay(self):
        """Simulate fills row by row and track the running inventory.

        For each row the synthetic ask is ``mid_price + half_spread`` and the
        bid is ``mid_price - half_spread``. A price at or above the ask fills a
        sell (inventory decreases by the row's volume); a price at or below the
        bid fills a buy (inventory increases). Rows are processed by position,
        so duplicate index labels are handled correctly.

        The position is carried from row to row: each fill record stores the
        inventory right after that fill, and ``df['inventory']`` holds the
        position after every row, whether or not that row produced a fill.
        Fill records from an earlier replay are discarded.

        Returns:
            DataFrame with one row per fill (``price``, ``volume``, ``side``,
            ``inventory``).
        """
        self.executed_orders = []

        prices = self.df['price'].to_numpy()
        mids = self.df['mid_price'].to_numpy()
        volumes = self.df['volume'].to_numpy()

        inventory_path = np.zeros(len(self.df), dtype=float)
        position = 0.0

        for row_pos, (price, mid_price, volume) in enumerate(zip(prices, mids, volumes)):
            ask_price = mid_price + self.half_spread  # synthetic sell quote
            bid_price = mid_price - self.half_spread  # synthetic buy quote

            # Comparisons against a NaN mid price are False, so such rows never fill.
            if price >= ask_price:
                position -= volume
                self.executed_orders.append({
                    'price': ask_price,
                    'volume': volume,
                    'side': 'sell',
                    'inventory': position
                })
            elif price <= bid_price:
                position += volume
                self.executed_orders.append({
                    'price': bid_price,
                    'volume': volume,
                    'side': 'buy',
                    'inventory': position
                })

            inventory_path[row_pos] = position

        self.df['inventory'] = inventory_path

        return pd.DataFrame(self.executed_orders)

    def save_executed_orders(self, output_path):
        """Write the fill records of the last replay to ``output_path`` as CSV (no index)."""
        executed_orders_df = pd.DataFrame(self.executed_orders)
        executed_orders_df.to_csv(output_path, index=False)

In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import random
from collections import deque

# =============================
# 1. Actor 网络 (策略网络)
# =============================
class Actor(nn.Module):
    """Policy network: state -> action, two hidden ReLU layers (64, 32) and a Tanh output."""

    def __init__(self, state_dim, action_dim):
        super().__init__()

        # Layers are created in the same order, and stored under the same ``fc``
        # attribute, so parameter names in the state dict stay ``fc.0``, ``fc.2``, ``fc.4``.
        hidden_sizes = (64, 32)
        layers = []
        in_features = state_dim
        for width in hidden_sizes:
            layers.append(nn.Linear(in_features, width))
            layers.append(nn.ReLU())
            in_features = width
        layers.append(nn.Linear(in_features, action_dim))
        layers.append(nn.Tanh())  # bounds every action component to (-1, 1)

        self.fc = nn.Sequential(*layers)

    def forward(self, x):
        """Return the action for the state batch ``x``."""
        action = self.fc(x)
        return action

# =============================
# 2. Critic 网络 (价值网络)
# =============================
class Critic(nn.Module):
    """Value network: (state, action) -> one Q value, two hidden ReLU layers (64, 32)."""

    def __init__(self, state_dim, action_dim):
        super().__init__()

        # Same construction order and ``fc`` attribute as before, so the state
        # dict keys stay ``fc.0``, ``fc.2``, ``fc.4``.
        joint_dim = state_dim + action_dim
        layers = [
            nn.Linear(joint_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
        ]
        self.fc = nn.Sequential(*layers)

    def forward(self, state, action):
        """Concatenate ``state`` and ``action`` along dim 1 and return the Q estimate."""
        joint_input = torch.cat((state, action), dim=1)
        q_value = self.fc(joint_input)
        return q_value

# =============================
# 3. TD3 Agent
# =============================
class TD3_Agent:
    """Twin Delayed DDPG (TD3) agent: one actor, two critics, target copies of each.

    Experience tuples ``(state, action, reward, next_state, done)`` are kept in
    a bounded replay buffer and sampled uniformly for each update.
    """

    def __init__(self, state_dim, action_dim, lr=0.001, gamma=0.99, tau=0.005,
                 policy_noise=0.2, noise_clip=0.5, policy_delay=2):
        """
        Args:
            state_dim: Size of the state vector.
            action_dim: Size of the action vector.
            lr: Adam learning rate shared by the actor and both critics.
            gamma: Discount factor.
            tau: Soft-update rate of the target networks.
            policy_noise: Std-dev of the Gaussian noise added to target actions.
            noise_clip: Absolute bound applied to that noise.
            policy_delay: The actor and the target networks are updated once
                every ``policy_delay`` critic updates.
        """
        self.actor = Actor(state_dim, action_dim)
        self.actor_target = Actor(state_dim, action_dim)
        self.actor_target.load_state_dict(self.actor.state_dict())  # start identical

        self.critic_1 = Critic(state_dim, action_dim)
        self.critic_1_target = Critic(state_dim, action_dim)
        self.critic_1_target.load_state_dict(self.critic_1.state_dict())

        self.critic_2 = Critic(state_dim, action_dim)
        self.critic_2_target = Critic(state_dim, action_dim)
        self.critic_2_target.load_state_dict(self.critic_2.state_dict())

        self.actor_optimizer = optim.Adam(self.actor.parameters(), lr=lr)
        self.critic_1_optimizer = optim.Adam(self.critic_1.parameters(), lr=lr)
        self.critic_2_optimizer = optim.Adam(self.critic_2.parameters(), lr=lr)

        self.gamma = gamma
        self.tau = tau
        self.policy_noise = policy_noise
        self.noise_clip = noise_clip
        self.policy_delay = policy_delay

        # Number of critic updates performed so far; drives the delayed actor update.
        self.total_updates = 0

        self.replay_buffer = deque(maxlen=100000)  # experience replay

    # =============================
    # Replay buffer
    # =============================
    def store_experience(self, state, action, reward, next_state, done):
        """Append one transition to the replay buffer."""
        self.replay_buffer.append((state, action, reward, next_state, done))

    def sample_experience(self, batch_size):
        """Return ``batch_size`` transitions drawn uniformly without replacement."""
        return random.sample(self.replay_buffer, batch_size)

    # =============================
    # Training step
    # =============================
    def _soft_update(self, target_net, source_net):
        """Move ``target_net`` a fraction ``tau`` of the way towards ``source_net``."""
        for target_param, param in zip(target_net.parameters(), source_net.parameters()):
            target_param.data.copy_(self.tau * param.data + (1 - self.tau) * target_param.data)

    def train(self, batch_size=64):
        """Run one TD3 update from a sampled mini-batch.

        Does nothing (and returns ``None``) while the buffer holds fewer than
        ``batch_size`` transitions. Both critics are updated on every call; the
        actor and all target networks are updated on every ``policy_delay``-th
        call.
        """
        if len(self.replay_buffer) < batch_size:
            return  # not enough data yet

        batch = self.sample_experience(batch_size)
        state, action, reward, next_state, done = zip(*batch)

        # Going through one contiguous NumPy array is much faster than building
        # a tensor from a tuple of arrays. The reshape also accepts scalar
        # actions / rewards stored per transition.
        state = torch.as_tensor(np.asarray(state, dtype=np.float32))
        action = torch.as_tensor(np.asarray(action, dtype=np.float32)).reshape(batch_size, -1)
        reward = torch.as_tensor(np.asarray(reward, dtype=np.float32)).reshape(batch_size, 1)
        next_state = torch.as_tensor(np.asarray(next_state, dtype=np.float32))
        done = torch.as_tensor(np.asarray(done, dtype=np.float32)).reshape(batch_size, 1)

        # The target needs no gradient, so it is built with autograd disabled.
        with torch.no_grad():
            # Target policy smoothing: clipped noise on the target action, then
            # clip back to the actor's Tanh output range.
            noise = (torch.randn_like(action) * self.policy_noise).clamp(
                -self.noise_clip, self.noise_clip
            )
            next_action = (self.actor_target(next_state) + noise).clamp(-1.0, 1.0)

            # Clipped double-Q: bootstrap from the smaller of the two target critics.
            target_q1 = self.critic_1_target(next_state, next_action)
            target_q2 = self.critic_2_target(next_state, next_action)
            target_q = reward + self.gamma * (1 - done) * torch.min(target_q1, target_q2)

        # Critic updates.
        current_q1 = self.critic_1(state, action)
        current_q2 = self.critic_2(state, action)

        critic_1_loss = F.mse_loss(current_q1, target_q)
        critic_2_loss = F.mse_loss(current_q2, target_q)

        self.critic_1_optimizer.zero_grad()
        critic_1_loss.backward()
        self.critic_1_optimizer.step()

        self.critic_2_optimizer.zero_grad()
        critic_2_loss.backward()
        self.critic_2_optimizer.step()

        self.total_updates += 1

        # Delayed policy update: deterministic, every ``policy_delay`` critic
        # updates (previously this was decided by a coin flip per call).
        if self.total_updates % self.policy_delay == 0:
            actor_loss = -self.critic_1(state, self.actor(state)).mean()
            self.actor_optimizer.zero_grad()
            actor_loss.backward()
            self.actor_optimizer.step()

            # Soft-update all target networks.
            self._soft_update(self.actor_target, self.actor)
            self._soft_update(self.critic_1_target, self.critic_1)
            self._soft_update(self.critic_2_target, self.critic_2)

    # =============================
    # Save / load
    # =============================
    def save_model(self, path):
        """Save the actor's weights to ``path`` (critics and optimizers are not saved)."""
        torch.save(self.actor.state_dict(), path)

    def load_model(self, path):
        """Load actor weights from ``path`` into both the actor and its target."""
        actor_state = torch.load(path)  # read the file once, reuse for both networks
        self.actor.load_state_dict(actor_state)
        self.actor_target.load_state_dict(actor_state)

In [6]:
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, accuracy_score
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import joblib  # 模型保存
import numpy as np

# =============================
# 1. LightGBM 模型 (梯度提升树)
# =============================
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import pandas as pd
import numpy as np
import joblib

import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import pandas as pd
import numpy as np
import joblib

def train_lightgbm(df, model_path, test_size=0.3, shuffle=False):
    """Train a LightGBM regressor on ``future_return_20`` and save it.

    Args:
        df: Feature frame containing the six feature columns and the target.
        model_path: Destination for the ``joblib`` dump of the fitted model.
        test_size: Fraction of rows held out for evaluation.
        shuffle: Whether to shuffle rows before splitting. Defaults to False:
            the target is a 20-row-ahead return, so neighbouring rows share
            most of their label window, and a shuffled split would put
            near-duplicates of the test rows into the training set. With
            ``shuffle=False`` the last ``test_size`` fraction of rows (in the
            frame's existing order) is the test set.

    Returns:
        The fitted ``LGBMRegressor``.

    Raises:
        ValueError: if infinite values survive the cleaning step.
    """
    features = [
        'mid_price', 'volume_change', 'rolling_mean_20',
        'rolling_mean_120', 'ask_bid_spread', 'order_flow_imbalance'
    ]
    target = 'future_return_20'

    # Clean once, on a private copy, before splitting: inf -> NaN -> 0.
    # (Cleaning the split pieces in place writes into slices of the caller's frame.)
    data = df[features + [target]].replace([np.inf, -np.inf], np.nan).fillna(0)

    # ``random_state`` only matters when shuffling.
    X_train, X_test, y_train, y_test = train_test_split(
        data[features], data[target],
        test_size=test_size,
        shuffle=shuffle,
        random_state=42 if shuffle else None,
    )

    # Defensive check that nothing infinite is left.
    if np.any(np.isinf(X_train)) or np.any(np.isinf(y_train)):
        raise ValueError("❗ X_train 或 y_train 仍包含无穷大或异常值")
    if np.any(np.isinf(X_test)) or np.any(np.isinf(y_test)):
        raise ValueError("❗ X_test 或 y_test 仍包含无穷大或异常值")

    # Fit.
    model = lgb.LGBMRegressor(n_estimators=200, learning_rate=0.05)
    model.fit(X_train, y_train)

    # Evaluate on the held-out rows.
    preds = model.predict(X_test)
    mse = mean_squared_error(y_test, preds)
    print(f'✅ LightGBM 训练完成 - Mean Squared Error: {mse:.4f}')

    # Persist.
    joblib.dump(model, model_path)
    print(f'✅ LightGBM 模型已保存至: {model_path}')

    return model

# =============================
# 2. LSTM 模型 (时序深度网络)
# =============================
class LSTMModel(nn.Module):
    """Single-layer batch-first LSTM followed by a linear head on the last time step."""

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.lstm = nn.LSTM(input_size=input_dim, hidden_size=hidden_dim, batch_first=True)
        self.fc = nn.Linear(in_features=hidden_dim, out_features=output_dim)

    def forward(self, x):
        """Map ``x`` of shape (batch, time, input_dim) to (batch, output_dim)."""
        sequence_out, _final_state = self.lstm(x)
        last_step = sequence_out[:, -1, :]  # output at the final time step
        return self.fc(last_step)

def train_lstm(df, model_path, epochs=20, batch_size=64):
    """Train the LSTM trend model with mini-batch Adam and save its weights.

    Each row is fed as a length-1 sequence of the four features; the target is
    ``future_return_20``.

    Args:
        df: Feature frame with the four feature columns and the target.
        model_path: Destination passed to ``torch.save`` for the state dict.
        epochs: Number of passes over the data.
        batch_size: Rows per optimisation step. This parameter was previously
            ignored, which meant every step pushed the entire dataset through
            the network at once.

    Returns:
        The trained ``LSTMModel``.
    """
    features = ['mid_price', 'volume_change', 'rolling_mean_20', 'rolling_mean_120']
    target = 'future_return_20'

    X = torch.tensor(df[features].values, dtype=torch.float32).unsqueeze(1)  # add the time axis
    y = torch.tensor(df[target].values, dtype=torch.float32).unsqueeze(1)

    model = LSTMModel(input_dim=X.shape[2], hidden_dim=32, output_dim=1)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    loss_fn = nn.MSELoss()

    n_samples = X.shape[0]
    step = max(1, int(batch_size))

    for epoch in range(epochs):
        model.train()
        # Each row is an independent length-1 sequence, so the rows can be
        # visited in a fresh random order every epoch.
        order = torch.randperm(n_samples)
        weighted_loss_sum = 0.0

        for start in range(0, n_samples, step):
            batch_idx = order[start:start + step]
            optimizer.zero_grad()
            preds = model(X[batch_idx])
            loss = loss_fn(preds, y[batch_idx])
            loss.backward()
            optimizer.step()
            weighted_loss_sum += loss.item() * batch_idx.numel()

        # Sample-weighted mean of the batch losses for this epoch.
        epoch_loss = weighted_loss_sum / n_samples if n_samples else float('nan')

        if (epoch + 1) % 5 == 0:
            print(f'Epoch {epoch + 1}/{epochs} - Loss: {epoch_loss:.4f}')

    # Persist the weights.
    torch.save(model.state_dict(), model_path)
    print(f'✅ LSTM 模型已保存至: {model_path}')

    return model

# =============================
# 3. 模型加载
# =============================
def load_lightgbm(model_path):
    """Load and return the LightGBM model stored at ``model_path``.

    SECURITY: ``joblib.load`` unpickles the file, which can execute arbitrary
    code. Only load model files from a trusted source.
    """
    model = joblib.load(model_path)
    return model

def load_lstm(model_path, input_dim):
    """Rebuild an ``LSTMModel`` (hidden size 32, one output) and load its weights.

    ``input_dim`` must match the number of input features of the saved network.
    """
    network = LSTMModel(input_dim=input_dim, hidden_dim=32, output_dim=1)
    saved_weights = torch.load(model_path)
    network.load_state_dict(saved_weights)
    return network

# =============================
# 4. 预测信号
# =============================
def predict_signal(df, model, model_type='lightgbm'):
    """Run a trained signal model on ``df`` and return its predictions.

    Each model type gets the feature set it was trained on: six columns for
    LightGBM, four for the LSTM. Feeding the six LightGBM columns to the LSTM,
    as was done before, does not match the four-feature input it is trained
    and loaded with.

    Args:
        df: Frame containing the feature columns of the chosen model type.
        model: Fitted LightGBM model (exposes ``predict``) or trained LSTM module.
        model_type: ``'lightgbm'`` or ``'lstm'``.

    Returns:
        The output of ``model.predict`` for LightGBM, or a flat NumPy array
        with one prediction per row for the LSTM.

    Raises:
        ValueError: if ``model_type`` is not one of the supported values.
    """
    feature_sets = {
        'lightgbm': [
            'mid_price', 'volume_change', 'rolling_mean_20',
            'rolling_mean_120', 'ask_bid_spread', 'order_flow_imbalance'
        ],
        'lstm': ['mid_price', 'volume_change', 'rolling_mean_20', 'rolling_mean_120'],
    }

    # Fail loudly instead of silently returning None for an unknown type.
    if model_type not in feature_sets:
        raise ValueError(
            f"Unsupported model_type: {model_type!r} (expected 'lightgbm' or 'lstm')"
        )

    X = df[feature_sets[model_type]]

    if model_type == 'lightgbm':
        return model.predict(X)

    inputs = torch.tensor(X.values, dtype=torch.float32).unsqueeze(1)  # add the time axis
    model.eval()
    with torch.no_grad():
        return model(inputs).numpy().flatten()

# =============================
# 5. 特征筛选
# =============================
def feature_selection(df, target='future_return_20', top_n=6):
    """Return the columns most correlated (in absolute value) with ``target``.

    Only numeric columns take part, so string columns such as ``flag`` no
    longer make the correlation step fail. The target itself is excluded; it
    was previously always returned first because its self-correlation is 1.
    Columns whose correlation is undefined (for example constant columns) are
    dropped.

    NOTE(review): other forward-looking columns present in ``df`` are still
    eligible; drop them before calling if they must not be used as inputs.

    Args:
        df: Feature frame containing ``target``.
        target: Name of the label column.
        top_n: Maximum number of column names to return.

    Returns:
        List of up to ``top_n`` column names, strongest correlation first.

    Raises:
        KeyError: if ``target`` is not a numeric column of ``df``.
    """
    numeric = df.select_dtypes(include='number')
    correlation = (
        numeric.corr()[target]
        .drop(labels=target)
        .abs()
        .dropna()
        .sort_values(ascending=False)
    )
    top_features = correlation.index[:top_n].tolist()
    return top_features

In [7]:
from src.data_preprocessing import load_data, process_data, generate_features
from src.signal_model import train_lightgbm, train_lstm, load_lightgbm, load_lstm
from src.market_replay import MarketReplaySimulator
from src.rl_model import TD3_Agent
from src.evaluation import evaluate_strategy
import pandas as pd
import os

# =============================
# 配置路径
# =============================
# Raw input file. The default is the original absolute location; set the
# IMM_RAW_DATA_PATH environment variable to run against a different file
# without editing the code.
RAW_DATA_PATH = os.environ.get(
    "IMM_RAW_DATA_PATH",
    "/nas197/uhome/zhangrui/merged_transaction_20241113.csv",
)
PROCESSED_DATA_PATH = "./data/processed/processed_data.csv"
FEATURES_PATH = "./data/features/executed_orders.csv"
LGB_MODEL_PATH = "./models/signal_model.pkl"
LSTM_MODEL_PATH = "./models/lstm_model.pth"
RL_MODEL_PATH = "./models/rl_model.pth"

# Output locations for the evaluation metrics and the equity-curve figure.
EVAL_METRICS_PATH = "./results/evaluation_metrics.txt"
EQUITY_CURVE_PATH = "./results/equity_curve.png"

# =============================
# 1. 数据预处理流程
# =============================
def preprocess_data():
    """Load the raw file, clean it, build the features and cache the result.

    The feature frame is written to ``PROCESSED_DATA_PATH`` together with its
    index: ``load_data`` moves the timestamps into the index, so writing with
    ``index=False`` discarded them from the cached file. The output directory
    is created when it does not exist.

    Returns:
        The in-memory feature DataFrame.
    """
    print("🔄 加载原始数据...")
    data = load_data(RAW_DATA_PATH)
    print(f"✅ 数据加载完成, 数据维度: {data.shape}")

    print("🔄 数据清洗和预处理...")
    processed_data = process_data(data)
    feature_data = generate_features(processed_data)

    print("💾 正在保存处理后的数据...")
    os.makedirs(os.path.dirname(PROCESSED_DATA_PATH) or ".", exist_ok=True)
    feature_data.to_csv(PROCESSED_DATA_PATH, index=True)
    print(f"✅ 数据已保存至: {PROCESSED_DATA_PATH}")

    return feature_data

# =============================
# 2. 训练信号模型
# =============================
def train_signal_models(feature_data):
    """Train and save both signal models (LightGBM first, then the LSTM).

    The directories of ``LGB_MODEL_PATH`` and ``LSTM_MODEL_PATH`` are created
    beforehand so that saving does not fail on a fresh checkout.
    """
    for model_path in (LGB_MODEL_PATH, LSTM_MODEL_PATH):
        os.makedirs(os.path.dirname(model_path) or ".", exist_ok=True)

    print("🚀 开始训练 LightGBM 信号模型...")
    train_lightgbm(feature_data, LGB_MODEL_PATH)

    print("🚀 开始训练 LSTM 信号模型...")
    train_lstm(feature_data, LSTM_MODEL_PATH)

# =============================
# 3. 市场回放模拟器
# =============================
def run_market_replay(feature_data):
    """Replay ``feature_data`` through the simulator and save the fills.

    The fills are written to ``FEATURES_PATH``; its directory is created when
    missing.
    """
    print("🚀 启动市场回放模拟器...")
    simulator = MarketReplaySimulator(feature_data)
    # The simulator keeps the fill records itself; the returned frame is not needed here.
    simulator.market_replay()

    print("💾 正在保存成交订单数据...")
    os.makedirs(os.path.dirname(FEATURES_PATH) or ".", exist_ok=True)
    simulator.save_executed_orders(FEATURES_PATH)
    print(f"✅ 成交订单数据已保存至: {FEATURES_PATH}")

# =============================
# 4. 强化学习模型训练
# =============================
def train_rl_model(feature_data):
    """Build the TD3 agent, optionally resume from a checkpoint, run one update, save.

    NOTE(review): nothing in this function (or in the visible code) fills the
    agent's replay buffer, so the single ``train`` call below returns without
    updating any weights and the saved actor is whatever was loaded or randomly
    initialised. An environment loop that stores experiences is still missing;
    until then a warning is printed so the saved model is not mistaken for a
    trained one.
    """
    print("🚀 开始训练 TD3 强化学习模型...")

    batch_size = 64
    simulator = MarketReplaySimulator(feature_data)

    # Use the simulator's dimensions when it defines them; fall back otherwise
    # so a simulator without these attributes does not raise AttributeError.
    # NOTE(review): the fallbacks (number of numeric feature columns, and a
    # single action) are assumptions - confirm the intended state/action layout.
    state_dim = getattr(simulator, "state_dim", None)
    if state_dim is None:
        state_dim = int(feature_data.select_dtypes(include="number").shape[1])
    action_dim = getattr(simulator, "action_dim", None)
    if action_dim is None:
        action_dim = 1

    rl_agent = TD3_Agent(state_dim=state_dim, action_dim=action_dim)

    if os.path.exists(RL_MODEL_PATH):
        print("🔄 加载现有 RL 模型，进行断点续训...")
        rl_agent.load_model(RL_MODEL_PATH)

    if len(rl_agent.replay_buffer) < batch_size:
        print("⚠️ 经验回放缓冲区样本不足，本次 train() 不会更新任何参数")

    rl_agent.train(batch_size=batch_size)

    os.makedirs(os.path.dirname(RL_MODEL_PATH) or ".", exist_ok=True)
    rl_agent.save_model(RL_MODEL_PATH)
    print(f"✅ RL 模型已保存至: {RL_MODEL_PATH}")

# =============================
# 5. 模型评估
# =============================
def evaluate_models(feature_data):
    """Evaluate the strategy, print the metrics, and write the report files.

    Changes relative to the previous behaviour:
      * Non-scalar metric values (such as the ``equity_curve`` entry handled
        further down) are no longer pushed through ``:.4f`` formatting, which
        raised ``TypeError``.
      * Metrics from the 14-item list that were not computed are reported as
        ``N/A`` instead of a fabricated ``0.0``; scalar metrics that were
        computed but are not in that list are appended to the report.
      * The output directories are created when missing.
    """
    import numbers

    print("📊 正在评估模型表现...")

    # The loaded models are not used further down; loading them still makes a
    # missing or unreadable model file fail here.
    load_lightgbm(LGB_MODEL_PATH)
    load_lstm(LSTM_MODEL_PATH, input_dim=4)

    metrics = evaluate_strategy(feature_data)
    print("✅ 模型评估结果:")
    scalar_metrics = {}
    for key, value in metrics.items():
        if isinstance(value, numbers.Number):
            scalar_metrics[key] = value
            print(f"{key}: {value:.4f}")
        else:
            print(f"{key}: <{type(value).__name__}>")

    # ---- metric report and visualisation ----

    # 1) Write the 14 headline metrics to the text report.
    metric_order = [
        "Annualized Return",
        "Sharpe Ratio",
        "Max Drawdown",
        "Volatility",
        "Downside Volatility",
        "Sortino Ratio",
        "Information Ratio",
        "Win Rate",
        "Profit/Loss Ratio",
        "Trading Frequency",
        "Alpha",
        "Beta",
        "Calmar Ratio",
        "Strategy Consistency"
    ]
    os.makedirs(os.path.dirname(EVAL_METRICS_PATH) or ".", exist_ok=True)
    with open(EVAL_METRICS_PATH, "w", encoding="utf-8") as f:
        f.write("以下为 14 项关键评价指标：\n\n")
        for m in metric_order:
            val = metrics.get(m, "N/A")
            f.write(f"{m}: {val}\n")

        extra_keys = [key for key in scalar_metrics if key not in metric_order]
        if extra_keys:
            f.write("\n实际计算得到的其他指标：\n\n")
            for key in extra_keys:
                f.write(f"{key}: {scalar_metrics[key]}\n")

    # 2) Plot and save the equity curve when the evaluation provides one.
    if "equity_curve" in metrics:
        import matplotlib.pyplot as plt
        os.makedirs(os.path.dirname(EQUITY_CURVE_PATH) or ".", exist_ok=True)
        plt.figure(figsize=(10, 6))
        metrics["equity_curve"].plot()
        plt.title("Equity Curve")
        plt.xlabel("Date")
        plt.ylabel("Portfolio Value")
        plt.grid(True)
        plt.savefig(EQUITY_CURVE_PATH, dpi=150, bbox_inches="tight")
        plt.close()
        print(f"✅ 已保存权益曲线图到: {EQUITY_CURVE_PATH}")

# =============================
# 6. 训练流程整合
# =============================
def train_pipeline():
    """Run the whole workflow: preprocessing, then every downstream stage in order."""
    # Step 1: preprocessing produces the feature frame shared by all later stages.
    feature_data = preprocess_data()

    # Steps 2-5: signal models, market replay, RL model, evaluation.
    downstream_stages = (
        train_signal_models,
        run_market_replay,
        train_rl_model,
        evaluate_models,
    )
    for stage in downstream_stages:
        stage(feature_data)

    print("✅ 完整训练流程完成！")

In [None]:
from src.data_preprocessing import load_data, process_data, generate_features
from src.signal_model import train_lightgbm, train_lstm
from src.market_replay import MarketReplaySimulator
from src.rl_model import TD3_Agent
from src.evaluation import evaluate_strategy
from src.train import train_pipeline

import pandas as pd
import os
import sys

# =============================
# 配置路径
# =============================
# Raw input file. The default is the original absolute location; set the
# IMM_RAW_DATA_PATH environment variable to run against a different file
# without editing the code.
RAW_DATA_PATH = os.environ.get(
    "IMM_RAW_DATA_PATH",
    "/nas197/uhome/zhangrui/merged_transaction_20241113.csv",
)
PROCESSED_DATA_PATH = "./data/processed/processed_data.csv"
FEATURES_PATH = "./data/features/executed_orders.csv"
LGB_MODEL_PATH = "./models/signal_model.pkl"
LSTM_MODEL_PATH = "./models/lstm_model.pth"
RL_MODEL_PATH = "./models/rl_model.pth"

# Output locations for the evaluation metrics and the equity-curve figure.
EVAL_METRICS_PATH = "./results/evaluation_metrics.txt"
EQUITY_CURVE_PATH = "./results/equity_curve.png"

# =============================
# Log file configuration
# =============================
RESULT_LOG_PATH = "./results/result_001.txt"

class Logger:
    """Tee for ``sys.stdout``: every write goes to the terminal and to a log file.

    Improvements over the bare version:
      * the log directory is created when missing;
      * if ``sys.stdout`` is already such a tee (for example the notebook cell
        was run twice), the new instance wraps the underlying terminal and
        closes the previous log file, so output is not duplicated and the old
        handle is not leaked;
      * ``close()`` and context-manager support allow the file to be released;
      * unknown attributes (``encoding``, ``isatty`` ...) are delegated to the
        terminal stream so the object is a better stand-in for stdout.
    """

    def __init__(self, log_path):
        current = sys.stdout
        # Duck-typed unwrap: after re-running the defining cell an older tee is
        # an instance of a stale class, so ``isinstance`` would not match it.
        while 'terminal' in getattr(current, '__dict__', {}) and 'log' in current.__dict__:
            try:
                current.log.close()
            except Exception:
                pass  # best effort: a broken old handle must not block the new logger
            current = current.terminal
        self.terminal = current

        parent_dir = os.path.dirname(log_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        self.log = open(log_path, "w", encoding="utf-8")

    def write(self, message):
        """Write ``message`` to the terminal and, while it is open, to the log file."""
        self.terminal.write(message)
        if not self.log.closed:
            self.log.write(message)
        return len(message)

    def flush(self):
        """Flush the terminal and, while it is open, the log file."""
        self.terminal.flush()
        if not self.log.closed:
            self.log.flush()

    def close(self):
        """Close the log file only; the terminal stream is left open."""
        if not self.log.closed:
            self.log.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    def __getattr__(self, name):
        # Only reached for attributes not found normally. Guard the two
        # instance attributes to avoid infinite recursion on a half-built object.
        if name in ('terminal', 'log'):
            raise AttributeError(name)
        return getattr(self.terminal, name)

# Start logging: from here on, everything written to stdout also goes to the
# file at RESULT_LOG_PATH. NOTE(review): the previous sys.stdout is not
# restored anywhere in the visible code.
sys.stdout = Logger(RESULT_LOG_PATH)

# =============================
# 1. 主流程入口
# =============================
def main():
    """End-to-end entry point: preprocess (or reuse the cache), train, replay, evaluate.

    Fixes relative to the previous version:
      * every output directory is created before it is written to;
      * the processed CSV keeps its index, so the timestamps that ``load_data``
        moved into the index are no longer dropped from the cache;
      * the RL dimensions fall back to defaults when the simulator does not
        define ``state_dim`` / ``action_dim`` (this raised AttributeError).

    NOTE(review): a freshly processed frame is indexed by datetime while a
    frame read back from the cache has a default integer index; downstream
    code only selects columns by name.
    NOTE(review): nothing fills the RL agent's replay buffer, so step 4 saves an
    actor that has not been updated; a warning is printed in that case.
    """
    print("🚀 IMM 策略复现项目启动...")

    # Step 1: preprocessing (skipped when the cached file exists).
    if not os.path.exists(PROCESSED_DATA_PATH):
        print("🔄 [1/5] 数据预处理中...")
        data = load_data(RAW_DATA_PATH)
        processed_data = process_data(data)
        feature_data = generate_features(processed_data)
        os.makedirs(os.path.dirname(PROCESSED_DATA_PATH) or ".", exist_ok=True)
        feature_data.to_csv(PROCESSED_DATA_PATH, index=True)
        print(f"✅ 数据预处理完成，已保存至: {PROCESSED_DATA_PATH}")
    else:
        print(f"✅ 已发现已处理数据文件: {PROCESSED_DATA_PATH}")
        feature_data = pd.read_csv(PROCESSED_DATA_PATH)

    # Step 2: signal models.
    print("🚀 [2/5] 训练信号模型...")
    for model_path in (LGB_MODEL_PATH, LSTM_MODEL_PATH, RL_MODEL_PATH):
        os.makedirs(os.path.dirname(model_path) or ".", exist_ok=True)
    train_lightgbm(feature_data, LGB_MODEL_PATH)
    train_lstm(feature_data, LSTM_MODEL_PATH)

    # Step 3: market replay (skipped when the fills file exists).
    print("🚀 [3/5] 启动市场回放模拟器...")
    if not os.path.exists(FEATURES_PATH):
        simulator = MarketReplaySimulator(feature_data)
        simulator.market_replay()
        os.makedirs(os.path.dirname(FEATURES_PATH) or ".", exist_ok=True)
        simulator.save_executed_orders(FEATURES_PATH)
        print(f"✅ 市场回放完成，已保存至: {FEATURES_PATH}")
    else:
        print(f"✅ 市场回放数据已存在，跳过回放。")

    # Step 4: reinforcement-learning model.
    print("🚀 [4/5] 训练强化学习模型...")
    batch_size = 64
    simulator = MarketReplaySimulator(feature_data)

    # NOTE(review): the fallbacks (number of numeric feature columns, and a
    # single action) are assumptions - confirm the intended state/action layout.
    state_dim = getattr(simulator, "state_dim", None)
    if state_dim is None:
        state_dim = int(feature_data.select_dtypes(include="number").shape[1])
    action_dim = getattr(simulator, "action_dim", None)
    if action_dim is None:
        action_dim = 1
    rl_agent = TD3_Agent(state_dim=state_dim, action_dim=action_dim)

    if os.path.exists(RL_MODEL_PATH):
        print("🔄 加载现有 RL 模型，进行断点续训...")
        rl_agent.load_model(RL_MODEL_PATH)

    if len(rl_agent.replay_buffer) < batch_size:
        print("⚠️ 经验回放缓冲区样本不足，本次 train() 不会更新任何参数")

    rl_agent.train(batch_size=batch_size)
    rl_agent.save_model(RL_MODEL_PATH)
    print(f"✅ RL 模型已保存至: {RL_MODEL_PATH}")

    # Step 5: evaluation.
    print("📊 [5/5] 正在评估模型表现...")
    metrics = evaluate_strategy(feature_data)
    print("✅ 模型评估结果:")
    for key, value in metrics.items():
        print(f"{key}: {value:.4f}")

    print("✅ IMM 策略复现项目完成！")

# Run the full workflow only when this code is executed as the top-level
# program, not when it is imported as a module.
if __name__ == "__main__":
    main()

🚀 IMM 策略复现项目启动...
✅ 已发现已处理数据文件: ./data/processed/processed_data.csv
