# # 量化策略建模与评估
# ## S&P 500股票预测模型
# 
# ### 项目概述
# 本笔记本展示了完整的深度学习模型训练流程，包括：
# 1. 数据准备和特征工程
# 2. LSTM模型训练与评估
# 3. Transformer模型训练与评估  
# 4. 模型性能对比分析
# 5. 交易策略回测
# 
# ### 技术栈
# - 数据处理：Pandas, NumPy
# - 深度学习：PyTorch
# - 可视化：Matplotlib, Seaborn
# - 回测引擎：自定义BacktestEngine

In [None]:
# %% [markdown]
# ## 1. 环境设置与数据准备

# %%
# 导入必要的库
import os
import random
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
import torch.nn as nn
warnings.filterwarnings('ignore')

# 设置随机种子以确保可重复性
def set_seed(seed=42):
    """Seed every random number generator this notebook can touch.

    Seeds Python's built-in ``random`` module, NumPy's global generator and
    PyTorch (CPU plus all CUDA devices), then switches cuDNN to deterministic
    mode so repeated runs pick the same kernels.

    Args:
        seed: Integer seed applied to every generator. Defaults to 42.
    """
    # Python's own generator was previously left unseeded, so any code path
    # using `random` (shuffling, sampling) stayed non-reproducible.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # Give up cuDNN auto-tuning in exchange for run-to-run determinism.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# Fix all random seeds before anything stochastic runs.
set_seed(seed=42)

# Global plotting defaults: figure size, font size, seaborn grid style and palette.
plt.rcParams.update({
    'figure.figsize': [12, 8],
    'font.size': 12,
})
sns.set_style("whitegrid")
sns.set_palette("husl")

# Make the project's source directory importable, then pull in the project modules.
import sys
sys.path.append('../src')
from data_preprocessing import DataPreprocessor
from models import (
    LSTMModel,
    TransformerModel,
    ModelTrainer,
    create_data_loaders,
)
from models import (
    train_lstm_model,
    train_transformer_model,
    compare_models,
)
from backtest import BacktestEngine

# Report the runtime environment in a single call (one line per item).
print(
    "环境设置完成！",
    f"PyTorch版本: {torch.__version__}",
    f"GPU可用: {torch.cuda.is_available()}",
    sep="\n",
)

In [None]:
# %% [markdown]
# ## 2. 数据加载与预处理

# %%
# Build the project preprocessor around the raw price CSV.
print("加载和预处理数据...")
preprocessor = DataPreprocessor(data_path='../data/raw/all_stocks_5yr.csv')

# Load the raw data and report its dimensions.
raw_data = preprocessor.load_data()
print(f"原始数据形状: {raw_data.shape}")

# %%
# Quick exploratory overview: date span, number of distinct tickers and the
# column list. Requires raw_data to expose 'date' and 'Name' columns.
print("\n数据概览:")
print(f"时间范围: {raw_data['date'].min()} 到 {raw_data['date'].max()}")
print(f"股票数量: {raw_data['Name'].nunique()}")
print(f"数据列: {list(raw_data.columns)}")

# %%
# Cleaning and feature engineering, both delegated to clean_data().
print("\n开始数据清洗和特征工程...")
processed_data = preprocessor.clean_data()
print(f"处理后数据形状: {processed_data.shape}")
# NOTE(review): this prints the total column count of processed_data, which
# presumably still includes non-feature columns such as 'date' — confirm.
print(f"特征数量: {len(processed_data.columns)}")

# Persist the processed data to disk.
preprocessor.save_processed_data('../data/processed/processed_data.csv')

# %%
# Inspect the processed data: first five rows, then the full column list.
print("\n处理后数据的前5行:")
print(processed_data.head())

print("\n处理后数据的列:")
print(processed_data.columns.tolist())

# %%
# Build the model inputs: X (windowed features), y (targets) and the feature names.
# The target is the 'daily_return' column, with a 60-step lookback and a 1-step horizon.
print("\n准备特征和目标变量...")
X, y, feature_names = preprocessor.prepare_features_target(
    target_column='daily_return',
    lookback=60,
    prediction_horizon=1
)

print(f"特征矩阵形状: X={X.shape}, y={y.shape}")
print(f"特征数量: {len(feature_names)}")
print(f"特征名称: {feature_names}")

# %%
# Hold out 20% of the samples as the test set.
# NOTE(review): for time-series data the split should be chronological (no
# shuffling) — confirm that split_train_test does this.
print("\n划分训练集和测试集...")
X_train, X_test, y_train, y_test = preprocessor.split_train_test(X, y, test_size=0.2)

print(f"训练集: X={X_train.shape}, y={y_train.shape}")
print(f"测试集: X={X_test.shape}, y={y_test.shape}")

# %%
# Standardise the features; the scaler is returned alongside the scaled arrays.
# NOTE(review): presumably the scaler is fitted on X_train only — verify in DataPreprocessor.
print("\n特征标准化...")
X_train_scaled, X_test_scaled, scaler = preprocessor.normalize_features(X_train, X_test)

print(f"标准化后训练集: {X_train_scaled.shape}")
print(f"标准化后测试集: {X_test_scaled.shape}")

In [None]:
# %% [markdown]
# ## 3. LSTM模型训练

# %%
# LSTM configuration and training.
print("=" * 60)
print("LSTM模型训练")
print("=" * 60)

# Hyperparameters; the whole dict is forwarded as keyword arguments to train_lstm_model.
lstm_params = {
    'input_size': X_train_scaled.shape[2],  # number of features (size of the last axis)
    'hidden_size': 128,
    'num_layers': 2,
    'dropout': 0.2,
    'batch_size': 64,
    'learning_rate': 0.001,
    'num_epochs': 100,
    'patience': 15
}

print("LSTM模型参数:")
for key, value in lstm_params.items():
    print(f"  {key}: {value}")

# %%
# Train the LSTM; returns the model, its trainer and the training history.
# NOTE(review): the test split is passed into training here. If train_lstm_model
# uses it for validation / early stopping (the history exposes 'val_loss'
# later in this notebook), the test metrics below are optimistically biased —
# confirm, and consider carving a validation set out of the training data.
print("\n开始训练LSTM模型...")
lstm_model, lstm_trainer, lstm_history = train_lstm_model(
    X_train_scaled, y_train, X_test_scaled, y_test,
    **lstm_params
)

# %%
# Plot the LSTM training history.
print("\n绘制LSTM训练历史...")
lstm_trainer.plot_training_history()

# %%
# Evaluate the LSTM on the test set.
# NOTE(review): _calculate_metrics is a private trainer method; a public
# wrapper would be a safer dependency.
print("\nLSTM模型测试集评估...")
lstm_predictions = lstm_trainer.predict(X_test_scaled)
lstm_metrics = lstm_trainer._calculate_metrics(y_test, lstm_predictions)

print("LSTM模型性能指标:")
for metric_name, metric_value in lstm_metrics.items():
    print(f"  {metric_name}: {metric_value:.6f}")

# %%
# Plot predictions against targets for the first 1000 test samples.
print("\n绘制LSTM预测结果...")
lstm_trainer.plot_predictions(y_test[:1000], lstm_predictions[:1000], "_测试集")


In [None]:
# %% [markdown]
# ## 4. Transformer模型训练

# %%
# Transformer configuration and training.
print("=" * 60)
print("Transformer模型训练")
print("=" * 60)

# Hyperparameters; the whole dict is forwarded as keyword arguments to train_transformer_model.
transformer_params = {
    'input_size': X_train_scaled.shape[2],  # number of features (size of the last axis)
    'd_model': 256,
    'nhead': 4,
    'num_layers': 4,
    'dim_feedforward': 512,
    'dropout': 0.1,
    'batch_size': 32,
    'learning_rate': 0.0005,
    'num_epochs': 100,
    'patience': 15
}

print("Transformer模型参数:")
for key, value in transformer_params.items():
    print(f"  {key}: {value}")

# %%
# Train the Transformer; returns the model, its trainer and the training history.
# NOTE(review): the test split is passed into training here. If
# train_transformer_model uses it for validation / early stopping, the test
# metrics below are optimistically biased — confirm.
print("\n开始训练Transformer模型...")
transformer_model, transformer_trainer, transformer_history = train_transformer_model(
    X_train_scaled, y_train, X_test_scaled, y_test,
    **transformer_params
)

# %%
# Plot the Transformer training history.
print("\n绘制Transformer训练历史...")
transformer_trainer.plot_training_history()

# %%
# Evaluate the Transformer on the test set (same private metric helper as the LSTM cell).
print("\nTransformer模型测试集评估...")
transformer_predictions = transformer_trainer.predict(X_test_scaled)
transformer_metrics = transformer_trainer._calculate_metrics(y_test, transformer_predictions)

print("Transformer模型性能指标:")
for metric_name, metric_value in transformer_metrics.items():
    print(f"  {metric_name}: {metric_value:.6f}")

# %%
# Plot predictions against targets for the first 1000 test samples.
print("\n绘制Transformer预测结果...")
transformer_trainer.plot_predictions(y_test[:1000], transformer_predictions[:1000], "_测试集")


In [None]:
# %% [markdown]
# ## 5. 模型对比分析

# %%
# Section banner for the model comparison.
section_rule = "=" * 60
print(section_rule, "模型性能对比分析", section_rule, sep="\n")

# Test-set predictions keyed by model name, as expected by compare_models.
predictions_dict = dict(
    LSTM=lstm_predictions,
    Transformer=transformer_predictions,
)

# Build the comparison table and print it without the index column.
comparison_df = compare_models(y_test, predictions_dict)
comparison_text = comparison_df.to_string(index=False)
print("\n模型性能对比表格:")
print(comparison_text)

# %%
# Side-by-side visual comparison of the LSTM and Transformer test-set
# predictions, drawn as a 2x2 panel figure and saved under ../results/figures.
from matplotlib.lines import Line2D
from scipy import stats

# Work on flat 1-D views. If a trainer returns predictions shaped (N, 1) while
# y_test is (N,), `y_test - predictions` would silently broadcast into an
# (N, N) matrix and corrupt the error histogram and the Q-Q plot.
y_true_flat = np.asarray(y_test).ravel()
lstm_pred_flat = np.asarray(lstm_predictions).ravel()
transformer_pred_flat = np.asarray(transformer_predictions).ravel()

fig, axes = plt.subplots(2, 2, figsize=(15, 12))

# 1. Predicted vs. actual scatter on a random subsample of at most 1000 points.
ax1 = axes[0, 0]
sample_size = min(1000, len(y_true_flat))
indices = np.random.choice(len(y_true_flat), sample_size, replace=False)

ax1.scatter(y_true_flat[indices], lstm_pred_flat[indices], alpha=0.5, s=20, label='LSTM', color='blue')
ax1.scatter(y_true_flat[indices], transformer_pred_flat[indices], alpha=0.5, s=20, label='Transformer', color='red')
# Diagonal reference line: a perfect model would put every point on it.
ax1.plot([y_true_flat.min(), y_true_flat.max()], [y_true_flat.min(), y_true_flat.max()], 'k--', lw=2, label='完美预测')
ax1.set_xlabel('真实值')
ax1.set_ylabel('预测值')
ax1.set_title('模型预测对比散点图')
ax1.legend()
ax1.grid(True, alpha=0.3)

# 2. Distribution of prediction errors (actual minus predicted).
ax2 = axes[0, 1]
lstm_errors = y_true_flat - lstm_pred_flat
transformer_errors = y_true_flat - transformer_pred_flat

ax2.hist(lstm_errors, bins=50, alpha=0.5, label='LSTM误差', density=True)
ax2.hist(transformer_errors, bins=50, alpha=0.5, label='Transformer误差', density=True)
ax2.axvline(x=0, color='k', linestyle='--', linewidth=2)
ax2.set_xlabel('预测误差')
ax2.set_ylabel('密度')
ax2.set_title('预测误差分布对比')
ax2.legend()
ax2.grid(True, alpha=0.3)

# 3. Grouped bars for the headline metrics.
ax3 = axes[1, 0]
metrics_to_plot = ['RMSE', 'MAE', 'R²']
lstm_vals = [lstm_metrics['rmse'], lstm_metrics['mae'], lstm_metrics['r2']]
transformer_vals = [transformer_metrics['rmse'], transformer_metrics['mae'], transformer_metrics['r2']]

x = np.arange(len(metrics_to_plot))
width = 0.35

ax3.bar(x - width/2, lstm_vals, width, label='LSTM', color='blue')
ax3.bar(x + width/2, transformer_vals, width, label='Transformer', color='red')

ax3.set_xlabel('指标')
ax3.set_ylabel('值')
ax3.set_title('关键性能指标对比')
ax3.set_xticks(x)
ax3.set_xticklabels(metrics_to_plot)
ax3.legend()
ax3.grid(True, alpha=0.3, axis='y')

# 4. Residual Q-Q plot (normality check). Each probplot call adds two lines to
# the axes, the sample points and then the fitted line, so the indices below
# depend on the call order.
ax4 = axes[1, 1]

stats.probplot(lstm_errors, dist="norm", plot=ax4)
ax4.get_lines()[0].set_color('blue')
ax4.get_lines()[0].set_alpha(0.5)
ax4.get_lines()[1].set_color('red')
ax4.get_lines()[1].set_linewidth(2)

# Second model on the same axes: its lines are at indices 2 and 3.
stats.probplot(transformer_errors, dist="norm", plot=ax4)
ax4.get_lines()[2].set_color('green')
ax4.get_lines()[2].set_alpha(0.5)
ax4.get_lines()[3].set_color('red')

# Hand-built legend, because probplot does not label its lines.
custom_lines = [Line2D([0], [0], color='blue', lw=2),
                Line2D([0], [0], color='green', lw=2),
                Line2D([0], [0], color='red', lw=2)]
ax4.legend(custom_lines, ['LSTM', 'Transformer', '理论正态线'])
ax4.set_title('残差Q-Q图（正态性检验）')
ax4.grid(True, alpha=0.3)

plt.tight_layout()
# savefig does not create missing directories; make sure the target exists.
os.makedirs('../results/figures', exist_ok=True)
plt.savefig('../results/figures/model_comparison_analysis.png', dpi=300, bbox_inches='tight')
plt.show()


In [None]:
# %% [markdown]
# ## 6. 交易策略回测

# %%
# Prepare the price series that the backtests run on.
print("=" * 60)
print("交易策略回测")
print("=" * 60)

# The samples are sliding windows, so the prices matching the test set have to
# be recovered from processed_data. Simplification kept from the original:
# take the closing prices from the tail of processed_data.
# NOTE(review): this assumes the rows of processed_data are in the same order
# as the samples (one chronological series). If several tickers are stacked in
# the frame, the slice mixes tickers — confirm against DataPreprocessor.

# Number of lookback steps per sample; must match the `lookback` passed to
# prepare_features_target.
lookback_window = 60

# Slice covering the test period plus the lookback rows that precede it.
test_start_idx = max(0, len(processed_data) - len(y_test) - lookback_window)
test_end_idx = len(processed_data)

test_prices = processed_data.iloc[test_start_idx:test_end_idx]['close'].values
test_dates = processed_data.iloc[test_start_idx:test_end_idx]['date'].values

print(f"测试集价格数据形状: {test_prices.shape}")
print(f"测试集日期范围: {test_dates[0]} 到 {test_dates[-1]}")
print(f"测试集价格范围: ${test_prices.min():.2f} - ${test_prices.max():.2f}")

# Align prices and predictions on their most recent entries. The price slice
# starts `lookback_window` rows before the first test target, so truncating
# from the front (as before) paired every prediction with a price from about
# 60 rows earlier. Keeping the tail lines each prediction up with the rows at
# the end of the data set, where the test targets sit.
min_len = min(len(test_prices), len(lstm_predictions), len(transformer_predictions))
# `len(x) - min_len` rather than `-min_len`, so min_len == 0 yields an empty slice.
test_prices_aligned = test_prices[len(test_prices) - min_len:]
lstm_predictions_aligned = lstm_predictions[len(lstm_predictions) - min_len:]
transformer_predictions_aligned = transformer_predictions[len(transformer_predictions) - min_len:]

print(f"\n对齐后数据长度: {min_len}")

# %%
# Backtest driven by the LSTM predictions.
print("\n" + "=" * 30)
print("LSTM策略回测")
print("=" * 30)

# Create a fresh backtest engine for this strategy.
lstm_engine = BacktestEngine(
    initial_capital=100000.0,
    commission=0.001,  # 0.1% commission per trade
    slippage=0.0005    # 0.05% slippage
)

# Run the backtest with the 'ml_enhanced' strategy on the aligned prices and predictions.
lstm_results = lstm_engine.run_backtest(
    test_prices_aligned,
    lstm_predictions_aligned,
    strategy_type='ml_enhanced'
)

# Generate the report; its 'summary' entry is printed key by key below.
lstm_report = lstm_engine.generate_report("LSTM增强策略")

print("\nLSTM策略回测结果摘要:")
for key, value in lstm_report['summary'].items():
    print(f"  {key}: {value}")

# Plot the backtest results, passing the target image path to the engine.
lstm_engine.plot_results('../results/figures/backtest_lstm_enhanced.png')

# %%
# Backtest driven by the Transformer predictions.
print("\n" + "=" * 30)
print("Transformer策略回测")
print("=" * 30)

# Create a fresh backtest engine with the same capital, commission and
# slippage as the LSTM run, so the two strategies are comparable.
transformer_engine = BacktestEngine(
    initial_capital=100000.0,
    commission=0.001,
    slippage=0.0005
)

# Run the backtest with the 'ml_enhanced' strategy on the aligned prices and predictions.
transformer_results = transformer_engine.run_backtest(
    test_prices_aligned,
    transformer_predictions_aligned,
    strategy_type='ml_enhanced'
)

# Generate the report; its 'summary' entry is printed key by key below.
transformer_report = transformer_engine.generate_report("Transformer增强策略")

print("\nTransformer策略回测结果摘要:")
for key, value in transformer_report['summary'].items():
    print(f"  {key}: {value}")

# Plot the backtest results, passing the target image path to the engine.
transformer_engine.plot_results('../results/figures/backtest_transformer_enhanced.png')

# %%
# Section banner for the strategy comparison.
subsection_rule = "=" * 30
print("\n" + subsection_rule)
print("策略性能对比")
print(subsection_rule)

# One entry per table row: (display label, key in the results dict, format template).
comparison_row_specs = [
    ('初始资金', 'initial_capital', '${:,.2f}'),
    ('最终资金', 'final_capital', '${:,.2f}'),
    ('总收益率', 'total_return', '{:.2%}'),
    ('年化收益率', 'annual_return', '{:.2%}'),
    ('夏普比率', 'sharpe_ratio', '{:.3f}'),
    ('最大回撤', 'max_drawdown', '{:.2%}'),
    ('胜率', 'win_rate', '{:.2%}'),
    ('交易次数', 'trade_count', '{}'),
]


def _format_strategy_column(results):
    """Render one strategy's backtest results as the formatted table column."""
    return [
        template.format(results[result_key])
        for _, result_key, template in comparison_row_specs
    ]


# Assemble the comparison table: metric labels first, then one column per strategy.
strategy_comparison = pd.DataFrame({
    '指标': [label for label, _, _ in comparison_row_specs],
    'LSTM策略': _format_strategy_column(lstm_results),
    'Transformer策略': _format_strategy_column(transformer_results),
})

print("策略回测性能对比:")
print(strategy_comparison.to_string(index=False))

# %%
# Visual comparison of the two strategy backtests, drawn as a 2x3 panel figure
# and saved under ../results/figures.
fig, axes = plt.subplots(2, 3, figsize=(18, 12))

# Coerce every per-step history to a float ndarray, so the arithmetic below
# (`- capital`, `* 100`) works whether the engine returns lists, Series or arrays.
lstm_capital = np.asarray(lstm_results['capital_history'], dtype=float)
transformer_capital = np.asarray(transformer_results['capital_history'], dtype=float)
lstm_drawdown = np.asarray(lstm_results['drawdown_history'], dtype=float)
transformer_drawdown = np.asarray(transformer_results['drawdown_history'], dtype=float)
lstm_daily_returns = np.asarray(lstm_results['returns_history'], dtype=float)
transformer_daily_returns = np.asarray(transformer_results['returns_history'], dtype=float)

# Read the starting capital from the results rather than repeating the literal
# 100000, so the plots stay correct if the engine configuration changes.
lstm_initial_capital = lstm_results['initial_capital']
transformer_initial_capital = transformer_results['initial_capital']

# 1. Equity curves.
axes[0, 0].plot(lstm_capital, label='LSTM策略', linewidth=2)
axes[0, 0].plot(transformer_capital, label='Transformer策略', linewidth=2)
axes[0, 0].axhline(y=lstm_initial_capital, color='gray', linestyle='--', alpha=0.7, label='初始资金')
axes[0, 0].set_xlabel('交易日')
axes[0, 0].set_ylabel('资金 ($)')
axes[0, 0].set_title('资金曲线对比')
axes[0, 0].legend()
axes[0, 0].grid(True, alpha=0.3)

# 2. Cumulative return, each relative to that strategy's own starting capital.
lstm_cum_returns = (lstm_capital - lstm_initial_capital) / lstm_initial_capital
transformer_cum_returns = (transformer_capital - transformer_initial_capital) / transformer_initial_capital

axes[0, 1].plot(lstm_cum_returns * 100, label='LSTM策略', linewidth=2)
axes[0, 1].plot(transformer_cum_returns * 100, label='Transformer策略', linewidth=2)
axes[0, 1].axhline(y=0, color='gray', linestyle='-', alpha=0.5)
axes[0, 1].set_xlabel('交易日')
axes[0, 1].set_ylabel('累计收益率 (%)')
axes[0, 1].set_title('累计收益率对比')
axes[0, 1].legend()
axes[0, 1].grid(True, alpha=0.3)

# 3. Drawdown curves, shaded down to zero.
axes[0, 2].plot(lstm_drawdown * 100, label='LSTM策略', linewidth=2, color='blue')
axes[0, 2].plot(transformer_drawdown * 100, label='Transformer策略', linewidth=2, color='red')
axes[0, 2].fill_between(range(len(lstm_drawdown)),
                         lstm_drawdown * 100, 0, alpha=0.2, color='blue')
axes[0, 2].fill_between(range(len(transformer_drawdown)),
                         transformer_drawdown * 100, 0, alpha=0.2, color='red')
axes[0, 2].set_xlabel('交易日')
axes[0, 2].set_ylabel('回撤 (%)')
axes[0, 2].set_title('回撤曲线对比')
axes[0, 2].legend()
axes[0, 2].grid(True, alpha=0.3)

# 4. Risk/return headline metrics as grouped bars.
metrics_for_bar = ['夏普比率', '最大回撤 (%)', '年化收益率 (%)']
lstm_bar_vals = [lstm_results['sharpe_ratio'],
                 lstm_results['max_drawdown'] * 100,
                 lstm_results['annual_return'] * 100]
transformer_bar_vals = [transformer_results['sharpe_ratio'],
                        transformer_results['max_drawdown'] * 100,
                        transformer_results['annual_return'] * 100]

x = np.arange(len(metrics_for_bar))
width = 0.35

axes[1, 0].bar(x - width/2, lstm_bar_vals, width, label='LSTM策略', color='blue')
axes[1, 0].bar(x + width/2, transformer_bar_vals, width, label='Transformer策略', color='red')
axes[1, 0].set_xlabel('指标')
axes[1, 0].set_ylabel('值')
axes[1, 0].set_title('关键风险收益指标对比')
axes[1, 0].set_xticks(x)
axes[1, 0].set_xticklabels(metrics_for_bar)
axes[1, 0].legend()
axes[1, 0].grid(True, alpha=0.3, axis='y')

# 5. Trade count and win rate as grouped bars.
trades_win_metrics = ['交易次数', '胜率 (%)']
lstm_trades_win = [lstm_results['trade_count'], lstm_results['win_rate'] * 100]
transformer_trades_win = [transformer_results['trade_count'], transformer_results['win_rate'] * 100]

x2 = np.arange(len(trades_win_metrics))

axes[1, 1].bar(x2 - width/2, lstm_trades_win, width, label='LSTM策略', color='blue')
axes[1, 1].bar(x2 + width/2, transformer_trades_win, width, label='Transformer策略', color='red')
axes[1, 1].set_xlabel('指标')
axes[1, 1].set_ylabel('值')
axes[1, 1].set_title('交易统计对比')
axes[1, 1].set_xticks(x2)
axes[1, 1].set_xticklabels(trades_win_metrics)
axes[1, 1].legend()
axes[1, 1].grid(True, alpha=0.3, axis='y')

# 6. Distribution of per-step returns, in percent.
axes[1, 2].hist(lstm_daily_returns * 100, bins=50, alpha=0.5, label='LSTM策略', density=True)
axes[1, 2].hist(transformer_daily_returns * 100, bins=50, alpha=0.5, label='Transformer策略', density=True)
axes[1, 2].axvline(x=0, color='k', linestyle='--', linewidth=2)
axes[1, 2].set_xlabel('日收益率 (%)')
axes[1, 2].set_ylabel('密度')
axes[1, 2].set_title('日收益率分布对比')
axes[1, 2].legend()
axes[1, 2].grid(True, alpha=0.3)

plt.tight_layout()
# savefig does not create missing directories; make sure the target exists.
os.makedirs('../results/figures', exist_ok=True)
plt.savefig('../results/figures/strategy_comparison.png', dpi=300, bbox_inches='tight')
plt.show()


In [None]:
# %% [markdown]
# ## 7. 特征重要性分析

# %%
# Feature importance analysis (derived from model weights).
print("=" * 60)
print("特征重要性分析")
print("=" * 60)

def analyze_feature_importance(model, feature_names, model_type='LSTM'):
    """Rank input features by the mean absolute weight of the model's input layer.

    For an LSTM the input-to-hidden weights of the first layer
    (``model.lstm.weight_ih_l0``) are used; for a Transformer the weights of
    ``model.input_projection``. In both cases the absolute weights are averaged
    over the output axis, giving one score per input feature.

    This is a rough proxy only: weight magnitude ignores feature interactions
    and everything downstream of the first layer.

    Args:
        model: Trained model exposing ``lstm`` (LSTM) or ``input_projection``
            (Transformer).
        feature_names: Sequence of feature names, one per model input.
        model_type: ``'LSTM'`` or ``'Transformer'``.

    Returns:
        DataFrame with columns ``'特征'`` and ``'重要性'``, sorted by importance
        in descending order.

    Raises:
        ValueError: If ``model_type`` is not supported, or the number of
            feature names does not match the model's input width.
    """
    if model_type == 'LSTM':
        # First-layer input-to-hidden weights.
        input_weights = model.lstm.weight_ih_l0
    elif model_type == 'Transformer':
        # Weights of the input projection layer.
        input_weights = model.input_projection.weight
    else:
        raise ValueError(f"不支持的模型类型: {model_type}")

    # .cpu() is required before .numpy(): converting a CUDA tensor directly
    # raises, and the models may have been trained on a GPU.
    weight_matrix = input_weights.detach().cpu().numpy()
    # Average the absolute weights over the output axis: one score per input feature.
    importance_scores = np.abs(weight_matrix).mean(axis=0)

    feature_names = list(feature_names)
    if len(feature_names) != len(importance_scores):
        # Fail with a clear message instead of pandas' generic length error.
        raise ValueError(
            f"特征名称数量({len(feature_names)})与模型输入维度"
            f"({len(importance_scores)})不一致"
        )

    importance_df = pd.DataFrame({
        '特征': feature_names,
        '重要性': importance_scores
    })

    # Most important features first.
    return importance_df.sort_values('重要性', ascending=False)

# Rank the features for the LSTM and show the ten highest-scoring ones.
lstm_importance = analyze_feature_importance(lstm_model, feature_names, 'LSTM')
print("\nLSTM模型特征重要性排名:")
print(lstm_importance.head(10))

# Same ranking for the Transformer, based on its input projection weights.
transformer_importance = analyze_feature_importance(transformer_model, feature_names, 'Transformer')
print("\nTransformer模型特征重要性排名:")
print(transformer_importance.head(10))

# %%
# Horizontal bar charts of the top-ranked features, one panel per model.
# Cap the number of bars at the number of available features: with fewer than
# 10 features a hard-coded range(10) makes barh fail on mismatched lengths.
top_n = min(10, len(lstm_importance), len(transformer_importance))

fig, axes = plt.subplots(1, 2, figsize=(16, 8))

# (axes, ranked importance frame, model label, bar colour)
importance_panels = [
    (axes[0], lstm_importance, 'LSTM', 'blue'),
    (axes[1], transformer_importance, 'Transformer', 'red'),
]
for ax, importance_df, model_label, bar_color in importance_panels:
    # Reverse the top rows so the most important feature ends up as the top bar.
    top_features = importance_df.head(top_n).iloc[::-1]
    ax.barh(range(top_n), top_features['重要性'].values, color=bar_color)
    ax.set_yticks(range(top_n))
    ax.set_yticklabels(top_features['特征'].values)
    ax.set_xlabel('重要性分数')
    ax.set_title(f'{model_label}模型特征重要性 (Top {top_n})')
    ax.grid(True, alpha=0.3, axis='x')

plt.tight_layout()
# savefig does not create missing directories; make sure the target exists.
os.makedirs('../results/figures', exist_ok=True)
plt.savefig('../results/figures/feature_importance.png', dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# %% [markdown]
# ## 8. 模型可解释性分析

# %%
# SHAP value analysis of the LSTM (optional: needs the `shap` package).
try:
    import shap

    print("进行SHAP值分析...")

    # KernelExplainer is slow, so explain a small sample only. Cap the sample
    # at the number of available test windows.
    sample_size = min(100, len(X_test_scaled))
    # Window geometry, read from the data instead of hard-coding 60 steps.
    lookback_steps, n_features = X_test_scaled.shape[1], X_test_scaled.shape[2]
    # KernelExplainer works on 2-D inputs, so flatten each window to one row.
    X_sample = X_test_scaled[:sample_size].reshape(sample_size, -1)

    def _predict_from_flat(flat_batch):
        """Restore the (batch, steps, features) layout and return 1-D predictions."""
        # KernelExplainer calls the model with batches of varying size (perturbed
        # copies of the background data), so the batch axis must be inferred with
        # -1. Reshaping to a fixed `sample_size` rows fails on the first such call.
        windows = flat_batch.reshape(-1, lookback_steps, n_features)
        return np.asarray(lstm_trainer.predict(windows)).ravel()

    # The sample doubles as the background data set for the explainer.
    explainer = shap.KernelExplainer(_predict_from_flat, X_sample)

    # Compute SHAP values for the sample.
    shap_values = explainer.shap_values(X_sample, nsamples=50)

    # Summary plot. The flattened columns are ordered time step by time step,
    # so the feature names repeat once per step.
    plt.figure(figsize=(12, 8))
    shap.summary_plot(shap_values, X_sample, feature_names=list(feature_names) * lookback_steps, show=False)
    plt.title('LSTM模型SHAP值摘要图')
    plt.tight_layout()
    # savefig does not create missing directories; make sure the target exists.
    os.makedirs('../results/figures', exist_ok=True)
    plt.savefig('../results/figures/shap_summary.png', dpi=300, bbox_inches='tight')
    plt.show()

    print("SHAP分析完成！")

except ImportError:
    # shap is an optional dependency: skip the section instead of failing the notebook.
    print("SHAP库未安装，跳过SHAP分析。")
    print("安装命令: pip install shap")

In [None]:
# %% [markdown]
# ## 9. 敏感性分析

# %%
# Strategy parameter sensitivity analysis.
print("=" * 60)
print("策略参数敏感性分析")
print("=" * 60)

def analyze_strategy_sensitivity(predictions, prices, param_name, param_values,
                                 strategy_type='simple'):
    """Re-run the backtest once per parameter value and tabulate the outcome.

    Args:
        predictions: Model predictions handed to the backtest engine.
        prices: Price series aligned with ``predictions``.
        param_name: Name of the parameter being varied. ``'commission'`` and
            ``'slippage'`` are passed to the ``BacktestEngine`` constructor.
            Any other name (e.g. ``'stop_loss'``) is not wired into the engine
            yet: the backtest still runs, but every row is identical.
        param_values: Iterable of values to try for ``param_name``.
        strategy_type: Strategy passed to ``run_backtest``. Defaults to
            ``'simple'``, the strategy this analysis has always used.

    Returns:
        DataFrame with one row per value and the columns ``param_name``,
        ``'最终资金'``, ``'总收益率'``, ``'夏普比率'`` and ``'最大回撤'``.
    """
    # Parameters the engine accepts as constructor keywords in this notebook.
    engine_params = ('commission', 'slippage')
    if param_name not in engine_params:
        # Say so explicitly rather than silently returning identical rows.
        print(f"注意: 参数 '{param_name}' 尚未接入回测引擎，各取值的回测结果将完全相同。")

    results = []

    for param_value in param_values:
        engine_kwargs = {'initial_capital': 100000.0}
        if param_name in engine_params:
            # Configure through the constructor rather than mutating the engine
            # afterwards, so anything derived at construction sees the value.
            engine_kwargs[param_name] = param_value
        engine = BacktestEngine(**engine_kwargs)

        result = engine.run_backtest(prices, predictions, strategy_type=strategy_type)

        results.append({
            param_name: param_value,
            '最终资金': result['final_capital'],
            '总收益率': result['total_return'],
            '夏普比率': result['sharpe_ratio'],
            '最大回撤': result['max_drawdown']
        })

    return pd.DataFrame(results)

# Sensitivity of the LSTM strategy to the commission rate.
print("\n分析交易佣金敏感性...")
commission_values = [0.0005, 0.001, 0.002, 0.005, 0.01]  # 0.05% to 1%
commission_sensitivity = analyze_strategy_sensitivity(
    lstm_predictions_aligned[:500],  # only the first 500 points, to keep the run fast
    test_prices_aligned[:500],
    'commission',
    commission_values
)

# One row per commission value tried.
print("佣金敏感性分析结果:")
print(commission_sensitivity)

# %%
# Plot how each backtest metric responds to the commission rate (2x2 panels).
fig, axes = plt.subplots(2, 2, figsize=(14, 10))

# One entry per panel:
# (grid position, result column, plot as percent?, line colour or None, y label, title)
sensitivity_panels = [
    ((0, 0), '最终资金', False, None, '最终资金 ($)', '佣金对最终资金的影响'),
    ((0, 1), '总收益率', True, 'green', '总收益率 (%)', '佣金对总收益率的影响'),
    ((1, 0), '夏普比率', False, 'orange', '夏普比率', '佣金对夏普比率的影响'),
    ((1, 1), '最大回撤', True, 'red', '最大回撤 (%)', '佣金对最大回撤的影响'),
]

for grid_pos, column, as_percent, line_color, y_label, panel_title in sensitivity_panels:
    panel = axes[grid_pos]
    y_values = commission_sensitivity[column]
    if as_percent:
        y_values = y_values * 100
    # Only pass a colour when one is specified, so the first panel keeps the
    # default colour cycle.
    line_style = {'linewidth': 2}
    if line_color is not None:
        line_style['color'] = line_color
    panel.plot(commission_sensitivity['commission'] * 100, y_values, 'o-', **line_style)
    panel.set_xlabel('交易佣金 (%)')
    panel.set_ylabel(y_label)
    panel.set_title(panel_title)
    panel.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('../results/figures/commission_sensitivity.png', dpi=300, bbox_inches='tight')
plt.show()


In [None]:
# %% [markdown]
# ## 10. 结论与建议

# %%
# Build the final summary report.
print("=" * 60)
print("项目总结与建议")
print("=" * 60)

# Collect the headline numbers into one nested dict: four sections (data
# preparation, model performance, backtest results, feature importance), each
# mapping a label to a value. The dict is printed and dumped to JSON further down.
key_results = {
    '数据准备': {
        '原始数据规模': f"{raw_data.shape[0]:,} 行 × {raw_data.shape[1]} 列",
        '处理后的特征数量': len(feature_names),
        # Hard-coded description; must be kept in sync with the lookback and
        # prediction_horizon passed to prepare_features_target.
        '时间序列窗口': "60天历史数据预测1天收益率",
        '训练集大小': f"{X_train_scaled.shape[0]:,} 个样本",
        '测试集大小': f"{X_test_scaled.shape[0]:,} 个样本"
    },
    '模型性能': {
        '最佳LSTM验证损失': f"{min(lstm_history['val_loss']):.6f}",
        '最佳Transformer验证损失': f"{min(transformer_history['val_loss']):.6f}",
        'LSTM测试集R²': f"{lstm_metrics['r2']:.4f}",
        'Transformer测试集R²': f"{transformer_metrics['r2']:.4f}",
        # The model with the lower test RMSE is reported as the better one.
        '表现更好的模型': 'LSTM' if lstm_metrics['rmse'] < transformer_metrics['rmse'] else 'Transformer'
    },
    '回测结果': {
        # The strategy with the higher total return is reported as the best one.
        '最佳策略': 'LSTM策略' if lstm_results['total_return'] > transformer_results['total_return'] else 'Transformer策略',
        'LSTM策略总收益率': f"{lstm_results['total_return']:.2%}",
        'Transformer策略总收益率': f"{transformer_results['total_return']:.2%}",
        '最佳夏普比率': max(lstm_results['sharpe_ratio'], transformer_results['sharpe_ratio']),
        # NOTE(review): min() picks the smallest drawdown only if max_drawdown is
        # stored as a positive fraction; if the engine reports it as a negative
        # number this selects the worst drawdown instead — confirm the sign.
        '最低最大回撤': min(lstm_results['max_drawdown'], transformer_results['max_drawdown'])
    },
    '特征重要性': {
        # Row 0 is the top feature because the importance frames are sorted descending.
        'LSTM最重要特征': lstm_importance.iloc[0]['特征'],
        'Transformer最重要特征': transformer_importance.iloc[0]['特征'],
        # Features that appear in the top five of both models (a Python set).
        '共同重要特征': set(lstm_importance.head(5)['特征']) & set(transformer_importance.head(5)['特征'])
    }
}

# Print the summary, one numbered section per group in key_results.
summary_sections = ('数据准备', '模型性能', '回测结果', '特征重要性')
for section_no, section_name in enumerate(summary_sections, start=1):
    print(f"\n{section_no}. {section_name}总结:")
    for key, value in key_results[section_name].items():
        print(f"   - {key}: {value}")

# %%
# Persist every result table and the summary dict under ../results/summary.
print("\n" + "=" * 60)
print("保存结果")
print("=" * 60)

import os
import json


def _to_jsonable(obj):
    """Convert values json cannot encode natively into JSON-friendly ones.

    Sets become sorted lists (stable output and a real JSON array), numpy
    scalars and arrays become plain Python numbers and lists. Anything else
    falls back to its string form.
    """
    if isinstance(obj, (set, frozenset)):
        return sorted(obj, key=str)
    if isinstance(obj, np.generic):
        return obj.item()
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    return str(obj)


# Make sure the output directory exists.
os.makedirs('../results/summary', exist_ok=True)

# Write the summary as UTF-8 with ensure_ascii=False, so the Chinese keys stay
# readable in the file instead of being written as \uXXXX escapes.
with open('../results/summary/key_results.json', 'w', encoding='utf-8') as f:
    json.dump(key_results, f, indent=4, ensure_ascii=False, default=_to_jsonable)

print("✓ 关键结果已保存到: ../results/summary/key_results.json")

# Model performance comparison table.
comparison_df.to_csv('../results/summary/model_comparison.csv', index=False)
print("✓ 模型性能对比已保存到: ../results/summary/model_comparison.csv")

# Strategy comparison table.
strategy_comparison.to_csv('../results/summary/strategy_comparison.csv', index=False)
print("✓ 策略性能对比已保存到: ../results/summary/strategy_comparison.csv")

# Feature importance rankings, one file per model.
lstm_importance.to_csv('../results/summary/lstm_feature_importance.csv', index=False)
transformer_importance.to_csv('../results/summary/transformer_feature_importance.csv', index=False)
print("✓ 特征重要性分析已保存")

# Commission sensitivity table.
commission_sensitivity.to_csv('../results/summary/commission_sensitivity.csv', index=False)
print("✓ 敏感性分析已保存到: ../results/summary/commission_sensitivity.csv")

# 生成最终报告
final_report = f"""# 量化策略建模项目最终报告