<a href="https://colab.research.google.com/github/nanpolend/machine-learning/blob/master/%E5%88%A9%E7%94%A8ai%E9%A0%90%E8%A8%93%E7%B7%B4%E6%A8%A1%E5%9E%8B%E9%A0%90%E6%B8%AC%E9%BB%83%E9%87%91%E8%B5%B0%E5%8B%A2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 核心模块划分
+-------------------+
|    DataManager    |  # 统一数据接口
+-------------------+
|  FeatureEngineer  |  # 动态特征生成
+-------------------+
|   GoldPredictor   |  # 模型集成核心
+-------------------+
|      main()       |  # 流程控制器
+-------------------+

In [None]:
# -*- coding: utf-8 -*-
# Google Colab gold price prediction system (final stable version)

# ====================
# Environment setup (must be run first)
# ====================
# Pins exact versions of every package used below. --force-reinstall replaces
# whatever is already installed and --no-deps stops pip from pulling in
# transitive dependencies.
!pip install --force-reinstall --no-deps \
numpy==1.23.5 \
pandas==1.5.3 \
tensorflow==2.12.0 \
keras==2.12.0 \
xgboost==1.7.6 \
scikit-learn==1.2.2 \
pandas_ta==0.3.14b0 \
matplotlib==3.7.1 \
shap==0.44.1 \
requests==2.31.0

import os
# Sends signal 9 to the current process so the runtime restarts automatically,
# presumably so the freshly installed versions are the ones that get imported.
# NOTE(review): nothing after this line can run in the same execution; the
# code below has to be run again after the restart.
os.kill(os.getpid(), 9)  # automatically restart the runtime

# ====================
# 正式代码 (重启后运行)
# ====================
import os
# Set before TensorFlow is imported further down.
# NOTE(review): presumably silences TensorFlow's native log output - confirm
# the meaning of level '3' against the TensorFlow documentation.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import warnings
# Suppress every Python warning for the rest of the session.
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import requests
import matplotlib.pyplot as plt
from sklearn.preprocessing import RobustScaler
from sklearn.model_selection import TimeSeriesSplit
from xgboost import XGBRegressor
import shap
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# ====================
# 系统配置
# ====================
class Config:
    """Static settings shared by the data, feature and model layers."""

    SYMBOL = "XAUUSD"
    API_KEY = "DEMO_KEY"  # placeholder; replace with a real key
    LOOKBACK = 30         # number of rows in each LSTM input window
    TRAIN_DAYS = 1000     # number of daily rows requested / generated for training
    # Feature columns fed to the models. The names must match the columns
    # produced by the feature-engineering step exactly; the Bollinger band
    # columns are spelled with a dot ("2.0") for both the upper and lower band.
    FEATURES = [
        'RSI_14', 'MACD_12_26_9',
        'BBU_20_2.0', 'BBL_20_2.0',
        'EMA_50', 'volatility'
    ]
    TARGET = 'close'      # column the models are trained to predict

# ====================
# 智能数据管理器
# ====================
class DataManager:
    """Load OHLCV price data over HTTP, falling back to synthetic data."""

    @staticmethod
    def fetch_data(url, params, is_historical=True):
        """Request price data and return it as a normalized DataFrame.

        Args:
            url: Endpoint to query with an HTTP GET.
            params: Query-string parameters sent with the request.
            is_historical: When True the JSON payload is expected to hold the
                rows under its ``'data'`` key; otherwise the whole payload is
                treated as a single row.

        Returns:
            A float DataFrame indexed by timestamp. If the request fails, the
            server answers with a non-OK status, or the payload cannot be
            parsed, synthetic data from ``generate_data`` is returned instead
            (best-effort behaviour; a message is printed in every such case).
        """
        try:
            response = requests.get(url, params=params, timeout=15)
            if response.ok:
                data = response.json()
                df_data = data['data'] if is_historical else [data]
                df = pd.DataFrame(df_data)
                return DataManager._process_data(df, is_historical)
            # Make the fallback visible instead of silently swapping in
            # synthetic prices.
            print(f"数据获取失败: HTTP {response.status_code}")
            return DataManager.generate_data(is_historical)
        except Exception as e:
            # Deliberately broad: any network or parsing problem degrades to
            # synthetic data rather than aborting the run.
            print(f"数据获取异常: {str(e)}")
            return DataManager.generate_data(is_historical)

    @staticmethod
    def _process_data(df, is_historical):
        """Normalize column names, index by timestamp and cast to float.

        Args:
            df: Raw frame with capitalized column names and a millisecond
                epoch ``Timestamp`` column.
            is_historical: Unused; kept so the signature stays stable.

        Returns:
            A float DataFrame indexed by timestamp in ascending order.

        Raises:
            KeyError: If no timestamp column is present.
            ValueError: If a column cannot be converted to float.
        """
        df = df.rename(columns={
            'Timestamp': 'timestamp',
            'Open': 'open',
            'High': 'high',
            'Low': 'low',
            'Close': 'close',
            'Volume': 'volume'
        })
        df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
        # Rolling indicators and LSTM windows assume chronological order, so
        # do not rely on the order in which the rows were delivered.
        return df.set_index('timestamp').astype(float).sort_index()

    @staticmethod
    def generate_data(is_historical):
        """Build a random-walk OHLCV frame used when real data is unavailable.

        Args:
            is_historical: True for ``Config.TRAIN_DAYS`` daily rows, False
                for a single row.

        Returns:
            A DataFrame with open/high/low/close/volume columns on a daily
            index ending at the current time. Every row satisfies
            ``low <= min(open, close)`` and ``high >= max(open, close)``.
        """
        days = Config.TRAIN_DAYS if is_historical else 1
        dates = pd.date_range(end=pd.Timestamp.now(), periods=days, freq='D')
        close = 1800 + np.random.normal(0, 50, days).cumsum()
        open_ = close + np.random.randint(-20, 20, days)
        # Anchor the extremes on the candle body so that the open can never
        # fall outside the [low, high] range.
        high = np.maximum(open_, close) + np.random.randint(0, 30, days)
        low = np.minimum(open_, close) - np.random.randint(0, 30, days)
        return pd.DataFrame({
            'open': open_,
            'high': high,
            'low': low,
            'close': close,
            'volume': np.random.poisson(10000, days)
        }, index=dates)

# ====================
# 高级特征工程
# ====================
class FeatureEngineer:
    """Derive technical-indicator and price-action features from OHLCV data."""

    @staticmethod
    def add_features(df):
        """Append indicator columns to ``df`` and return the model matrix.

        Args:
            df: Frame with at least ``open``, ``high``, ``low`` and ``close``
                columns, in chronological order.

        Returns:
            A new DataFrame containing ``Config.FEATURES`` followed by
            ``Config.TARGET``, restricted to rows where all of those columns
            are populated.

        Raises:
            KeyError: If a configured feature or target column is missing
                after the indicators have been computed.

        Note:
            The indicator and custom columns are written into ``df`` itself;
            pass a copy if the caller's frame must stay untouched.
        """
        # The DataFrame ``.ta`` accessor only exists once pandas_ta has been
        # imported, and the file never imports it at module level.
        import pandas_ta  # noqa: F401

        # Technical indicators, appended to df in place.
        df.ta.rsi(length=14, append=True)
        df.ta.macd(fast=12, slow=26, signal=9, append=True)
        df.ta.bbands(length=20, append=True)
        df.ta.ema(length=50, append=True)

        # Custom features.
        df['volatility'] = df['high'] - df['low']
        df['momentum_5'] = df['close'].pct_change(5)
        df['gap'] = df['open'] - df['close'].shift(1)

        # Select first, then drop incomplete rows, so that a gap in a column
        # the models never see cannot discard otherwise usable rows.
        wanted = Config.FEATURES + [Config.TARGET]
        return df[wanted].dropna().copy()

# ====================
# 集成预测模型
# ====================
class GoldPredictor:
    """Blend an XGBoost regressor and a stacked LSTM to estimate the price."""

    def __init__(self):
        """Create the shared scaler and the untrained models."""
        self.scaler = RobustScaler()
        self.models = self._build_models()

    def _build_models(self):
        """Return the untrained models keyed by ``'xgb'`` and ``'lstm'``."""
        return {
            'xgb': XGBRegressor(
                objective='reg:squarederror',
                n_estimators=200,
                learning_rate=0.05,
                max_depth=5
            ),
            'lstm': Sequential([
                LSTM(64, return_sequences=True,
                    input_shape=(Config.LOOKBACK, len(Config.FEATURES))),
                Dropout(0.3),
                LSTM(32),
                Dropout(0.3),
                Dense(1)
            ])
        }

    def _to_price(self, scaled_value):
        """Map a value from scaled target space back to price units.

        The target is the last column seen by the scaler, so its centre and
        scale are the last entries of the fitted statistics.
        """
        return float(scaled_value * self.scaler.scale_[-1] + self.scaler.center_[-1])

    def _scale_features(self, features):
        """Scale feature columns with the statistics learned in ``train``.

        Applies the same centre/scale the fitted scaler uses for the feature
        columns (every column except the trailing target).
        """
        values = np.asarray(features, dtype=float)
        return (values - self.scaler.center_[:-1]) / self.scaler.scale_[:-1]

    def train(self, data):
        """Fit the scaler and both models on ``data``.

        Args:
            data: OHLCV frame accepted by ``FeatureEngineer.add_features``.

        Raises:
            ValueError: If fewer than ``Config.LOOKBACK + 1`` usable rows
                remain after feature engineering, which would leave the LSTM
                without a single training window.
        """
        df = FeatureEngineer.add_features(data)
        if len(df) <= Config.LOOKBACK:
            raise ValueError(
                f"need more than {Config.LOOKBACK} usable rows to train, got {len(df)}"
            )
        scaled = self.scaler.fit_transform(df)

        # XGBoost: features and target are taken from the same row.
        # NOTE(review): this fits the close of a row from that row's own
        # indicators rather than a later close - confirm this is intended.
        X, y = scaled[:, :-1], scaled[:, -1]
        self.models['xgb'].fit(X, y)

        # LSTM: each window covers rows [i - LOOKBACK, i) and is paired with
        # the target of row i.
        X_lstm = np.array([
            scaled[i-Config.LOOKBACK:i, :-1]
            for i in range(Config.LOOKBACK, len(scaled))
        ])
        self.models['lstm'].compile(optimizer='adam', loss='mse')
        self.models['lstm'].fit(
            X_lstm, y[Config.LOOKBACK:],
            epochs=30,
            batch_size=16,
            verbose=0
        )

    def predict(self, data):
        """Return the blended price estimate for the latest rows of ``data``.

        Args:
            data: OHLCV frame accepted by ``FeatureEngineer.add_features``.

        Returns:
            The 60/40 XGBoost/LSTM blend as a float in the units of the
            target column (not in scaled space).

        Raises:
            ValueError: If fewer than ``Config.LOOKBACK`` usable rows remain
                after feature engineering.
        """
        full_data = FeatureEngineer.add_features(data)
        if len(full_data) < Config.LOOKBACK:
            raise ValueError(
                f"need at least {Config.LOOKBACK} usable rows to predict, got {len(full_data)}"
            )
        scaled = self.scaler.transform(full_data)

        # Only the newest row's XGBoost output is used, so score just that row.
        xgb_pred = self.models['xgb'].predict(scaled[-1:, :-1])
        lstm_input = scaled[-Config.LOOKBACK:, :-1].reshape(1, Config.LOOKBACK, -1)
        lstm_pred = self.models['lstm'].predict(lstm_input, verbose=0)

        # Both models were trained on the scaled target, so the weighted
        # blend is still in scaled space; convert it back before returning.
        blended = (xgb_pred[-1] * 0.6) + (lstm_pred[0][0] * 0.4)
        return self._to_price(blended)

    def explain(self, sample):
        """Plot a SHAP summary of the XGBoost model for ``sample``.

        Args:
            sample: Frame containing the ``Config.FEATURES`` columns in
                original (unscaled) units. Rows with a missing feature are
                ignored.

        Raises:
            ValueError: If no row has all features populated.
        """
        features = sample[Config.FEATURES].dropna()
        if features.empty:
            raise ValueError("no complete feature rows available to explain")
        # The model was trained on scaled features, so it has to be explained
        # on inputs from that same space.
        scaled_features = self._scale_features(features)

        explainer = shap.TreeExplainer(self.models['xgb'])
        shap_values = explainer.shap_values(scaled_features)
        plt.figure(figsize=(10,6))
        shap.summary_plot(
            shap_values,
            scaled_features,
            feature_names=Config.FEATURES,
            show=False
        )
        plt.title('特征影响力分析', fontsize=14)
        plt.tight_layout()
        plt.show()

# ====================
# 主控系统
# ====================
def main():
    """Run the full pipeline: load data, train, predict and explain.

    Progress and results are printed. A training failure ends the run early;
    a failure during prediction or explanation is reported and swallowed so
    the function always returns normally.
    """
    print("🟢 系统初始化...")

    # Load the training history.
    print("\n📥 获取历史数据...")
    historical_data = DataManager.fetch_data(
        url="https://api.alltick.co/v1/history",
        params={
            "symbol": Config.SYMBOL,
            "interval": "1d",
            "apikey": Config.API_KEY,
            "limit": Config.TRAIN_DAYS
        },
        is_historical=True
    )

    # Train the models.
    print("\n🎯 训练模型中...")
    predictor = GoldPredictor()
    try:
        # Feature engineering appends its columns to the frame it receives,
        # so train on a copy and keep historical_data as raw prices.
        predictor.train(historical_data.copy())
        print("✅ 模型训练成功")
    except Exception as e:
        print(f"❌ 训练失败: {str(e)}")
        return

    # Fetch the latest quote and predict.
    print("\n🔮 执行实时预测...")
    realtime_data = DataManager.fetch_data(
        url="https://api.alltick.co/v1/quote",
        params={"symbol": Config.SYMBOL, "apikey": Config.API_KEY},
        is_historical=False
    )

    if not realtime_data.empty:
        try:
            combined_data = pd.concat([historical_data, realtime_data])
            prediction = predictor.predict(combined_data)
            latest_close = realtime_data['close'].iloc[0]

            print("\n=== 预测结果 ===")
            print(f"📈 预测价格: ${prediction:.2f}")
            print(f"🕒 实时价格: ${latest_close:.2f}")
            print(f"🔀 价格差异: {(prediction - latest_close):.2f} USD")

            # Explain on explicitly engineered rows instead of relying on
            # training having added the feature columns as a side effect.
            # Cap the sample size so short histories do not raise.
            explain_frame = FeatureEngineer.add_features(historical_data.copy())
            predictor.explain(explain_frame.sample(min(50, len(explain_frame))))
        except Exception as e:
            print(f"⚠️ 预测异常: {str(e)}")
    else:
        print("⚠️ 实时数据获取失败")

if __name__ == "__main__":
    main()