In [1]:
import logging
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
from pathlib import Path
from typing import Dict, List, Optional, Tuple
from dataclasses import dataclass, field
from typing import Dict

In [2]:
@dataclass
class PredictionConfig:
    """預測系統配置"""
    # 基礎路徑設定
    BASE_DIR: str = field(default="D:/Min/Python/Project/FA_Data")
    META_DATA_DIR: str = field(default="meta_data")
    MODEL_DIR: str = field(default="models")
    PREDICTION_DIR: str = field(default="predictions")
    
    # 預測參數
    PREDICTION_PARAMS: Dict = field(
        default_factory=lambda: {
            'confidence_threshold': 0.65,  # 預測信心閾值
            'min_probability': 0.55,  # 最小預測機率
            'lookback_days': 30,  # 回顧天數
            'prediction_horizon': 5  # 預測天數
        }
    )
    
    # 信號參數
    SIGNAL_PARAMS: Dict = field(
        default_factory=lambda: {
            'buy': {
                'rsi_lower': 30,  # RSI超賣閾值
                'volume_ratio': 1.5,  # 量能閾值
                'trend_strength': 0.05  # 趨勢強度閾值
            },
            'sell': {
                'rsi_upper': 70,  # RSI超買閾值
                'profit_target': 0.15,  # 獲利目標
                'stop_loss': -0.1  # 停損限制
            }
        }
    )
    
    # 風險參數
    RISK_PARAMS: Dict = field(
        default_factory=lambda: {
            'max_positions': 20,  # 最大持倉數
            'single_position_limit': 0.15,  # 單一部位限制
            'industry_exposure_limit': 0.30  # 產業曝險限制
        }
    )

# 測試配置
if __name__ == "__main__":
    config = PredictionConfig()
    print("基礎路徑:", config.BASE_DIR)
    print("預測參數:", config.PREDICTION_PARAMS)
    print("信號參數:", config.SIGNAL_PARAMS)
    print("風險參數:", config.RISK_PARAMS)

基礎路徑: D:/Min/Python/Project/FA_Data
預測參數: {'confidence_threshold': 0.65, 'min_probability': 0.55, 'lookback_days': 30, 'prediction_horizon': 5}
信號參數: {'buy': {'rsi_lower': 30, 'volume_ratio': 1.5, 'trend_strength': 0.05}, 'sell': {'rsi_upper': 70, 'profit_target': 0.15, 'stop_loss': -0.1}}
風險參數: {'max_positions': 20, 'single_position_limit': 0.15, 'industry_exposure_limit': 0.3}


In [3]:
class PredictionSystem:
    """交易預測系統"""
    
    def __init__(self, config: PredictionConfig):
        """初始化預測系統"""
        self.config = config
        self.logger = self._setup_logger()
        self.base_path = Path(config.BASE_DIR)
        self._initialize_paths()
        
    def _setup_logger(self) -> logging.Logger:
        """設置日誌系統"""
        logger = logging.getLogger('PredictionSystem')
        logger.setLevel(logging.INFO)
        
        # 確保不重複添加處理器
        if not logger.handlers:
            # 檔案處理器
            fh = logging.FileHandler('prediction_system.log', encoding='utf-8')
            fh.setLevel(logging.INFO)
            
            # 控制台處理器
            ch = logging.StreamHandler()
            ch.setLevel(logging.INFO)
            
            # 設定格式
            formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
            fh.setFormatter(formatter)
            ch.setFormatter(formatter)
            
            logger.addHandler(fh)
            logger.addHandler(ch)
        
        return logger
    
    def _initialize_paths(self):
        """初始化必要的路徑"""
        self.meta_data_path = self.base_path / self.config.META_DATA_DIR
        self.model_path = self.base_path / self.config.MODEL_DIR
        self.prediction_path = self.base_path / self.config.PREDICTION_DIR
        
        # 確保必要目錄存在
        self.prediction_path.mkdir(parents=True, exist_ok=True)
    
    def load_stock_data(self, stock_id: str) -> Optional[pd.DataFrame]:
        """載入股票資料"""
        try:
            # 載入增強特徵數據
            feature_path = self.meta_data_path / "enhanced_features.csv"
            df = pd.read_csv(feature_path, low_memory=False)
            
            # 確保證券代號為字符串類型
            df['證券代號'] = df['證券代號'].astype(str)
            
            # 篩選特定股票的數據
            df = df[df['證券代號'] == stock_id].copy()
            
            # 確保日期格式正確
            df['日期'] = pd.to_datetime(df['日期'])
            
            # 按日期排序
            df = df.sort_values('日期')
            
            # 檢查必要欄位是否存在且有值
            required_columns = ['RSI', '量比', '趨勢強度', '技術綜合評分']
            
            # 檢查數據完整性
            if df.empty:
                self.logger.warning(f"股票 {stock_id} 沒有可用的數據")
                return None
                
            missing_columns = [col for col in required_columns if col not in df.columns]
            if missing_columns:
                self.logger.warning(f"股票 {stock_id} 缺少必要欄位: {missing_columns}")
                return None
                
            # 檢查最新數據是否完整
            latest_data = df.iloc[-1]
            null_columns = [col for col in required_columns if pd.isna(latest_data[col])]
            if null_columns:
                self.logger.warning(f"股票 {stock_id} 最新數據中有缺失值: {null_columns}")
                return None
    
            return df
                
        except Exception as e:
            self.logger.error(f"載入股票 {stock_id} 資料時發生錯誤: {str(e)}")
            return None
    
    def generate_signals(self, df: pd.DataFrame) -> pd.DataFrame:
        """產生交易信號"""
        try:
            df = df.copy()
            buy_params = self.config.SIGNAL_PARAMS['buy']
            sell_params = self.config.SIGNAL_PARAMS['sell']
            
            # 使用已經計算好的特徵
            # RSI信號
            df['RSI信號'] = 0
            df.loc[df['RSI'] < buy_params['rsi_lower'], 'RSI信號'] = 1
            df.loc[df['RSI'] > sell_params['rsi_upper'], 'RSI信號'] = -1
            
            # 量能信號
            df['量能信號'] = 0
            df.loc[df['量比'] > buy_params['volume_ratio'], '量能信號'] = 1
            
            # 趨勢信號
            df['趨勢信號'] = 0
            df.loc[df['趨勢強度'] > buy_params['trend_strength'], '趨勢信號'] = 1
            df.loc[df['趨勢強度'] < -buy_params['trend_strength'], '趨勢信號'] = -1
            
            # 綜合信號 (考慮技術綜合評分)
            df['交易信號'] = (
                df['RSI信號'] + 
                df['量能信號'] + 
                df['趨勢信號'] + 
                (df['技術綜合評分'] - 0.5) * 2  # 將技術評分轉換為 -1 到 1
            ).clip(-1, 1)
            
            return df
            
        except Exception as e:
            self.logger.error(f"產生交易信號時發生錯誤: {str(e)}")
            return df
    
    def calculate_prediction_probability(self, df: pd.DataFrame) -> Tuple[float, float]:
        """計算預測機率和信心分數"""
        try:
            # 獲取最新數據
            latest_data = df.iloc[-1]
            
            # 使用已經計算好的綜合指標
            tech_score = latest_data['技術綜合評分']
            
            # 綜合其他重要指標
            momentum_score = (
                latest_data['RSI_動能'] +
                latest_data['MACD_動能'] +
                latest_data['趨勢動能']
            ) / 3
            
            # 計算綜合預測機率
            probability = (tech_score + momentum_score + 1) / 2
            probability = np.clip(probability, 0, 1)
            
            # 計算信心分數 (基於多個指標的一致性)
            signal_consistency = 1 - abs(
                latest_data['RSI信號'] - 
                latest_data['量能信號'] - 
                latest_data['趨勢信號']
            ) / 3
            
            confidence = signal_consistency * (1 - abs(0.5 - probability) * 2)
            
            return probability, confidence
            
        except Exception as e:
            self.logger.error(f"計算預測機率時發生錯誤: {str(e)}")
            return 0.5, 0.0
    
    def generate_prediction(self, stock_id: str) -> Dict:
        """生成預測結果"""
        try:
            # 載入數據
            df = self.load_stock_data(stock_id)
            if df is None:
                self.logger.warning(f"股票 {stock_id} 數據不完整，無法進行預測")
                return {}
                
            # 檢查數據是否足夠新
            latest_date = df['日期'].max()
            date_diff = (pd.Timestamp.now() - latest_date).days
            if date_diff > 5:  # 如果數據超過5天沒更新
                self.logger.warning(f"股票 {stock_id} 數據可能過時，最新日期: {latest_date}")
                return {}
            
            # 生成信號
            df = self.generate_signals(df)
            
            # 計算預測機率和信心分數
            probability, confidence = self.calculate_prediction_probability(df)
            
            # 獲取最新數據
            latest_data = df.iloc[-1]
            
            # 安全獲取數值的輔助函數
            def safe_float(value):
                try:
                    return float(value) if pd.notna(value) else None
                except:
                    return None
            
            # 整理預測結果
            prediction = {
                'stock_id': stock_id,
                'date': latest_data['日期'].strftime('%Y-%m-%d'),
                'probability': probability,
                'confidence': confidence,
                'signal': int(latest_data.get('交易信號', 0))  # 如果沒有信號，預設為0
            }
            
            # 技術指標
            technical_indicators = {}
            for indicator in ['RSI', 'MACD', 'KD_差值', '趨勢強度', '量比']:
                if indicator in latest_data:
                    value = safe_float(latest_data[indicator])
                    if value is not None:
                        technical_indicators[indicator] = value
                        
            prediction['technical_indicators'] = technical_indicators
            
            # 風險指標
            risk_metrics = {}
            for metric in ['波動率', '日內波動率']:
                if metric in latest_data:
                    value = safe_float(latest_data[metric])
                    if value is not None:
                        risk_metrics[metric] = value
                        
            prediction['risk_metrics'] = risk_metrics
            
            # 基本資訊
            basic_info = {
                '收盤價': safe_float(latest_data.get('收盤價')),
                '成交量': safe_float(latest_data.get('成交股數')),
                '本益比': safe_float(latest_data.get('本益比'))
            }
            prediction['basic_info'] = {k: v for k, v in basic_info.items() if v is not None}
            
            # 保存預測結果
            if prediction.get('technical_indicators') or prediction.get('risk_metrics'):
                self._save_prediction(prediction)
                return prediction
            else:
                self.logger.warning(f"股票 {stock_id} 缺少足夠的指標數據進行預測")
                return {}
                
        except Exception as e:
            self.logger.error(f"生成股票 {stock_id} 預測時發生錯誤: {str(e)}")
            return {}
    
    def _save_prediction(self, prediction: Dict):
        """保存預測結果"""
        try:
            # 生成檔案名稱
            date_str = prediction['date'].replace('-', '')
            file_path = self.prediction_path / f"prediction_{prediction['stock_id']}_{date_str}.json"
            
            # 保存為JSON格式
            import json
            with open(file_path, 'w', encoding='utf-8') as f:
                json.dump(prediction, f, ensure_ascii=False, indent=4)
                
        except Exception as e:
            self.logger.error(f"保存預測結果時發生錯誤: {str(e)}")
    
    def run_prediction_pipeline(self, stock_ids: List[str]) -> List[Dict]:
        """執行完整的預測流程"""
        predictions = []
        
        for stock_id in stock_ids:
            self.logger.info(f"處理股票 {stock_id} 的預測...")
            prediction = self.generate_prediction(stock_id)
            if prediction:
                predictions.append(prediction)
        
        return predictions

In [4]:
def main():
    """主程式"""
    # 初始化配置
    config = PredictionConfig()
    
    # 建立預測系統
    predictor = PredictionSystem(config)
    
    # 測試用股票列表
    test_stocks = ['2330', '2317', '2454']
    
    # 執行預測
    predictions = predictor.run_prediction_pipeline(test_stocks)
    
    # 輸出結果
    for pred in predictions:
        print(f"股票 {pred['stock_id']} 預測結果:")
        print(f"機率: {pred['probability']:.2f}")
        print(f"信心: {pred['confidence']:.2f}")
        print(f"信號: {pred['signal']}")
        print("-" * 50)

In [5]:
if __name__ == "__main__":
    main()

2024-11-13 15:01:30,517 - INFO - 處理股票 2330 的預測...
2024-11-13 15:01:57,984 - INFO - 處理股票 2317 的預測...
2024-11-13 15:02:23,163 - INFO - 處理股票 2454 的預測...


股票 2330 預測結果:
機率: 0.53
信心: 0.95
信號: 0
--------------------------------------------------
股票 2317 預測結果:
機率: nan
信心: nan
信號: 0
--------------------------------------------------
股票 2454 預測結果:
機率: nan
信心: nan
信號: 0
--------------------------------------------------
