In [3]:
import logging
from pathlib import Path
from dataclasses import dataclass
from typing import Optional
from concurrent.futures import ThreadPoolExecutor, as_completed
import pandas as pd
import numpy as np
import yfinance as yf
import time


@dataclass
class FilterConfig:
    """Thresholds and switches for the structural stock filters."""

    # Minimum trading volume a stock must reach.
    min_volume: int = 500_000
    # Minimum market capitalisation (default: one billion).
    min_market_cap: float = 1_000_000_000.0
    # Maximum debt-to-equity, expressed as a ratio (2.0 corresponds to 200%).
    max_debt_to_equity: float = 2.0
    # Whether the debt filter is applied at all; off by default.
    enable_debt_filter: bool = False


class StockScreener:
    """Verbesserte Stock-Screening-Klasse mit ökonomischem Scoring"""
    
    def __init__(self, portfolio_file: Optional[str] = None, config: Optional[FilterConfig] = None):
        self.logger = logging.getLogger(__name__)
        self.config = config or FilterConfig()
        self.stocks = self._load_stocks(portfolio_file) if portfolio_file else []
        self.data_df: Optional[pd.DataFrame] = None
        
    def _load_stocks(self, pf: str) -> list[str]:
        """Lädt Stock-Symbole aus Datei"""
        path = Path(pf)
        try:
            if not path.exists():
                self.logger.warning(f'Portfolio file not found: {path}')
                return []
            
            with open(path, 'r') as f:
                stocks = [line.strip() for line in f if line.strip()]
            
            self.logger.info(f'Loaded {len(stocks)} stocks from {path}')
            return stocks
            
        except Exception as e:
            self.logger.error(f'Error loading portfolio: {e}')
            return []
    
    def _fetch_single_stock(self, symbol: str) -> dict:
        """Fetch the screening metrics for one ticker from yfinance.

        Args:
            symbol: Ticker symbol passed to ``yf.Ticker``.

        Returns:
            A dict with the key ``symbol`` plus one entry per metric. A metric
            that is missing or not numeric is NaN. When the lookup fails
            altogether every metric is NaN, so the result always carries the
            same keys regardless of success or failure.
        """
        # Output key -> name of the corresponding field in the info dict.
        info_fields = {
            '52wk_change': '52WeekChange',
            'ps_ttm': 'priceToSalesTrailing12Months',
            'pe_ttm': 'trailingPE',
            'pe_forward': 'forwardPE',
            'market_cap': 'marketCap',
            'avg_volume': 'averageVolume',
            'debt_to_equity': 'debtToEquity',
            'profit_margin': 'profitMargins',
            'earnings_growth': 'earningsGrowth',
            'beta': 'beta',
        }
        metric_keys = [*info_fields, 'fcf_yield', 'pe_discount']

        def as_number(value):
            """Return ``value`` as a number, or NaN if it is missing or not numeric."""
            if isinstance(value, (int, float, np.number)):
                return value
            try:
                return float(value)
            except (TypeError, ValueError):
                return np.nan

        # Template with every metric set to NaN; returned as is on failure.
        blank = {'symbol': symbol, **dict.fromkeys(metric_keys, np.nan)}

        try:
            info = yf.Ticker(symbol).info
            row = dict(blank)
            for key, field in info_fields.items():
                row[key] = as_number(info.get(field))

            # debtToEquity is treated as a percentage (e.g. 150.0 = 150%) and
            # converted to a ratio (1.5); NaN stays NaN.
            row['debt_to_equity'] = row['debt_to_equity'] / 100.0

            # FCF yield = free cash flow / market cap. Comparisons with NaN
            # are False, so missing inputs leave the NaN default in place.
            free_cash_flow = as_number(info.get('freeCashflow'))
            if row['market_cap'] > 0:
                row['fcf_yield'] = free_cash_flow / row['market_cap']

            # PE discount: how far trailing P/E sits above forward P/E.
            if row['pe_forward'] > 0:
                row['pe_discount'] = (row['pe_ttm'] / row['pe_forward']) - 1

            return row

        except Exception as e:
            # Boundary to a third-party network call: log and degrade to an
            # all-NaN row instead of aborting the whole batch.
            self.logger.warning(f'Error fetching {symbol}: {e}')
            return blank
    
    def fetch_stock_data(self, max_workers: int = 10) -> pd.DataFrame:
        """Holt Daten parallel für alle Aktien"""
        if not self.stocks:
            self.logger.warning('No stocks to fetch')
            return pd.DataFrame()
        
        self.logger.info(f'Fetching data for {len(self.stocks)} stocks...')
        results = []
        
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            future_to_stock = {executor.submit(self._fetch_single_stock, stock): stock 
                             for stock in self.stocks}
            
            for future in as_completed(future_to_stock):
                stock = future_to_stock[future]
                try:
                    result = future.result()
                    results.append(result)
                except Exception as e:
                    self.logger.error(f'Exception for {stock}: {e}')
        
        df = pd.DataFrame(results)
        if not df.empty:
            df.set_index('symbol', inplace=True)
            df = df.apply(pd.to_numeric, errors='ignore')
            self.data_df = df
            self.logger.info(f'Successfully fetched data for {len(df)} stocks')
        
        return df
    
    def _filter_volume(self, df: pd.DataFrame) -> pd.DataFrame:
        """Filtert nach Handelsvolumen"""
        filtered = df[df['avg_volume'] >= self.config.min_volume]
        self.logger.info(f'After volume filter: {len(filtered)}/{len(df)} stocks remain')
        return filtered
    
    def _filter_market_cap(self, df: pd.DataFrame) -> pd.DataFrame:
        """Filtert nach Marktkapitalisierung"""
        filtered = df[df['market_cap'] >= self.config.min_market_cap]
        self.logger.info(f'After market cap filter: {len(filtered)}/{len(df)} stocks remain')
        return filtered
    
    def _filter_debt(self, df: pd.DataFrame) -> pd.DataFrame:
        """Filtert nach Verschuldungsgrad (optional und konservativ)"""
        if not self.config.enable_debt_filter:
            self.logger.info('Debt filter disabled - skipping')
            return df
            
        valid_debt = df['debt_to_equity'].notna()
        debt_available = valid_debt.sum()
        
        self.logger.info(f'Debt data available for {debt_available}/{len(df)} stocks')
        
        if debt_available < len(df) * 0.5:
            self.logger.info(f'Skipping debt filter (insufficient data: {debt_available}/{len(df)})')
            return df
        
        if debt_available > 0:
            debt_stats = df.loc[valid_debt, 'debt_to_equity'].describe()
            self.logger.info(f'Debt/Equity stats: median={debt_stats["50%"]:.2f}, mean={debt_stats["mean"]:.2f}, max={debt_stats["max"]:.2f}')
        
        filtered = df[
            (df['debt_to_equity'] <= self.config.max_debt_to_equity) | 
            (df['debt_to_equity'].isna())
        ]
        
        removed = len(df) - len(filtered)
        if removed > 0:
            self.logger.info(f'After debt filter: {len(filtered)}/{len(df)} stocks remain (removed {removed})')
        else:
            self.logger.info(f'After debt filter: all stocks remain')
            
        return filtered
    
    def _filter_required_data(self, df: pd.DataFrame) -> pd.DataFrame:
        """Filtert Aktien ohne vollständige Kern-Daten"""
        required_cols = ['profit_margin', 'ps_ttm', 'pe_ttm', 'pe_forward', 'beta', '52wk_change']
        
        initial_count = len(df)
        # Behalte nur Zeilen, wo ALLE required columns nicht-NaN sind
        filtered = df.dropna(subset=required_cols)
        
        removed = initial_count - len(filtered)
        self.logger.info(f'After required data filter: {len(filtered)}/{initial_count} stocks remain (removed {removed} with missing data)')
        
        return filtered
    
    def apply_filters(self) -> pd.DataFrame:
        """Wendet Basis-Filter an"""
        if self.data_df is None:
            self.fetch_stock_data()
        
        df = self.data_df.copy()
        if df.empty:
            self.logger.warning('No data to filter')
            return df
        
        self.logger.info(f'\n{"="*50}\nStarting filter pipeline with {len(df)} stocks\n{"="*50}')
        
        # Basis-Filter (nur strukturelle Kriterien)
        df = self._filter_market_cap(df)
        df = self._filter_volume(df)
        df = self._filter_debt(df)
        df = self._filter_required_data(df)  # Rausschmeißen wenn Kern-Daten fehlen
        
        # Score berechnen und sortieren
        if not df.empty:
            df = self._calculate_score(df)
            df.sort_values('score', ascending=False, inplace=True)
        
        self.logger.info(f'\n{"="*50}\nFinal result: {len(df)} stocks passed all filters\n{"="*50}')
        return df
    
    @staticmethod
    def _sigmoid(x: float) -> float:
        """Sigmoid-Funktion für sanfte Normalisierung"""
        return 1 / (1 + np.exp(-x))
    
    @staticmethod
    def _clamp(value, min_val=0.0, max_val=1.0):
        """Begrenzt Werte auf [min_val, max_val]"""
        return np.clip(value, min_val, max_val)
    
    def _calculate_score(self, df: pd.DataFrame) -> pd.DataFrame:
        """Berechnet ökonomischen Score nach 3-Säulen-Modell"""
        df = df.copy()
        
        # ===== A) PROFITABILITÄTS-SCORE =====
        # Profit Margin: sigmoid für sanfte Kurve
        df['profit_score'] = self._sigmoid(df['profit_margin'] * 10)  # Skalierung: 20% margin → sigmoid(2)
        
        # Optional: FCF Yield einbeziehen (falls verfügbar)
        if 'fcf_yield' in df.columns:
            fcf_available = df['fcf_yield'].notna().sum()
            if fcf_available > len(df) * 0.3:  # Wenn >30% FCF haben
                df['fcf_score'] = self._sigmoid(df['fcf_yield'] * 20).fillna(0.5)  # 5% yield → sigmoid(1)
                # Gewichte: 70% Profit Margin, 30% FCF Yield
                df['profit_score'] = 0.7 * df['profit_score'] + 0.3 * df['fcf_score']
                self.logger.info(f'Including FCF Yield in profit score ({fcf_available}/{len(df)} stocks have data)')
        
        # ===== B) BEWERTUNGS-SCORE =====
        # P/S Score: je niedriger, desto besser (4/ps → höhere Werte bei niedrigem P/S)
        df['ps_score'] = self._clamp(4.0 / df['ps_ttm'])
        
        # P/E Score: je niedriger, desto besser (30/pe)
        df['pe_score'] = self._clamp(30.0 / df['pe_ttm'])
        
        # PE-Discount Score: sigmoid, positiver Discount ist gut
        df['pe_discount_score'] = self._clamp(self._sigmoid(df['pe_discount'] * 5))
        
        # Valuation Score kombinieren: 40% P/S, 30% P/E, 30% PE-Discount
        df['val_score'] = (
            0.40 * df['ps_score'] +
            0.30 * df['pe_score'] +
            0.30 * df['pe_discount_score']
        )
        
        # ===== C) STABILITÄT & RISIKO-SCORE =====
        # Beta-Risiko: Beta nahe 1 ist gut, Abweichungen schlecht
        df['risk_score'] = 1 - self._clamp(np.abs(df['beta'] - 1) / 1.5)
        
        # Momentum: 52-Wochen-Change als Trendindikator
        df['momentum_score'] = self._sigmoid(df['52wk_change'] * 2)  # 50% change → sigmoid(1)
        
        # Stability Score: 60% Risiko, 40% Momentum
        df['stability_score'] = (
            0.6 * df['risk_score'] +
            0.4 * df['momentum_score']
        )
        
        # ===== GESAMT-SCORE =====
        df['score'] = (
            0.45 * df['profit_score'] +
            0.35 * df['val_score'] +
            0.20 * df['stability_score']
        )
        
        # Score-Komponenten für Debugging
        self.logger.info(f'Score stats: mean={df["score"].mean():.3f}, median={df["score"].median():.3f}, std={df["score"].std():.3f}')
        
        return df
    
    def save_results(self, df: pd.DataFrame, output_dir: str = 'lists'):
        """Speichert gefilterte Ergebnisse"""
        if df.empty:
            self.logger.info('No stocks to save')
            return
        
        folder = Path(output_dir)
        folder.mkdir(exist_ok=True)
        
        # Speichere nur Symbole
        symbols_file = folder / 'filtered.txt'
        existing_symbols = set()
        
        if symbols_file.exists():
            with open(symbols_file, 'r') as f:
                existing_symbols = {line.strip() for line in f}
        
        new_symbols = set(df.index) - existing_symbols
        
        if new_symbols:
            with open(symbols_file, 'a') as f:
                for symbol in new_symbols:
                    f.write(f'{symbol}\n')
            self.logger.info(f'Added {len(new_symbols)} new stocks to {symbols_file}')
        else:
            self.logger.info('No new stocks to add')
        
        # Speichere detaillierte CSV
        csv_file = folder / 'filtered_details.csv'
        df.to_csv(csv_file)
        self.logger.info(f'Saved detailed results to {csv_file}')
        
        # Zeige Top-Resultate mit Score-Komponenten
        print(f'\n{"="*100}\nTop 10 Filtered Stocks:\n{"="*100}')
        display_cols = [
            'score', 'profit_score', 'val_score', 'stability_score',
            'profit_margin', 'ps_ttm', 'pe_ttm', 'pe_discount', 
            'beta', '52wk_change'
        ]
        # Nur Spalten anzeigen, die existieren
        display_cols = [col for col in display_cols if col in df.columns]
        print(df[display_cols].head(10).to_string())
        print(f'\n{"="*100}\n')
    
    def run(self):
        """Führt kompletten Screening-Prozess aus"""
        filtered_df = self.apply_filters()
        self.save_results(filtered_df)
        return filtered_df


if __name__ == '__main__':
    logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=logging.INFO,
    )

    # Structural filters only: volume and market-cap thresholds below the
    # FilterConfig defaults, with the debt filter switched on.
    config = FilterConfig(
        enable_debt_filter=True,
        max_debt_to_equity=2.0,
        min_market_cap=500_000_000.0,  # 500M
        min_volume=300_000,
    )

    screener = StockScreener(portfolio_file='lists/worldstocks.txt', config=config)
    screener.run()

2025-11-23 01:20:57,641 - __main__ - INFO - Loaded 315 stocks from lists/worldstocks.txt
2025-11-23 01:20:57,641 - __main__ - INFO - Fetching data for 315 stocks...
2025-11-23 01:21:00,950 - yfinance - ERROR - HTTP Error 404: 
2025-11-23 01:21:05,389 - yfinance - ERROR - HTTP Error 404: 
2025-11-23 01:21:06,350 - yfinance - ERROR - HTTP Error 404: 
2025-11-23 01:21:12,190 - yfinance - ERROR - HTTP Error 404: 
2025-11-23 01:21:12,200 - yfinance - ERROR - HTTP Error 404: 
2025-11-23 01:21:12,214 - yfinance - ERROR - HTTP Error 404: 
2025-11-23 01:21:16,002 - yfinance - ERROR - HTTP Error 404: 
2025-11-23 01:21:17,905 - yfinance - ERROR - HTTP Error 404: 
2025-11-23 01:21:19,293 - yfinance - ERROR - HTTP Error 404: 
2025-11-23 01:21:38,465 - yfinance - ERROR - HTTP Error 404: 
2025-11-23 01:21:38,564 - __main__ - INFO - Successfully fetched data for 315 stocks
2025-11-23 01:21:38,567 - __main__ - INFO - 
Starting filter pipeline with 315 stocks
2025-11-23 01:21:38,571 - __main__ - INFO - 


Top 10 Filtered Stocks:
            score  profit_score  val_score  stability_score  profit_margin    ps_ttm     pe_ttm  pe_discount   beta  52wk_change
symbol                                                                                                                          
III.L    0.915246      0.977791   0.928040         0.752131        0.94819  4.877459   5.043143    83.115562  1.067    -0.105738
URW.PA   0.881839      0.897531   0.992929         0.652122        0.21365  3.544980  16.741573     0.744798  1.445     0.151765
LLOY.L   0.881453      0.780237   1.000000         0.901732        0.22010  2.858964  14.520000   115.166654  0.988     0.593853
PRU.L    0.858330      0.908684   0.776539         0.888167        0.25851  1.993326  10.683674    -0.214286  1.046     0.594093
AHT.L    0.848357      0.844664   0.951566         0.676052        0.13678  1.816682  18.011494     0.329502  1.182    -0.261546
0382.HK  0.845185      0.939448   0.904583         0.529148        0.258