# In [None]:
import logging
from pathlib import Path
from dataclasses import dataclass
from typing import Optional
from concurrent.futures import ThreadPoolExecutor, as_completed
import pandas as pd
import numpy as np
import yfinance as yf
import time


@dataclass
class FilterConfig:
    """Thresholds and switches used by the StockScreener filter pipeline.

    Returns and discounts are expressed as fractions (0.10 means 10%).
    """
    # Minimum 52-week price change a stock must reach: 10% (lowered from 15%).
    return_filter: float = 0.10
    # Minimum PE discount (trailing PE / forward PE - 1): 5% (lowered from
    # 15% to be more realistic).
    pe_dc_filter: float = 0.05
    # Minimum average trading volume.
    min_volume: int = 500_000
    # Absolute upper bound for the trailing price-to-sales ratio.
    max_ps_ratio: float = 5.0
    # Minimum market capitalisation (at least 1 billion).
    min_market_cap: float = 1e9
    # Maximum debt-to-equity expressed as a ratio (2.0 = 200%).
    max_debt_to_equity: float = 2.0
    # The debt filter is switched off by default.
    enable_debt_filter: bool = False


class StockScreener:
    """Verbesserte Stock-Screening-Klasse"""
    
    def __init__(self, portfolio_file: Optional[str] = None, config: Optional[FilterConfig] = None):
        self.logger = logging.getLogger(__name__)
        self.config = config or FilterConfig()
        self.stocks = self._load_stocks(portfolio_file) if portfolio_file else []
        self.data_df: Optional[pd.DataFrame] = None
        
    def _load_stocks(self, pf: str) -> list[str]:
        """Lädt Stock-Symbole aus Datei"""
        path = Path(pf)
        try:
            if not path.exists():
                self.logger.warning(f'Portfolio file not found: {path}')
                return []
            
            with open(path, 'r') as f:
                stocks = [line.strip() for line in f if line.strip()]
            
            self.logger.info(f'Loaded {len(stocks)} stocks from {path}')
            return stocks
            
        except Exception as e:
            self.logger.error(f'Error loading portfolio: {e}')
            return []
    
    def _fetch_single_stock(self, symbol: str) -> dict:
        """Fetch the screening metrics for one ticker symbol.

        Each metric is read from ``yf.Ticker(symbol).info`` and coerced to a
        number individually, so a single missing or malformed field (``None``
        or a string such as ``'Infinity'``) becomes NaN instead of causing
        the whole record to be discarded.

        Args:
            symbol: Ticker symbol to look up.

        Returns:
            A dict with ``symbol`` plus the metric keys (``52wk_change``,
            ``ps_ttm``, ``pe_ttm``, ``pe_forward``, ``market_cap``,
            ``avg_volume``, ``debt_to_equity``, ``profit_margin``,
            ``earnings_growth``, ``beta``, ``pe_discount``). If the lookup
            itself fails, only ``{'symbol': symbol}`` is returned.
        """
        try:
            info = yf.Ticker(symbol).info

            def metric(key: str):
                """Return info[key] as a number, or NaN if absent/unparseable."""
                value = info.get(key)
                if value is None:
                    return np.nan
                if isinstance(value, (int, float)):
                    return value
                try:
                    return float(value)
                except (TypeError, ValueError):
                    return np.nan

            data = {
                'symbol': symbol,
                '52wk_change': metric('52WeekChange'),
                'ps_ttm': metric('priceToSalesTrailing12Months'),
                'pe_ttm': metric('trailingPE'),
                'pe_forward': metric('forwardPE'),
                'market_cap': metric('marketCap'),
                'avg_volume': metric('averageVolume'),
                # The source value is treated as a percentage (150.0 = 150%)
                # and converted to a ratio (1.5); NaN stays NaN.
                'debt_to_equity': metric('debtToEquity') / 100.0,
                'profit_margin': metric('profitMargins'),
                'earnings_growth': metric('earningsGrowth'),
                'beta': metric('beta'),
            }

            # PE discount: how far trailing PE lies above forward PE.
            # Only defined for a positive forward PE.
            pe_ttm, pe_forward = data['pe_ttm'], data['pe_forward']
            if pd.notna(pe_ttm) and pd.notna(pe_forward) and pe_forward > 0:
                data['pe_discount'] = (pe_ttm / pe_forward) - 1
            else:
                data['pe_discount'] = np.nan

            return data

        except Exception as e:
            # Deliberately broad: the lookup is best effort and one failing
            # symbol must not abort the whole screening run.
            self.logger.warning(f'Error fetching {symbol}: {e}')
            return {'symbol': symbol}
    
    def fetch_stock_data(self, max_workers: int = 10) -> pd.DataFrame:
        """Holt Daten parallel für alle Aktien"""
        if not self.stocks:
            self.logger.warning('No stocks to fetch')
            return pd.DataFrame()
        
        self.logger.info(f'Fetching data for {len(self.stocks)} stocks...')
        results = []
        
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            future_to_stock = {executor.submit(self._fetch_single_stock, stock): stock 
                             for stock in self.stocks}
            
            for future in as_completed(future_to_stock):
                stock = future_to_stock[future]
                try:
                    result = future.result()
                    results.append(result)
                except Exception as e:
                    self.logger.error(f'Exception for {stock}: {e}')
        
        df = pd.DataFrame(results)
        if not df.empty:
            df.set_index('symbol', inplace=True)
            df = df.apply(pd.to_numeric, errors='ignore')
            self.data_df = df
            self.logger.info(f'Successfully fetched data for {len(df)} stocks')
        
        return df
    
    def _filter_ps_ratio(self, df: pd.DataFrame) -> pd.DataFrame:
        """Filtert nach P/S Ratio"""
        median_ps = df['ps_ttm'].median()
        self.logger.info(f'Median P/S ratio: {median_ps:.2f}')
        
        # Beide Bedingungen: unter Median UND unter absolutem Max
        filtered = df[
            (df['ps_ttm'] <= median_ps) & 
            (df['ps_ttm'] <= self.config.max_ps_ratio)
        ]
        
        self.logger.info(f'After P/S filter: {len(filtered)}/{len(df)} stocks remain')
        return filtered
    
    def _filter_pe_discount(self, df: pd.DataFrame) -> pd.DataFrame:
        """Filtert nach PE-Discount (optional - nur wenn Daten vorhanden)"""
        # Nur anwenden wenn genug Aktien Forward PE haben
        valid_pe = df['pe_discount'].notna().sum()
        
        if valid_pe < len(df) * 0.3:  # Weniger als 30% haben Forward PE
            self.logger.info(f'Skipping PE discount filter (only {valid_pe}/{len(df)} have data)')
            return df
        
        filtered = df[
            (df['pe_discount'] >= self.config.pe_dc_filter) | 
            (df['pe_discount'].isna())  # Behalte Aktien ohne Forward PE
        ]
        
        self.logger.info(f'After PE discount filter: {len(filtered)}/{len(df)} stocks remain')
        return filtered
    
    def _filter_returns(self, df: pd.DataFrame) -> pd.DataFrame:
        """Filtert nach 52-Wochen-Performance"""
        filtered = df[df['52wk_change'] >= self.config.return_filter]
        self.logger.info(f'After return filter (>={self.config.return_filter:.0%}): {len(filtered)}/{len(df)} stocks remain')
        return filtered
    
    def _filter_volume(self, df: pd.DataFrame) -> pd.DataFrame:
        """Filtert nach Handelsvolumen"""
        filtered = df[df['avg_volume'] >= self.config.min_volume]
        self.logger.info(f'After volume filter: {len(filtered)}/{len(df)} stocks remain')
        return filtered
    
    def _filter_market_cap(self, df: pd.DataFrame) -> pd.DataFrame:
        """Filtert nach Marktkapitalisierung"""
        filtered = df[df['market_cap'] >= self.config.min_market_cap]
        self.logger.info(f'After market cap filter: {len(filtered)}/{len(df)} stocks remain')
        return filtered
    
    def _filter_debt(self, df: pd.DataFrame) -> pd.DataFrame:
        """Filtert nach Verschuldungsgrad (optional und konservativ)"""
        # Prüfe ob Filter aktiviert ist
        if not self.config.enable_debt_filter:
            self.logger.info('Debt filter disabled - skipping')
            return df
            
        # Prüfe Datenverfügbarkeit
        valid_debt = df['debt_to_equity'].notna()
        debt_available = valid_debt.sum()
        
        self.logger.info(f'Debt data available for {debt_available}/{len(df)} stocks')
        
        # Wenn weniger als 50% Daten haben, überspringe Filter
        if debt_available < len(df) * 0.5:
            self.logger.info(f'Skipping debt filter (insufficient data: {debt_available}/{len(df)})')
            return df
        
        # Zeige Verteilung
        if debt_available > 0:
            debt_stats = df.loc[valid_debt, 'debt_to_equity'].describe()
            self.logger.info(f'Debt/Equity stats: median={debt_stats["50%"]:.2f}, mean={debt_stats["mean"]:.2f}, max={debt_stats["max"]:.2f}')
        
        # Behalte Aktien MIT niedrigem Debt UND ohne Debt-Daten
        filtered = df[
            (df['debt_to_equity'] <= self.config.max_debt_to_equity) | 
            (df['debt_to_equity'].isna())  # Behalte alle ohne Daten
        ]
        
        removed = len(df) - len(filtered)
        if removed > 0:
            self.logger.info(f'After debt filter: {len(filtered)}/{len(df)} stocks remain (removed {removed})')
        else:
            self.logger.info(f'After debt filter: all stocks remain')
            
        return filtered
    
    def apply_filters(self) -> pd.DataFrame:
        """Wendet alle Filter an"""
        if self.data_df is None:
            self.fetch_stock_data()
        
        df = self.data_df.copy()
        if df.empty:
            self.logger.warning('No data to filter')
            return df
        
        self.logger.info(f'\n{"="*50}\nStarting filter pipeline with {len(df)} stocks\n{"="*50}')
        
        # Filter-Pipeline
        df = self._filter_market_cap(df)
        df = self._filter_volume(df)
        df = self._filter_ps_ratio(df)
        df = self._filter_returns(df)
        df = self._filter_pe_discount(df)
        df = self._filter_debt(df)
        
        # Sortiere nach Score
        if not df.empty:
            df = self._calculate_score(df)
            df.sort_values('score', ascending=False, inplace=True)
        
        self.logger.info(f'\n{"="*50}\nFinal result: {len(df)} stocks passed all filters\n{"="*50}')
        return df
    
    def _calculate_score(self, df: pd.DataFrame) -> pd.DataFrame:
        """Berechnet Ranking-Score"""
        df = df.copy()
        
        # Normalisiere Metriken (0-1)
        df['return_score'] = (df['52wk_change'] - df['52wk_change'].min()) / (df['52wk_change'].max() - df['52wk_change'].min() + 1e-10)
        df['ps_score'] = 1 - ((df['ps_ttm'] - df['ps_ttm'].min()) / (df['ps_ttm'].max() - df['ps_ttm'].min() + 1e-10))
        df['pe_score'] = df['pe_discount'].fillna(0) / (df['pe_discount'].max() + 1e-10)
        
        # Gewichteter Score
        df['score'] = (
            df['return_score'] * 0.4 +
            df['ps_score'] * 0.3 +
            df['pe_score'] * 0.3
        )
        
        return df
    
    def save_results(self, df: pd.DataFrame, output_dir: str = 'lists'):
        """Speichert gefilterte Ergebnisse"""
        if df.empty:
            self.logger.info('No stocks to save')
            return
        
        folder = Path(output_dir)
        folder.mkdir(exist_ok=True)
        
        # Speichere nur Symbole
        symbols_file = folder / 'filtered.txt'
        existing_symbols = set()
        
        if symbols_file.exists():
            with open(symbols_file, 'r') as f:
                existing_symbols = {line.strip() for line in f}
        
        new_symbols = set(df.index) - existing_symbols
        
        if new_symbols:
            with open(symbols_file, 'a') as f:
                for symbol in new_symbols:
                    f.write(f'{symbol}\n')
            self.logger.info(f'Added {len(new_symbols)} new stocks to {symbols_file}')
        else:
            self.logger.info('No new stocks to add')
        
        # Speichere detaillierte CSV
        csv_file = folder / 'filtered_details.csv'
        df.to_csv(csv_file)
        self.logger.info(f'Saved detailed results to {csv_file}')
        
        # Zeige Top-Resultate
        print(f'\n{"="*80}\nTop 10 Filtered Stocks:\n{"="*80}')
        display_cols = ['52wk_change', 'ps_ttm', 'pe_ttm', 'pe_forward', 'pe_discount', 'score']
        print(df[display_cols].head(10).to_string())
        print(f'\n{"="*80}\n')
    
    def run(self):
        """Führt kompletten Screening-Prozess aus"""
        filtered_df = self.apply_filters()
        self.save_results(filtered_df)
        return filtered_df


if __name__ == '__main__':
    logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=logging.INFO,
    )

    # Relaxed thresholds so that more stocks make it through the pipeline.
    thresholds = {
        'return_filter': 0.05,       # only 5% performance required
        'pe_dc_filter': 0.05,        # only 5% PE discount
        'min_volume': 300_000,       # lower liquidity is acceptable
        'max_ps_ratio': 10.0,        # higher P/S allowed
        'min_market_cap': 5e8,       # 500M instead of 1B
        'max_debt_to_equity': 2.0,   # 2.0 = 200%
        'enable_debt_filter': True,  # debt filter switched on
    }
    config = FilterConfig(**thresholds)

    screener = StockScreener('lists/worldstocks.txt', config=config)
    screener.run()