# In [None]:
import io
import json
import os
import random
import threading
import time
from collections import deque
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime, timedelta
from functools import lru_cache
from typing import Optional, Tuple, Dict, Any, List

import numpy as np
import pandas as pd
import requests
import yfinance as yf
from bs4 import BeautifulSoup
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
from termcolor import colored

class RateLimiter:
    """Sliding-window rate limiter that can be shared between threads.

    At most ``max_requests`` calls to :meth:`wait_if_needed` are admitted per
    ``time_window`` seconds. A caller that would exceed the budget is blocked
    until the oldest recorded request has left the window.
    """

    def __init__(self, max_requests: int = 2000, time_window: int = 3600):
        """Create a limiter.

        Args:
            max_requests: Number of requests allowed inside one window.
            time_window: Length of the window in seconds.
        """
        self.max_requests = max_requests
        self.time_window = time_window
        # time.time() stamps of admitted requests, oldest first.
        self.requests = deque()
        # Mutex serialising every read/write of the timestamp queue.
        self.lock = threading.Lock()

    def _discard_expired(self, now: float) -> None:
        """Drop timestamps older than the window; the caller must hold the lock."""
        while self.requests and now - self.requests[0] > self.time_window:
            self.requests.popleft()

    def wait_if_needed(self):
        """Block until another request is allowed, then record it."""
        with self.lock:
            # Read the clock inside the critical section so that time spent
            # waiting for the lock is not ignored.
            now = time.time()
            self._discard_expired(now)

            if self.requests and len(self.requests) >= self.max_requests:
                sleep_time = self.requests[0] + self.time_window - now
                if sleep_time > 0:
                    # Small random jitter on top of the computed wait.
                    time.sleep(sleep_time + random.uniform(0.1, 1.0))
                # The clock has moved on: refresh it so the recorded stamp is
                # the real admission time and expired entries are released.
                now = time.time()
                self._discard_expired(now)

            self.requests.append(now)

class StockAnalyzer:
    def __init__(self):
        """Prepare storage paths, tunables, cached state and HTTP helpers."""
        root = 'stock_analysis'
        lists_dir = os.path.join(root, 'stock_lists')

        # Where symbol lists, the delisted cache and daily workbooks live.
        self.base_dir = root
        self.stock_lists_dir = lists_dir
        self.results_dir = os.path.join(root, 'daily_results')
        self.delisted_cache_file = os.path.join(lists_dir, 'delisted_stocks.json')

        # Tunables for batching and retrying.
        self.batch_size = 10   # symbols handled per batch
        self.retry_delay = 60  # base delay between retries, in seconds
        self.max_retries = 3   # attempts per symbol

        # The directories must exist before the delisted cache is read,
        # because a missing cache file is created on the spot.
        self.setup_directories()
        self.load_delisted_stocks()

        self.rate_limiter = RateLimiter()
        self.session = self._create_robust_session()

    def setup_directories(self):
        """Create necessary directories if they don't exist"""
        os.makedirs(self.stock_lists_dir, exist_ok=True)
        os.makedirs(self.results_dir, exist_ok=True)

    def _create_robust_session(self) -> requests.Session:
        """Build a ``requests`` session whose HTTP and HTTPS adapters retry.

        Returns:
            A session with one shared retrying adapter mounted for both
            ``http://`` and ``https://`` URLs.
        """
        retry_policy = Retry(
            total=5,
            backoff_factor=0.5,
            status_forcelist=[429, 500, 502, 503, 504],
            allowed_methods=["HEAD", "GET", "OPTIONS"],
        )
        retrying_adapter = HTTPAdapter(max_retries=retry_policy)

        http = requests.Session()
        for scheme in ("http://", "https://"):
            http.mount(scheme, retrying_adapter)
        return http

    def load_delisted_stocks(self):
        """Load previously identified delisted stocks"""
        try:
            with open(self.delisted_cache_file, 'r') as f:
                self.delisted_stocks = json.load(f)
        except FileNotFoundError:
            self.delisted_stocks = {'NSE': [], 'BSE': []}
            self.save_delisted_stocks()

    def save_delisted_stocks(self):
        """Save delisted stocks to cache"""
        with open(self.delisted_cache_file, 'w') as f:
            json.dump(self.delisted_stocks, f)

    def should_update_stock_list(self, exchange: str) -> bool:
        """Check if stock list needs updating (weekly update)"""
        list_file = os.path.join(self.stock_lists_dir, f'{exchange}_stocks.json')
        if not os.path.exists(list_file):
            return True
        file_time = datetime.fromtimestamp(os.path.getmtime(list_file))
        return (datetime.now() - file_time).days >= 7

    def save_stock_list(self, symbols: List[str], exchange: str):
        """Save stock list to JSON file"""
        list_file = os.path.join(self.stock_lists_dir, f'{exchange}_stocks.json')
        data = {
            'symbols': symbols,
            'last_updated': datetime.now().strftime('%Y-%m-%d')
        }
        with open(list_file, 'w') as f:
            json.dump(data, f)

    def load_stock_list(self, exchange: str) -> List[str]:
        """Load stock list from JSON file"""
        list_file = os.path.join(self.stock_lists_dir, f'{exchange}_stocks.json')
        with open(list_file, 'r') as f:
            data = json.load(f)
        return data['symbols']

    def download_nse_stocks(self) -> List[str]:
        """Download NSE stock list if needed"""
        if self.should_update_stock_list('NSE'):
            try:
                url = "https://archives.nseindia.com/content/equities/EQUITY_L.csv"
                response = self.session.get(url)
                df = pd.read_csv(io.StringIO(response.content.decode('utf-8')))
                symbols = df['SYMBOL'].tolist()
                self.save_stock_list(symbols, 'NSE')
                print("NSE stock list updated")
                return symbols
            except Exception as e:
                print(f"Error downloading NSE stocks: {e}")
                if os.path.exists(os.path.join(self.stock_lists_dir, 'NSE_stocks.json')):
                    return self.load_stock_list('NSE')
                return []
        return self.load_stock_list('NSE')

    def download_bse_stocks(self) -> List[str]:
        """Download BSE stock list if needed"""
        if self.should_update_stock_list('BSE'):
            try:
                url = "https://www.bseindia.com/corporates/List_Scrips.aspx"
                response = self.session.get(url)
                soup = BeautifulSoup(response.content, 'html.parser')
                table = soup.find('table', {'id': 'ContentPlaceHolder1_tblData'})
                symbols = []

                if table:
                    rows = table.find_all('tr')[1:]
                    for row in rows:
                        cols = row.find_all('td')
                        if cols:
                            bse_code = cols[0].text.strip()
                            symbols.append(bse_code)

                if symbols:
                    self.save_stock_list(symbols, 'BSE')
                    print("BSE stock list updated")
                    return symbols
            except Exception as e:
                print(f"Error downloading BSE stocks: {e}")
                if os.path.exists(os.path.join(self.stock_lists_dir, 'BSE_stocks.json')):
                    return self.load_stock_list('BSE')
                return []
        return self.load_stock_list('BSE')

    @lru_cache(maxsize=1000)
    def calculate_rsi(self, prices_tuple: Tuple[float, ...], periods: int = 14) -> float:
        """Calculate RSI with caching for performance"""
        prices = pd.Series(prices_tuple)
        delta = prices.diff()
        gain = delta.where(delta > 0, 0).rolling(window=periods).mean()
        loss = (-delta.where(delta < 0, 0)).rolling(window=periods).mean()
        rs = gain / loss
        return float(100 - (100 / (1 + rs)).iloc[-1])
    
    def calculate_ema(self, data: pd.DataFrame, length: int, source: str = 'Close', 
                     offset: int = 0, smoothing_length: int = 9) -> pd.Series:
        """Calculate Exponential Moving Average with specified parameters"""
        try:
            if isinstance(data, pd.Series):
                series_data = data
            else:
                series_data = data[source]
            
            # Calculate SMA for initial smoothing
            sma = series_data.rolling(window=smoothing_length).mean()
            
            # Calculate multiplier
            multiplier = 2 / (length + 1)
            
            # Initialize EMA with SMA
            ema = pd.Series(index=series_data.index, dtype=float)
            ema.iloc[:length-1] = np.nan
            ema.iloc[length-1] = sma.iloc[length-1]
            
            # Calculate EMA
            for i in range(length, len(series_data)):
                ema.iloc[i] = (series_data.iloc[i] - ema.iloc[i-1]) * multiplier + ema.iloc[i-1]
            
            # Apply offset if specified
            if offset != 0:
                ema = ema.shift(offset)
            
            return ema
        except Exception as e:
            print(f"Error calculating EMA: {e}")
            return pd.Series(np.nan, index=data.index)

    def calculate_tema(self, data: pd.DataFrame, length: int, source: str = 'Close') -> pd.Series:
        """Calculate Triple Exponential Moving Average (TEMA)"""
        try:
            # Get price series
            if isinstance(data, pd.Series):
                prices = data
            else:
                prices = data[source]
            
            # Calculate multiplier
            multiplier = 2 / (length + 1)
            
            # Calculate EMA1
            ema1 = prices.ewm(span=length, adjust=False).mean()
            
            # Calculate EMA2
            ema2 = ema1.ewm(span=length, adjust=False).mean()
            
            # Calculate EMA3
            ema3 = ema2.ewm(span=length, adjust=False).mean()
            
            # Calculate TEMA
            tema = 3 * ema1 - 3 * ema2 + ema3
            
            # Replace first 'length' periods with NaN
            tema.iloc[:length-1] = np.nan
            
            return tema
        except Exception as e:
            print(f"Error calculating TEMA: {e}")
            return pd.Series(np.nan, index=data.index)

    def get_stock_data(self, symbol: str, exchange: str = 'NSE', days: int = 200) -> Tuple[Optional[float], Optional[pd.DataFrame], Optional[float], Optional[Dict[str, Any]]]:
        """Fetch stock data with rate limiting and retries"""
        if symbol in self.delisted_stocks[exchange]:
            return None, None, None, None

        for attempt in range(self.max_retries):
            try:
                # Wait if needed based on rate limits
                self.rate_limiter.wait_if_needed()
                
                ticker_symbol = f"{symbol}.NS" if exchange == 'NSE' else f"{symbol}.BO"
                ticker = yf.Ticker(ticker_symbol)
                
                # Add random delay between 1 and 3 seconds
                time.sleep(random.uniform(1, 3))
                
                info = ticker.info
                
                end_date = datetime.now()
                start_date = end_date - timedelta(days=days)

                hist_data = ticker.history(
                    start=start_date,
                    end=end_date,
                    interval="1d"
                )

                if hist_data.empty:
                    return None, None, None, None

                # Calculate technical indicators
                hist_data = self._calculate_technical_indicators(hist_data)
                
                latest_price = hist_data['Close'].iloc[-1]
                prices_tuple = tuple(hist_data['Close'].values)
                latest_rsi = self.calculate_rsi(prices_tuple)

                return latest_price, hist_data, latest_rsi, info

            except requests.exceptions.RequestException as e:
                if "429" in str(e):
                    wait_time = self.retry_delay * (attempt + 1)
                    print(f"Rate limit hit for {symbol}, waiting {wait_time} seconds...")
                    time.sleep(wait_time)
                    continue
                print(f"Error fetching data for {symbol}: {e}")
            except Exception as e:
                print(f"Error processing {symbol}: {e}")
            
            if attempt == self.max_retries - 1:
                return None, None, None, None
            time.sleep(random.uniform(1, 3))

    def _calculate_technical_indicators(self, hist_data: pd.DataFrame) -> pd.DataFrame:
        """Calculate technical indicators with error handling"""
        try:
            if len(hist_data) >= 9:
                hist_data['EMA_9'] = self.calculate_ema(hist_data, length=9)
            if len(hist_data) >= 21:
                hist_data['EMA_21'] = self.calculate_ema(hist_data, length=21)
            if len(hist_data) >= 50:
                hist_data['EMA_50'] = self.calculate_ema(hist_data, length=50)
                hist_data['TEMA_50'] = self.calculate_tema(hist_data, length=50)
        except Exception as e:
            print(f"Error calculating technical indicators: {e}")
            for indicator in ['EMA_9', 'EMA_21', 'EMA_50', 'TEMA_50']:
                hist_data[indicator] = float('nan')
        return hist_data

    def _process_stock_info(self, symbol: str, exchange: str, latest_price: float, 
                           hist_data: pd.DataFrame, latest_rsi: float, info: Dict[str, Any]) -> Dict[str, Any]:
        """Process and format stock information"""
        stock_info = {
            'Symbol': symbol,
            'Exchange': exchange,
            'Company_Name': info.get('longName', 'N/A'),
            'Latest_Price': latest_price,
            'RSI': latest_rsi,
            'Date': datetime.now().strftime('%Y-%m-%d')
        }
        
        # Add technical indicators
        for indicator in ['EMA_9', 'EMA_21', 'EMA_50', 'TEMA_50']:
            stock_info[indicator] = hist_data[indicator].iloc[-1] if indicator in hist_data else 'N/A'
        
        # Add other info fields
        info_fields = {
            'Volume': ('Volume', hist_data['Volume'].iloc[-1] if 'Volume' in hist_data else 0),
            'Market_Cap': ('marketCap', 'N/A'),
            'PE_Ratio': ('trailingPE', 'N/A'),
            'EPS': ('trailingEps', 'N/A'),
            'Dividend_Yield': ('dividendYield', 'N/A'),
            'Book_Value': ('bookValue', 'N/A'),
            'Sector': ('sector', 'N/A'),
            'Industry': ('industry', 'N/A'),
            '52W_High': ('fiftyTwoWeekHigh', 'N/A'),
            '52W_Low': ('fiftyTwoWeekLow', 'N/A'),
            'Beta': ('beta', 'N/A'),
            'Previous_Close': ('previousClose', 'N/A'),
            'Open': ('open', 'N/A'),
            'Day_High': ('dayHigh', 'N/A'),
            'Day_Low': ('dayLow', 'N/A')
        }
        
        for key, (info_key, default) in info_fields.items():
            stock_info[key] = info.get(info_key, default)
        
        return stock_info

    def process_stock_batch(self, batch_data: Tuple[List[str], str]) -> List[Dict[str, Any]]:
        """Process a batch of stocks with rate limiting"""
        symbols, exchange = batch_data
        results = []
        
        # Process in smaller batches
        for i in range(0, len(symbols), self.batch_size):
            batch_symbols = symbols[i:i + self.batch_size]
            print(f"Processing batch {i//self.batch_size + 1} of {len(symbols)//self.batch_size + 1}")
            
            with ThreadPoolExecutor(max_workers=4) as executor:
                futures = {executor.submit(self.get_stock_data, symbol, exchange): symbol 
                          for symbol in batch_symbols}
                
                for future in futures:
                    symbol = futures[future]
                    try:
                        stock_data = future.result()
                        if all(v is not None for v in stock_data):
                            latest_price, hist_data, latest_rsi, info = stock_data
                            stock_info = self._process_stock_info(symbol, exchange, latest_price, 
                                                                hist_data, latest_rsi, info)
                            results.append(stock_info)
                    except Exception as e:
                        print(f"Error processing {symbol}: {e}")
            
            # Add delay between batches
            time.sleep(random.uniform(2, 5))
        
        return results

    def run_daily_analysis(self):
        """Analyse all NSE and BSE symbols and write one workbook per exchange.

        Each workbook holds an 'All Stocks' sheet and a 'High RSI Stocks'
        sheet (RSI >= 40, highest first). The high-RSI rows are also printed.
        Exchanges that produce no results are skipped.
        """
        current_date = datetime.now().strftime('%Y%m%d')

        print("Loading stock lists...")
        nse_symbols = self.download_nse_stocks()
        bse_symbols = self.download_bse_stocks()

        print(f"\nAnalyzing {len(nse_symbols)} NSE and {len(bse_symbols)} BSE stocks")

        symbols_by_exchange = {'NSE': nse_symbols, 'BSE': bse_symbols}
        for exchange, symbols in symbols_by_exchange.items():
            results = self.process_stock_batch((symbols, exchange))
            if not results:
                continue

            workbook_name = f'{exchange.lower()}_analysis_{current_date}.xlsx'
            excel_file = os.path.join(self.results_dir, workbook_name)
            results_df = pd.DataFrame(results)

            with pd.ExcelWriter(excel_file, engine='xlsxwriter') as writer:
                results_df.to_excel(writer, sheet_name='All Stocks', index=False)

                rsi_filter = results_df['RSI'] >= 40
                high_rsi = results_df[rsi_filter].sort_values('RSI', ascending=False)
                high_rsi.to_excel(writer, sheet_name='High RSI Stocks', index=False)

                header_format = writer.book.add_format({
                    'bold': True,
                    'text_wrap': True,
                    'valign': 'top',
                    'bg_color': '#D9E1F2',
                    'border': 1
                })

                # Widen the columns and rewrite the header row with the
                # styled format on every sheet.
                titles = list(enumerate(results_df.columns.values))
                for worksheet in writer.sheets.values():
                    worksheet.set_column('A:Z', 15)
                    for col_num, value in titles:
                        worksheet.write(0, col_num, value, header_format)

            print(f"\n{exchange} analysis complete. Results saved to {excel_file}")

            print(f"\n{exchange} Stocks with RSI >= 40:")
            for _, row in high_rsi.iterrows():
                summary = (
                    f"{row['Symbol']} ({row['Company_Name']}): RSI = {row['RSI']:.2f}, "
                    f"Price = ₹{row['Latest_Price']:.2f}"
                )
                print(colored(summary, 'green', attrs=['bold']))

        print(f"\nDaily analysis complete. Results saved in {self.results_dir}")


# Script entry point: build the analyzer (its constructor creates the working
# directories and loads the delisted-stock cache) and run one daily analysis.
if __name__ == "__main__":
    analyzer = StockAnalyzer()
    analyzer.run_daily_analysis()