In [2]:
import os
import pandas as pd
import ta
from typing import List
from IPython.display import display, HTML

In [3]:
def display_scrollable_df(df, max_rows=25, max_cols=10, width="100%", height="500px"):
    """Show ``df`` as an HTML table wrapped in a scrollable ``<div>``.

    Args:
        df: DataFrame to render.
        max_rows: Forwarded to ``DataFrame.to_html`` as ``max_rows``.
        max_cols: Forwarded to ``DataFrame.to_html`` as ``max_cols``.
        width: CSS width of the scrolling container.
        height: CSS height of the scrolling container.

    Cell values are emitted without HTML escaping (``escape=False``).
    """
    table_html = df.to_html(
        max_rows=max_rows,
        max_cols=max_cols,
        escape=False,
        notebook=True,
    )
    # Open the scroll container right before each '<table ' tag and append one closing tag.
    container_open = f'<div style="overflow:auto; width:{width}; height:{height}">'
    scrollable_html = table_html.replace('<table ', container_open + '<table ') + '</div>'
    display(HTML(scrollable_html))

In [4]:
# Logical OHLCV field name -> column name required in the loaded CSV files.
ohlcv_map = dict(
    date='TradDt',
    symbol='TckrSymb',
    open='OpnPric',
    high='HghPric',
    low='LwPric',
    close='ClsPric',
    volume='TtlTradgVol',
)

In [5]:
def list_csv_gz_files(base_dir: str):
    """Return the sorted paths of every ``*.csv.gz`` file found under ``base_dir``.

    The directory tree is walked recursively; each returned path is the
    containing folder joined with the file name.
    """
    matches = [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(base_dir)
        for name in names
        if name.endswith('.csv.gz')
    ]
    matches.sort()
    return matches

In [6]:
def load_rolling_window(file_list: List[str], window_size: int, verbose: bool = True) -> pd.DataFrame:
    """Load the first ``window_size`` files of ``file_list`` into one DataFrame.

    Files that cannot be read, are empty, or lack any column named in
    ``ohlcv_map`` are skipped. The date column of each kept file is converted
    to datetime before the frames are concatenated.

    Args:
        file_list: Paths of gzip-compressed CSV files. Only the first
            ``window_size`` entries are read, so the ordering of this list
            decides which files make up the window.
        window_size: Maximum number of files to load.
        verbose: When True, print a message for every skipped file and when
            nothing could be loaded.

    Returns:
        The concatenated rows sorted by symbol and then date, or an empty
        DataFrame when no file was usable.
    """
    required_cols = set(ohlcv_map.values())
    date_col = ohlcv_map['date']
    symbol_col = ohlcv_map['symbol']
    window_files = file_list[:window_size]
    frames = []

    for file_path in window_files:
        try:
            frame = pd.read_csv(file_path, compression='gzip')
        except Exception as e:
            # Best effort: one unreadable file must not abort the whole window.
            if verbose:
                print(f"[ERROR] Failed to read {file_path}: {e}")
            continue

        if frame.empty:
            if verbose:
                print(f"[INFO] Skipping empty file: {file_path}")
            continue

        missing_cols = required_cols - set(frame.columns)
        if missing_cols:
            if verbose:
                print(f"[WARNING] Missing columns {missing_cols} in {file_path}, skipping.")
            continue

        # Ensure date is in datetime format
        frame[date_col] = pd.to_datetime(frame[date_col])
        frames.append(frame)

    if not frames:
        if verbose:
            print("[INFO] No valid files loaded.")
        return pd.DataFrame()

    combined_df = pd.concat(frames, ignore_index=True)

    # Sort by stock and date
    return combined_df.sort_values(by=[symbol_col, date_col])

In [7]:
def compute_indicators(df: pd.DataFrame) -> pd.DataFrame:
    """Compute per-symbol technical indicators and keep only the latest date.

    The input is reduced to the seven raw OHLCV columns, which are renamed to
    ``Date, Symbol, Open, High, Low, Close, Volume``. Indicators from the
    ``ta`` package are then computed separately for each symbol on its
    date-ordered rows.

    Args:
        df: Frame containing at least the columns ``TradDt``, ``TckrSymb``,
            ``OpnPric``, ``HghPric``, ``LwPric``, ``ClsPric`` and
            ``TtlTradgVol``.

    Returns:
        One row per symbol that has a row on the maximum ``Date`` in the
        input, with the renamed OHLCV columns followed by the indicator
        columns. An empty frame with those columns is returned when ``df``
        has no rows.

    Raises:
        KeyError: If ``df`` has rows but lacks one of the required columns.
    """
    final_columns = [
        'Date', 'Symbol', 'Open', 'High', 'Low', 'Close', 'Volume',
        'EMA_20', 'EMA_50', 'MACD', 'MACD_signal', 'MACD_hist',
        'BB_upper', 'BB_lower', 'BB_width',
        'RSI_14', 'MFI_14', 'STOCH_K', 'STOCH_D', 'CCI', 'WILLIAMS_R'
    ]

    # Nothing to compute: return an empty, correctly shaped frame instead of
    # failing later inside column selection or pd.concat.
    if df.empty:
        return pd.DataFrame(columns=final_columns)

    # Step 1: Keep only required columns
    prices = df[['TradDt', 'TckrSymb', 'OpnPric', 'HghPric', 'LwPric', 'ClsPric', 'TtlTradgVol']].copy()

    # Step 2: Rename columns
    prices.columns = ['Date', 'Symbol', 'Open', 'High', 'Low', 'Close', 'Volume']
    prices['Date'] = pd.to_datetime(prices['Date'])
    prices = prices.sort_values(by=['Symbol', 'Date'])

    # Step 3: Group by Symbol and compute indicators
    results = []

    for _symbol, group in prices.groupby('Symbol'):
        g = group.set_index('Date')

        # === Lagging Indicators ===
        g['EMA_20'] = ta.trend.EMAIndicator(close=g['Close'], window=20).ema_indicator()
        g['EMA_50'] = ta.trend.EMAIndicator(close=g['Close'], window=50).ema_indicator()

        macd = ta.trend.MACD(close=g['Close'], window_slow=26, window_fast=12, window_sign=9)
        g['MACD'] = macd.macd()
        g['MACD_signal'] = macd.macd_signal()
        g['MACD_hist'] = macd.macd_diff()

        bb = ta.volatility.BollingerBands(close=g['Close'], window=20, window_dev=2)
        g['BB_upper'] = bb.bollinger_hband()
        g['BB_lower'] = bb.bollinger_lband()
        g['BB_width'] = bb.bollinger_wband()

        # === Leading Indicators ===
        g['RSI_14'] = ta.momentum.RSIIndicator(close=g['Close'], window=14).rsi()
        g['MFI_14'] = ta.volume.MFIIndicator(
            high=g['High'], low=g['Low'], close=g['Close'], volume=g['Volume'], window=14
        ).money_flow_index()

        # Build the oscillator once; %K and %D come from the same object.
        stoch = ta.momentum.StochasticOscillator(high=g['High'], low=g['Low'], close=g['Close'])
        g['STOCH_K'] = stoch.stoch()
        g['STOCH_D'] = stoch.stoch_signal()

        g['CCI'] = ta.trend.CCIIndicator(high=g['High'], low=g['Low'], close=g['Close'], window=20).cci()
        g['WILLIAMS_R'] = ta.momentum.WilliamsRIndicator(
            high=g['High'], low=g['Low'], close=g['Close'], lbp=14
        ).williams_r()

        # Bring 'Date' back as a regular column before collecting.
        results.append(g.reset_index())

    # groupby drops rows whose Symbol is missing, so there may be no groups at all.
    if not results:
        return pd.DataFrame(columns=final_columns)

    # Step 4: Concatenate all symbols
    final_df = pd.concat(results, ignore_index=True)

    # Keep only the rows of the latest date present in the window.
    latest_date = final_df['Date'].max()
    final_df = final_df[final_df['Date'] == latest_date]

    # Step 5: Select only final columns
    return final_df[final_columns]

In [8]:
def generate_signals(df: pd.DataFrame, signal_dir: str = "signal", save_scores: bool = False) -> None:
    """
    Generate Buy/Hold/Sell signals from graded indicator scores and save them as a CSV file.

    Each row gets a leading score (0-5) and a lagging score (0-4):

    * BUY  when leading >= 3 and lagging >= 2
    * SELL when leading <= 1 and lagging <= 1
    * HOLD otherwise

    Rows where any scored input is NaN are always HOLD. Comparisons against
    NaN are False, so such rows would otherwise score 0/0 and be reported as
    SELL purely because data is missing.

    The file is written to ``<signal_dir>/<YYYY>/<YYYYMMDD>.csv``, named after
    the maximum ``Date`` in ``df``. Nothing is written when ``df`` is empty.

    Args:
        df (pd.DataFrame): Input DataFrame containing OHLCV and indicator columns.
            It is not modified.
        signal_dir (str): Output directory to save signals.
        save_scores (bool): Whether to store leading/lagging scores in output for debugging.
    """
    df = df.copy()

    if df.empty:
        print("[INFO] No rows to score; no signal file written.")
        return

    scored_inputs = [
        'RSI_14', 'MFI_14', 'STOCH_K', 'STOCH_D', 'CCI', 'WILLIAMS_R',
        'EMA_20', 'EMA_50', 'MACD', 'MACD_signal', 'BB_width', 'Close',
    ]
    has_all_inputs = df[scored_inputs].notna().all(axis=1)

    # === Leading Indicators ===
    leading_score = (
        (df['RSI_14'] < 30).astype(int)
        + (df['MFI_14'] < 20).astype(int)
        + ((df['STOCH_K'] < 20) & (df['STOCH_K'] > df['STOCH_D'])).astype(int)
        + (df['CCI'] < -100).astype(int)
        + (df['WILLIAMS_R'] < -80).astype(int)
    )

    # === Lagging Indicators ===
    # Close is floored at 1 so very small prices do not inflate the width ratio.
    # NOTE(review): if BB_width is ta's bollinger_wband (reportedly already a
    # percentage of the middle band), dividing by Close normalizes twice --
    # confirm the intended threshold.
    bb_ratio = df['BB_width'] / df['Close'].clip(lower=1)
    lagging_score = (
        (df['EMA_20'] > df['EMA_50']).astype(int)
        + (df['MACD'] > df['MACD_signal']).astype(int)
        + (bb_ratio > 0.06).astype(int)
        + (df['Close'] > df['EMA_20']).astype(int)
    )

    # === Decision Logic ===
    # The BUY and SELL conditions cannot both hold, so assignment order is irrelevant.
    buy_mask = (has_all_inputs & (leading_score >= 3) & (lagging_score >= 2)).to_numpy()
    sell_mask = (has_all_inputs & (leading_score <= 1) & (lagging_score <= 1)).to_numpy()
    signal = pd.Series('HOLD', index=df.index, dtype=object)
    signal.loc[buy_mask] = 'BUY'
    signal.loc[sell_mask] = 'SELL'

    # Append signals and scores
    df['Signal'] = signal.to_numpy()
    if save_scores:
        df['Leading_Score'] = leading_score.to_numpy()
        df['Lagging_Score'] = lagging_score.to_numpy()

    # Final column order: OHLCV first, then everything else in its existing order.
    base_cols = ['Date', 'Symbol', 'Open', 'High', 'Low', 'Close', 'Volume']
    extra_cols = [col for col in df.columns if col not in base_cols]
    output_df = df[base_cols + extra_cols].sort_values(by=['Symbol'])

    # Save output
    last_date = pd.to_datetime(df['Date'].max())
    year_str = last_date.strftime("%Y")
    date_str = last_date.strftime("%Y%m%d")
    output_path = os.path.join(signal_dir, year_str)
    os.makedirs(output_path, exist_ok=True)
    file_name = f"{date_str}.csv"
    full_path = os.path.join(output_path, file_name)

    output_df.to_csv(full_path, index=False)
    print(f"✅ Signal file saved to: {full_path}")

    # Optional: print sample debug
    print("\n🔍 Sample Signal Debug:")
    debug_cols = ['Symbol', 'Signal'] + (['Leading_Score', 'Lagging_Score'] if save_scores else [])
    print(output_df[debug_cols].head(5))

In [9]:
def generate_signals_rolling(
    data_dir: str = "../data/nse/equity",
    window_size: int = 60,
    signal_dir: str = "../signal/nse/equity"
) -> None:
    """
    Walk backward through daily files using a rolling window to compute EOD signals.

    Files under ``data_dir`` are reverse-sorted by path. For each window of
    ``window_size`` consecutive files, the data is loaded, indicators are
    computed, and the signal for the latest date in that window is saved; the
    window then moves on by one file. Processing stops when fewer than
    ``window_size`` files remain, so every signal is based on a full window.

    A window that raises is reported and skipped; it does not stop the run.

    Args:
        data_dir: Root directory searched for ``*.csv.gz`` files.
        window_size: Number of files per window. Must be at least 1.
        signal_dir: Directory passed to ``generate_signals`` for output.

    Raises:
        ValueError: If ``window_size`` is less than 1.
    """
    if window_size < 1:
        raise ValueError(f"window_size must be >= 1, got {window_size}")

    # Step 1: Sort latest to earliest
    all_files = sorted(list_csv_gz_files(data_dir), reverse=True)

    print(f"📂 Total files: {len(all_files)} | Starting rolling window generation...")

    # Step 2: Slide the window one file at a time while a full window is available
    n_windows = max(len(all_files) - window_size + 1, 0)
    for start in range(n_windows):
        window_files = all_files[start:start + window_size]

        try:
            # Step 3: Load data for this window
            df_window = load_rolling_window(window_files, window_size)

            # Step 4: Compute indicators, retain only last day per symbol
            df_indicators = compute_indicators(df_window)

            # Step 5: Generate and save signal
            generate_signals(df_indicators, signal_dir=signal_dir)

        except Exception as e:
            # Best effort: report the failing window and continue with the next one.
            print(f"⚠️ Skipping date {window_files[0]} due to error: {e}")

    print("✅ Rolling signal generation complete.")


In [10]:
# Run the rolling signal generation with the default data directory, window size and signal directory.
generate_signals_rolling()

📂 Total files: 174 | Starting rolling window generation...
✅ Signal file saved to: ../signal/nse/equity/2025/20250912.csv

🔍 Sample Signal Debug:
         Symbol Signal
30       0SCL26   SELL
130  1003SCL30A   SELL
133  1003SCL31P   SELL
153  1003SFIL28   SELL
212  1015SCFL29   SELL
✅ Signal file saved to: ../signal/nse/equity/2025/20250911.csv

🔍 Sample Signal Debug:
         Symbol Signal
108  1003ISFL28   HOLD
151  1003SFIL28   SELL
208   1015ECL29   HOLD
311   1015UCL27   SELL
362  1018GS2026   HOLD
✅ Signal file saved to: ../signal/nse/equity/2025/20250910.csv

🔍 Sample Signal Debug:
         Symbol Signal
6      0MOFSL26   SELL
29       0SCL26   SELL
108  1003ISFL28   HOLD
130  1003SCL30A   SELL
208   1015ECL29   HOLD
✅ Signal file saved to: ../signal/nse/equity/2025/20250909.csv

🔍 Sample Signal Debug:
         Symbol Signal
13     0MOFSL27   SELL
106  1003ISFL28   HOLD
148  1003SFIL28   SELL
204   1015ECL29   HOLD
276   1015UCL26   HOLD
✅ Signal file saved to: ../signal/nse/equ

In [None]:
# Single-window run: load one window of files, compute indicators, write one signal file.
# NOTE(review): `files` is sorted ascending, so the window is the first 60 paths
# in sorted order, whereas generate_signals_rolling reverse-sorts before
# slicing -- confirm which ordering is intended here.
base_dir = "../data/nse"
files = list_csv_gz_files(base_dir)
window_size = 60
df = load_rolling_window(files, window_size)
combined_df = compute_indicators(df)
generate_signals(combined_df, "../signal")

In [None]:
# Preview the loaded window (`df` from the previous cell) in a scrollable table.
display_scrollable_df(df, max_rows=30, max_cols=12, height="600px")