<a href="https://colab.research.google.com/github/sanozzz/QuantBacktests/blob/main/Trendlyne_Monthly_Rebalance(Zerodha).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
"""
STEP0::: IMPORTING LIBRARIES AND SETTING UP GLOBAL VARIABLES

"""
import os
import time
from pytz import timezone
import logging
import re
from kiteconnect import KiteConnect
import sys
sys.path.append('/content/vectorbt.pro')  # Add the path where your package is cloned
# Now, try importing your module
import vectorbtpro as vbt
import pandas as pd
import numpy as np
import json
from pandas.tseries.offsets import MonthBegin
import talib
from numba import njit
import os
import warnings
from datetime import datetime, timedelta
from pytz import timezone as tz
import requests

# Global variable

api_key = "lwhfs47ilbvwc84g"
access_token = "ozWrM7XdI0CUbzzFiZQwaUfpSmqkblXY"
max_month = 120


# Load the CSV file
input_file = "BacktestInput.csv"  # Replace with your actual file path
df = pd.read_csv(input_file, on_bad_lines='skip')


In [2]:
"""
STEP0::: SETUP LOGGER

"""
# Define IST timezone
IST = timezone('Asia/Kolkata')

class ISTFormatter(logging.Formatter):
    """
    Custom formatter to force log timestamps to use IST timezone.
    """
    def formatTime(self, record, datefmt=None):
        # Convert the record's created time to IST
        record_time = datetime.fromtimestamp(record.created).astimezone(IST)
        if datefmt:
            return record_time.strftime(datefmt)
        return record_time.isoformat()

def setup_logger(log_file: str, level=logging.INFO, file_mode='a') -> logging.Logger:
    """
    Sets up a logger with timestamps in IST for both file and console outputs.
    Ensures no duplicate log handlers are added.

    Args:
        log_file (str): Path to the log file.
        level (int): Logging level (e.g., logging.INFO, logging.DEBUG).
        file_mode (str): Mode to open the log file ('a' for append, 'w' for overwrite).

    Returns:
        logging.Logger: Configured logger instance.
    """
    logger = logging.getLogger("OrderBookGeneratorLogger")
    logger.setLevel(level)

    # Clear existing handlers
    if logger.hasHandlers():
        logger.handlers.clear()

    # Define the custom IST formatter
    formatter = ISTFormatter('%(asctime)s - %(levelname)s - %(message)s')

    # File handler
    file_handler = logging.FileHandler(log_file, mode=file_mode)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)

    # Console handler
    console_handler = logging.StreamHandler()
    console_handler.setFormatter(formatter)
    logger.addHandler(console_handler)

    return logger

# Set the global timezone for the process (if on POSIX system)
os.environ['TZ'] = 'Asia/Kolkata'
try:
    time.tzset()  # Apply timezone change globally (works on POSIX systems)
except AttributeError:
    # time.tzset() is not available on non-POSIX systems (e.g., Windows)
    pass

# Define the log file path
log_file_path = f"logfile_{datetime.now(IST).strftime('%Y-%m-%d')}.log"
logger = setup_logger(log_file_path, level=logging.INFO, file_mode='a')





"""
STEP1:::CODE FOR CONVERTING TRENDLYNE CSV TO JSON FILE

"""

# Step 1: Delete all rows with NaN values
df = df.dropna(how='all')

# Step 2: Drop unnecessary columns
columns_to_drop = [
    "Start Price", "End Price", "Change",
    "Avg Change %", "Weightage of Each stock %", "NAV (Initial 100)"
]
df = df.drop(columns=columns_to_drop, errors='ignore')

# Step 3: Forward fill the "As on Date" column for rows without a date
df['As on Date'] = df['As on Date'].fillna(method='ffill')

# Rename "As on Date" to "Date"
df.rename(columns={"As on Date": "Date"}, inplace=True)

# Step 4: Convert the date range (e.g., 2012-09-28 to 2012-10-31) to the first of the last date
df['Date'] = df['Date'].str.extract(r'to\s+(\d{4}-\d{2}-\d{2})')  # Extract the last date
df['Date'] = pd.to_datetime(df['Date'], errors='coerce') - MonthBegin(1)  # Convert to first of the month

# Step 5: Keep only rows where 'Stock' column is not blank
df = df.dropna(subset=['Stock'])

# Step 6: Extract the first part of the stock name and add 'NSE:' as prefix
df['Stock'] = df['Stock'].str.extract(r'^([A-Z&-]+)').fillna('')  # Extract first part
df['Stock'] = 'NSE:' + df['Stock']  # Add NSE prefix

# Step 7: Convert the cleaned DataFrame to a JSON structure
json_output = {}
for _, row in df.iterrows():
    date = row['Date'].strftime('%Y-%m-%d')  # Ensure date format
    stock = row['Stock']
    if date not in json_output:
        json_output[date] = {}
    json_output[date][stock] = 150000  # Assign default cash amount

# Save the final JSON to symbols.json
json_file = "symbols.json"
with open(json_file, "w") as f:
    json.dump(json_output, f, indent=4)

print(f"JSON file saved to {json_file}")



"""
STEP2:::ORDERBOOK GENERATOR CLASS FOR STGY
"""

# Define IST timezone
IST = tz('Asia/Kolkata')

class OrderBookGenerator:
    def __init__(self, logger, api_key, access_token, log_file, symbols_json_path='/content/symbols.json', default_cash=50000,
                 tp_ladder=[0.2,1], mult_factor=25, max_retries=5, retry_delay=2):
        """
        Initialize the OrderBookGenerator class.

        Args:
            logger: Logger object for logging.
            api_key (str): Kite API key.
            access_token (str): Kite access token.
            log_file (str): Path to the log file.
            symbols_json_path (str): Path to the JSON file containing symbol mappings.
            default_cash (int): Default initial cash value for symbols.
            tp_ladder (list): Take-profit ladder values.
            mult_factor (int): Multiplier factor for calculations.
            max_retries (int): Maximum retries for API calls.
            retry_delay (int): Delay between retries (in seconds).
        """
        self.logger = logger
        self.log_file = log_file
        self.tp_ladder = tp_ladder
        self.mult_factor = mult_factor
        self.max_retries = max_retries
        self.retry_delay = retry_delay
        self.default_cash = default_cash

        # Initialize KiteConnect
        try:
            self.kite = KiteConnect(api_key=api_key)
            self.kite.set_access_token(access_token)
            self.logger.info("KiteConnect initialized successfully.")
        except Exception as e:
            self.logger.error(f"Error initializing KiteConnect: {e}")
            raise e

        # Load symbols and their respective cash values
        self.symbols_cash_map = self.load_symbols_cash(symbols_json_path)
        self.target_month = self.get_target_month()
        self.symbols_list = self.get_symbols_for_target_month()

        # Fetch instrument data from Kite API
        self.instrument_df = self.fetch_instruments()

        warnings.filterwarnings('ignore')



    def fetch_instruments(self) -> pd.DataFrame:
        """
        Fetch the list of all NSE instruments from KiteConnect API.
        """
        try:
            self.logger.info("Fetching NSE instruments from KiteConnect...")
            instrument_dump = self.kite.instruments("NSE")
            instrument_df = pd.DataFrame(instrument_dump)
            instrument_df.to_csv("NSE_Instruments.csv", index=False)  # Save as CSV for reference
            self.logger.info("Fetched and saved NSE instrument dump.")
            return instrument_df
        except Exception as e:
            self.logger.error(f"Error fetching instruments: {e}")
            raise e



    def load_symbols_cash(self, symbols_json_path: str) -> dict:
        """
        Load the JSON file mapping months to symbols with cash values.
        """
        try:
            with open(symbols_json_path, 'r') as f:
                symbols_cash_map = json.load(f)
                self.logger.info(f"Loaded symbols JSON from {symbols_json_path}")
                return symbols_cash_map
        except FileNotFoundError:
            self.logger.error(f"Symbols JSON file not found: {symbols_json_path}. Exiting.")
            raise FileNotFoundError(f"Symbols JSON file not found: {symbols_json_path}")



    def instrument_lookup(self, symbol: str) -> int:
        """
        Looks up the instrument token for a given symbol from the instrument dump.
        """
        try:
            token = self.instrument_df[self.instrument_df['tradingsymbol'] == symbol].instrument_token.values[0]
            self.logger.info(f"Found instrument token for {symbol}: {token}")
            return token
        except IndexError:
            self.logger.warning(f"Instrument token not found for symbol: {symbol}.")
            return -1

    def fetch_ohlc_extended(self, ticker: str, from_date: str, interval: str) -> pd.DataFrame:
        """
        Fetch historical data from Zerodha's KiteConnect API using extended logic to handle 100-day limit.

        Args:
            ticker (str): The trading symbol.
            from_date (str): Start date in 'YYYY-MM-DD' format.
            interval (str): Data interval (e.g., '15minute', 'day').

        Returns:
            pd.DataFrame: A DataFrame containing OHLCV data.
        """
        instrument = self.instrument_lookup(ticker)
        if instrument == -1:
            self.logger.warning(f"Instrument token not found for ticker {ticker}.")
            return pd.DataFrame()

        start_date = dt.datetime.strptime(from_date, '%Y-%m-%d')
        end_date = dt.datetime.today()

        self.logger.info(f"Fetching {interval} data for {ticker} from {start_date.date()} to {end_date.date()}.")

        data = pd.DataFrame(columns=['date', 'open', 'high', 'low', 'close', 'volume'])

        while start_date < end_date:
            fetch_end_date = min(start_date + dt.timedelta(days=100), end_date)
            try:
                data_chunk = pd.DataFrame(
                    self.kite.historical_data(instrument, start_date, fetch_end_date, interval)
                )
                data = pd.concat([data, data_chunk], ignore_index=True)
            except Exception as e:
                self.logger.error(f"Error fetching data for {ticker} from {start_date.date()} to {fetch_end_date.date()}: {e}")
                break
            start_date = fetch_end_date

        if not data.empty:
            data.set_index("date", inplace=True)
            self.logger.info(f"Fetched {len(data)} rows of data for {ticker}.")
            return data[['open', 'high', 'low', 'close', 'volume']]

        self.logger.warning(f"No data found for ticker {ticker}.")
        return pd.DataFrame()


    def fetch_data(self, scrip: str, from_date: str, to_date: str, interval: str = '15minute') -> pd.DataFrame:
        """
        Wrapper for fetching historical data for a specific symbol.

        Args:
            scrip (str): The trading symbol (e.g., "RELIANCE").
            from_date (str): Start date in 'YYYY-MM-DD' format.
            to_date (str): End date in 'YYYY-MM-DD' format.
            interval (str): Data interval (e.g., '15minute', 'day').

        Returns:
            pd.DataFrame: A DataFrame containing OHLCV data.
        """
        stripped_scrip = scrip.replace("NSE:", "")
        return self.fetch_ohlc_extended(stripped_scrip, from_date, interval)






    def get_target_month(self) -> str:
            """
            Determine the target month from the JSON (format yyyy-mm-01).
            """
            today = datetime.now(IST)
            target_month = (today + timedelta(days=1)).strftime('%Y-%m-01')  # yyyy-mm-01 for next month's 1st date
            if target_month in self.symbols_cash_map:
                self.logger.info(f"Processing data for the target month: {target_month}")
                return target_month
            else:
                self.logger.error(f"No data found for the target month: {target_month}. Exiting.")
                raise ValueError(f"No data found for the target month: {target_month}")


    def get_symbols_for_target_month(self) -> list:
        """
        Extract symbols for the target month.
        """
        return list(self.symbols_cash_map[self.target_month].keys())


    @staticmethod
    @njit
    def get_final_bands_nb(close, upper, lower):
        trend = np.full(close.shape, np.nan)
        dir_ = np.full(close.shape, 1)
        long = np.full(close.shape, np.nan)
        short = np.full(close.shape, np.nan)

        for i in range(1, close.shape[0]):
            if close[i] > upper[i - 1]:
                dir_[i] = 1
            elif close[i] < lower[i - 1]:
                dir_[i] = -1
            else:
                dir_[i] = dir_[i - 1]
                if dir_[i] > 0 and lower[i] < lower[i - 1]:
                    lower[i] = lower[i - 1]
                if dir_[i] < 0 and upper[i] > upper[i - 1]:
                    upper[i] = upper[i - 1]

            if dir_[i] > 0:
                trend[i] = long[i] = lower[i]
            else:
                trend[i] = short[i] = upper[i]

        return trend, dir_, long, short

    @staticmethod
    def get_basic_bands(med_price, atr, multiplier):
        matr = multiplier * atr
        upper = med_price + matr
        lower = med_price - matr
        return upper, lower

    def faster_supertrend_talib(self, high, low, close, period=7, multiplier=3):
        avg_price = talib.MEDPRICE(high.flatten(), low.flatten())
        atr = talib.ATR(high.flatten(), low.flatten(), close.flatten(), period)
        upper, lower = self.get_basic_bands(avg_price, atr, multiplier)
        return self.get_final_bands_nb(close, upper, lower)


    @staticmethod
    def shift_array(arr: np.ndarray) -> np.ndarray:
        shifted = np.zeros_like(arr)
        shifted[1:] = arr[:-1]
        return shifted



    def MTF_Trend(self, high: pd.Series, low: pd.Series, close: pd.Series) -> np.ndarray:
        # Resample to weekly and daily closes
        close_1W = close.resample("1w").last()
        close_1D = close.resample("1d").last().dropna().shift(1)

        # Resample weekly and daily data to 15m
        resampler_w = vbt.Resampler(close_1W.index, close.index, source_freq="W-SUN", target_freq="15m")
        resampler_d = vbt.Resampler(close_1D.index, close.index, source_freq="1d", target_freq="15m")

        # Compute multiple SuperTrend indicators
        _, superd_1, _, _ = self.faster_supertrend_talib(high.values, low.values, close.values, 7, 3)
        _, superd_2, _, _ = self.faster_supertrend_talib(high.values, low.values, close.values, 10, 3)
        _, superd_3, _, _ = self.faster_supertrend_talib(high.values, low.values, close.values, 11, 2)

        # Combine SuperTrend directions
        concatenated = superd_1 + superd_2 + superd_3
        shifted = self.shift_array(concatenated)

        # Determine strong trend conditions
        condition_3 = (concatenated == 3) & (shifted != 3)
        condition_neg_3 = (concatenated == -3) & (shifted != -3)

        # Define short-term trend
        ST_Trend = np.where(condition_3, 1, 0)
        ST_Trend = np.where(condition_neg_3, -1, ST_Trend)

        # Weekly SMA and MACD-based conditions
        SMA_1W = vbt.talib("SMA").run(close_1W, timeperiod=20).real
        MACD = vbt.talib_func("MACD")
        MACD_Hist = MACD(close_1W, 12, 26, 9)[2]
        MACD_Positive = MACD_Hist > 0

        Cls1W_Abv_SMA = close_1W.vbt >= SMA_1W

        # Daily SMA alignment
        SMA20_1D = vbt.talib("SMA").run(close_1D, timeperiod=20).real
        Close15mBelow20DMA = close.vbt <= vbt.talib("SMA").run(close_1D, timeperiod=20).real.vbt.realign(resampler_d)

        # Long-term trend (weekly SMA and MACD alignment)
        LT_Trend = np.where(
            np.logical_and(
                MACD_Positive.vbt.realign(resampler_w).to_numpy(),
                Cls1W_Abv_SMA.vbt.realign(resampler_w).to_numpy()
            ),
            1,
            0
        )

        # Combine short-term and long-term trends
        MTF_Trend = np.where((LT_Trend == 1) & (ST_Trend == 1), 1, 0)
        MTF_Trend = np.where(Close15mBelow20DMA.to_numpy(), -1, MTF_Trend)

        return MTF_Trend


    def fetch_data(self, scrip: str, from_date: str, to_date: str, interval: str = '15minute') -> pd.DataFrame:
        """
        Fetch historical candle data using KiteConnect API.

        Args:
            scrip (str): The trading symbol to fetch data for.
            from_date (str): Start date in 'YYYY-MM-DD' format.
            to_date (str): End date in 'YYYY-MM-DD' format.
            interval (str): Timeframe for the candles. Default is '15minute'.

        Returns:
            pd.DataFrame: DataFrame with OHLCV data.
        """
        try:
            # Strip "NSE:" prefix if present
            stripped_scrip = scrip.replace("NSE:", "")

            # Retrieve the instrument token
            instrument_token = self.instrument_lookup(stripped_scrip)
            if instrument_token == -1:
                self.logger.warning(f"Instrument token not found for {scrip}.")
                return None

            self.logger.info(f"Fetching {interval} data for {stripped_scrip} from {from_date} to {to_date}.")

            start_date = datetime.strptime(from_date, '%Y-%m-%d')
            end_date = datetime.strptime(to_date, '%Y-%m-%d')
            data = pd.DataFrame(columns=['date', 'open', 'high', 'low', 'close', 'volume'])

            # Fetch data in chunks to handle API limitations
            while start_date < end_date:
                chunk_end_date = min(start_date + timedelta(days=100), end_date)
                try:
                    chunk = pd.DataFrame(
                        self.kite.historical_data(
                            instrument_token,
                            from_date=start_date,
                            to_date=chunk_end_date,
                            interval=interval
                        )
                    )
                    data = pd.concat([data, chunk], ignore_index=True)
                except Exception as e:
                    self.logger.error(f"Error fetching data for {scrip} between {start_date.date()} and {chunk_end_date.date()}: {e}")
                    break
                start_date = chunk_end_date

            if data.empty:
                self.logger.warning(f"No data available for {scrip}.")
                return None

            # Post-process the data
            data['date'] = pd.to_datetime(data['date'])
            data.set_index('date', inplace=True)

            self.logger.info(f"Successfully fetched {len(data)} rows for {scrip}.")
            return data[['open', 'high', 'low', 'close', 'volume']]
        except Exception as e:
            self.logger.error(f"Error fetching data for {scrip}: {e}")
            return None




    def OrderGenerator(self, scrip: str, from_date: str, to_date: str):
        """
        Generate orders for a specific scrip within the given date range.

        Args:
            scrip (str): The symbol for which to generate orders.
            from_date (str): Start date in 'YYYY-MM-DD' format.
            to_date (str): End date in 'YYYY-MM-DD' format.

        Returns:
            pd.DataFrame or None: A DataFrame of trades if successful, else None.
        """
        # Retrieve `init_cash` value for the current scrip and target month
        init_cash = self.symbols_cash_map.get(self.target_month, {}).get(scrip, self.default_cash)
        self.logger.info(f"Processing {scrip} for the target month {self.target_month} with init_cash={init_cash}.")

        # Fetch data for the specified range
        # Ensure `to_date` does not exceed today's date
        today = datetime.now(IST).strftime('%Y-%m-%d')
        to_date = min(to_date, today)

        data = self.fetch_data(scrip, from_date, to_date, interval='15minute')

        if data is None or data.empty:
            self.logger.warning(f"No data available for {scrip} during {from_date} to {to_date}. Skipping.")
            return None

        self.logger.info(f"Data successfully fetched for {scrip} with {len(data)} rows.")

        # Ensure required columns exist
        if not {'open', 'high', 'low', 'close'}.issubset(data.columns):
            self.logger.error(f"Data for {scrip} is missing required columns. Skipping.")
            return None

        high, low, close = data['high'], data['low'], data['close']

        # Generate signals using SuperTrend
        Signals = vbt.IF(
            class_name='SuperTrend',
            short_name='st',
            input_names=['high', 'low', 'close'],
            param_names=[],
            output_names=['value']
        ).with_apply_func(
            self.MTF_Trend, keep_pd=True
        ).run(high, low, close)

        # Create portfolio based on generated signals
        pf = vbt.Portfolio.from_signals(
            data,
            long_entries=(Signals.value == 1),
            long_exits=(Signals.value == -1),
            stop_exit_price="close",
            sl_stop=data['low'].vbt.ago(125),
            stop_ladder="uniform",
            tp_stop=vbt.Param([self.tp_ladder], keys=["tp_ladder_1"]),
            tsl_stop=0.2,
            fees=0.0015,  # 0.15% of turnover
            slippage=0.0005,  # 0.05% slippage
            size_granularity=1,
            max_size=init_cash,
            init_cash=init_cash,
            freq='15m'
        )

        # Extract readable trades data
        df = pf.trades.readable
        df["Column"] = scrip

        # Validate the DataFrame's structure
        if df.empty or 'Entry Index' not in df.columns:
            self.logger.warning(f"No trades generated or 'Entry Index' missing for {scrip}.")
            return None

        self.logger.info(f"Processing completed for {scrip} in target month {self.target_month}.")
        return df


    def calculate_days_difference(self):
        """
        Calculate the number of days from the target month's start to today, multiplied by a factor.
        """
        # Ensure the current date is timezone-aware
        today_date = datetime.now(IST)

        # Convert target_month to a timezone-aware datetime
        target_date = datetime.strptime(self.target_month, "%Y-%m-%d").replace(tzinfo=IST)

        # Perform subtraction and calculate the difference in days
        difference_days = (today_date - target_date).days

        return difference_days * self.mult_factor


    def exponential_backoff(self, function, *args, **kwargs):
        retries = 0
        delay = self.retry_delay
        while retries < self.max_retries:
            try:
                return function(*args, **kwargs)
            except Exception as e:
                retries += 1
                self.logger.error(f"Attempt {retries} failed with error: {e}")
                if retries >= self.max_retries:
                    raise e
                time.sleep(delay)
                delay *= 2



    def get_orderbook(self, max_months=12):
        """
        Generate the order book by processing the latest `max_months` months in the JSON.
        Runs OrderGenerator separately for each symbol for each target month.
        """
        final_results = []  # List to store results for all months

        # Get sorted months from the JSON keys (latest first)
        sorted_months = sorted(self.symbols_cash_map.keys(), key=lambda x: datetime.strptime(x, "%Y-%m-%d"), reverse=True)
        sorted_months = sorted_months[:max_months]  # Process only the first `max_months` months

        # Process each target month
        for target_month in sorted_months:
            self.logger.info(f"Processing data for the target month: {target_month}")
            self.target_month = target_month  # Set the target month for the current loop

            month_start = IST.localize(datetime.strptime(target_month, "%Y-%m-%d"))
            from_date = (month_start - timedelta(days=365)).strftime('%Y-%m-%d')  # Start 365 days before the target month
            to_date = (month_start + timedelta(days=90)).strftime('%Y-%m-%d')  # End 90 days after the target month

            # Ensure the `to_date` does not exceed today's date
            today = datetime.now(IST).strftime('%Y-%m-%d')
            to_date = min(to_date, today)

            # Process each symbol for the target month
            if target_month not in self.symbols_cash_map:
                self.logger.warning(f"No data found for the target month: {target_month}. Skipping.")
                continue

            symbols = self.symbols_cash_map[target_month].keys()
            dfs = []
            for symbol in symbols:
                self.logger.info(f"Running OrderGenerator for symbol: {symbol} in month: {target_month}.")

                # Fetch data and generate orders
                df = self.OrderGenerator(symbol, from_date=from_date, to_date=to_date)
                if df is not None and not df.empty:
                    # Filter rows for the target month
                    filtered_df = df[
                        (df['Entry Index'] >= month_start) &
                        (df['Entry Index'] < month_start + timedelta(days=31))
                    ]

                    if not filtered_df.empty:
                        dfs.append(filtered_df)

            if dfs:
                # Combine all symbols' dataframes for the current month
                final_results.append(pd.concat(dfs, ignore_index=True))
            else:
                self.logger.warning(f"No valid orders found for target month: {target_month}. Skipping.")

        if not final_results:
            self.logger.info("No valid orders found for the specified months. Returning an empty orderbook.")
            return pd.DataFrame()

        # Combine results for all processed months into a single DataFrame
        final_orderbook = pd.concat(final_results, ignore_index=True)

        # Log summary instead of full data
        self.logger.info(f"Order book generated successfully! Total trades: {len(final_orderbook)}")
        return final_orderbook



"""
STEP3:::TRADE SUMMARY GENERATOR CLASS
"""



class TradeSummaryGenerator:
    def __init__(self, logger):
        """
        Initialize the TradeSummaryGenerator with Zerodha API credentials.
        """
        self.logger = logger
        # Default Zerodha API credentials
        self.api_key = "lwhfs47ilbvwc84g"
        self.access_token = "ozWrM7XdI0CUbzzFiZQwaUfpSmqkblXY"

        # Initialize KiteConnect
        self.kite = KiteConnect(api_key=self.api_key)
        self.kite.set_access_token(self.access_token)

    def fetch_equal_weighted_returns_monthly(self, symbols, month):
        """
        Fetch monthly candle data for the given symbols and calculate equal-weighted returns.
        Fallback to vbt.TVData if data is not available on Zerodha.
        """
        try:
            self.logger.info(f"Fetching equal-weighted returns for month: {month}")
            monthly_returns = []

            if not symbols:
                self.logger.warning(f"No symbols provided for month: {month}")
                return np.nan

            # Convert month to datetime objects
            current_month_start = pd.Timestamp(f"{month}-01", tz="Asia/Kolkata")
            previous_month_start = current_month_start - pd.offsets.MonthBegin(1)
            current_month_end = current_month_start + pd.offsets.MonthEnd(0)

            for symbol in symbols:
                try:
                    self.logger.info(f"Pulling data for symbol: {symbol}")
                    instrument_token = self.get_instrument_token(symbol.replace("NSE:", ""))

                    if instrument_token:
                        # Fetch data using Zerodha API
                        data = self.kite.historical_data(
                            instrument_token=instrument_token,
                            from_date=previous_month_start.to_pydatetime(),
                            to_date=current_month_end.to_pydatetime(),
                            interval="day"
                        )

                        # Convert data to DataFrame
                        df = pd.DataFrame(data)
                        if df.empty:
                            self.logger.warning(f"No data available for symbol: {symbol}")
                            continue

                        df['date'] = pd.to_datetime(df['date'])
                        if df['date'].dt.tz is None:
                            df['date'] = df['date'].dt.tz_localize('UTC').dt.tz_convert('Asia/Kolkata')
                        else:
                            df['date'] = df['date'].dt.tz_convert('Asia/Kolkata')

                        df.set_index('date', inplace=True)

                        # Get Close prices for the current and previous months
                        previous_month_close = df.loc[df.index.to_period('M') == previous_month_start.to_period('M'), 'close']
                        current_month_close = df.loc[df.index.to_period('M') == current_month_start.to_period('M'), 'close']

                    else:
                        # Fallback to vbt.TVData if no instrument token is found
                        self.logger.warning(f"Instrument token not found for symbol: {symbol}. Using vbt.TVData.")
                        data = vbt.TVData.pull(symbol, timeframe="1M", tz="GMT+5:30")
                        close = data.get("Close")

                        if close.index.tz is None:
                            close = close.tz_localize("UTC").tz_convert("Asia/Kolkata")
                        else:
                            close = close.tz_convert("Asia/Kolkata")

                        previous_month_close = close.loc[close.index.to_period('M') == previous_month_start.to_period('M')]
                        current_month_close = close.loc[close.index.to_period('M') == current_month_start.to_period('M')]

                    if not previous_month_close.empty and not current_month_close.empty:
                        return_value = current_month_close.iloc[-1] / previous_month_close.iloc[-1] - 1
                        monthly_returns.append(return_value)
                        self.logger.info(f"Return for symbol {symbol}: {return_value:.4f}")
                    else:
                        self.logger.warning(f"Skipping symbol due to insufficient data: {symbol}, month: {month}")

                except Exception as e:
                    self.logger.error(f"Error fetching data for symbol: {symbol}, error: {e}")

            # Calculate equal-weighted return
            if monthly_returns:
                result = round(np.mean(monthly_returns), 4)
                self.logger.info(f"Equal-weighted return for month {month}: {result}")
                return result
            else:
                self.logger.warning(f"No valid returns calculated for month: {month}")
                return np.nan
        except Exception as e:
            self.logger.error(f"Error fetching equal-weighted returns for month: {month}, error: {e}")
            return np.nan

    def fetch_and_save_instruments(self, file_path="instruments.csv"):
        """
        Fetch all instruments from Zerodha and save them locally as a CSV file.
        """
        try:
            self.logger.info("Fetching all instruments from Zerodha...")
            instruments = self.kite.instruments("NSE")
            instruments_df = pd.DataFrame(instruments)
            instruments_df.to_csv(file_path, index=False)
            self.logger.info(f"Fetched and saved {len(instruments_df)} instruments to {file_path}.")
        except Exception as e:
            self.logger.error(f"Error fetching instruments: {e}")

    def load_instruments(self, file_path="instruments.csv"):
        """
        Load instruments from a local CSV file into a DataFrame.
        Fetch from Zerodha and save locally if the file does not exist.
        """
        try:
            self.logger.info(f"Loading instruments from {file_path}...")
            instruments_df = pd.read_csv(file_path)
            self.instrument_lookup = pd.Series(
                instruments_df["instrument_token"].values, index=instruments_df["tradingsymbol"]
            ).to_dict()
            self.logger.info(f"Loaded {len(self.instrument_lookup)} instruments from {file_path}.")
        except FileNotFoundError:
            self.logger.warning(f"{file_path} not found. Fetching from Zerodha...")
            self.fetch_and_save_instruments(file_path)
            self.load_instruments(file_path)
        except Exception as e:
            self.logger.error(f"Error loading instruments from {file_path}: {e}")
            self.instrument_lookup = {}



    def get_instrument_token(self, symbol):
        """
        Retrieve the instrument token for a given symbol from the cached instruments.
        """
        try:
            # Remove the 'NSE:' prefix if it exists
            clean_symbol = symbol.replace("NSE:", "")

            if not hasattr(self, 'instrument_lookup') or not self.instrument_lookup:
                self.load_instruments()

            instrument_token = self.instrument_lookup.get(clean_symbol)
            if instrument_token:
                return instrument_token
            else:
                self.logger.warning(f"Instrument token not found for symbol: {symbol}")
                return None
        except Exception as e:
            self.logger.error(f"Error fetching instrument token for symbol: {symbol}, error: {e}")
            return None




    def fetch_cnxsmallcap_returns(self):
        """
        Fetch monthly adjusted close prices for CNXSMALLCAP using Zerodha API and calculate monthly returns.
        Fallback to vbt.TVData if data is not available on Zerodha.
        """
        try:
            self.logger.info("Fetching CNXSMALLCAP returns...")
            instrument_token = self.get_instrument_token("CNXSMALLCAP")
            if not instrument_token:
                self.logger.warning("Instrument token not found for CNXSMALLCAP. Using vbt.TVData fallback.")
                # Fallback to vbt.TVData
                data = vbt.TVData.pull("NSE:CNXSMALLCAP", timeframe="1M", tz="GMT+5:30")
                close = data.get("Close")

                # Ensure timezone-awareness
                if close.index.tz is None:
                    close = close.tz_localize("UTC").tz_convert("Asia/Kolkata")
                else:
                    close = close.tz_convert("Asia/Kolkata")

                # Calculate monthly returns
                cnxsmallcap_returns = close.pct_change().rename("CNXSMALLCAP_Returns").to_frame()
                cnxsmallcap_returns['Month'] = cnxsmallcap_returns.index.to_period('M')
                self.logger.info("CNXSMALLCAP returns fetched using vbt.TVData.")
                return cnxsmallcap_returns

            # Fetch data using Zerodha API
            data = self.kite.historical_data(
                instrument_token=instrument_token,
                from_date=(datetime.now() - pd.DateOffset(years=5)).to_pydatetime(),
                to_date=datetime.now(),
                interval="month"
            )

            # Convert data to DataFrame
            df = pd.DataFrame(data)
            if df.empty:
                self.logger.warning("No data available for CNXSMALLCAP.")
                return pd.DataFrame()

            df['date'] = pd.to_datetime(df['date']).dt.tz_localize('UTC').dt.tz_convert('Asia/Kolkata')
            df.set_index('date', inplace=True)

            # Calculate monthly returns
            cnxsmallcap_returns = df['close'].pct_change().rename("CNXSMALLCAP_Returns").to_frame()
            cnxsmallcap_returns['Month'] = cnxsmallcap_returns.index.to_period('M')
            self.logger.info("CNXSMALLCAP returns fetched successfully.")
            return cnxsmallcap_returns
        except Exception as e:
            self.logger.error(f"Error fetching CNXSMALLCAP data: {e}")
            return pd.DataFrame()


    def generate_summary(self, tradebook_df, symbols_data, cnxsmallcap_returns):
        """
        Generate a summary of trade metrics and merge with CNXSMALLCAP returns and cumulative equal-weighted returns.
        Include an "Overall" row summarizing all months.
        """
        try:
            self.logger.info("Generating summary from the tradebook...")

            # Ensure 'Month' column exists
            if 'Month' not in tradebook_df.columns:
                if 'Entry Index' in tradebook_df.columns:
                    self.logger.info("Creating 'Month' column from 'Entry Index'.")
                    tradebook_df['Entry Index'] = pd.to_datetime(tradebook_df['Entry Index'], errors='coerce')
                    tradebook_df['Month'] = tradebook_df['Entry Index'].dt.to_period('M')  # Extract month as Period[M]
                else:
                    self.logger.error("Missing required 'Entry Index' column to create 'Month'.")
                    raise KeyError("The 'Month' column is missing, and 'Entry Index' is not available to create it.")

            # Group by 'Month' and calculate metrics
            summary = tradebook_df.groupby('Month').apply(self.calculate_metrics).reset_index()

            # Add number of scrips from the symbols.json
            summary['Num_Scrips_Input'] = summary['Month'].apply(
                lambda x: len(symbols_data.get(str(x) + "-01", {}))
            )

            # Add interest adjustment for Num_Scrips_Input < 20
            summary['Interest_PnL_Adjustment'] = summary['Num_Scrips_Input'].apply(
                lambda x: 150_000 * 0.01 * (20 - x) if x < 20 else 0
            )

            # Add interest adjustment to Month_PnL
            summary['Adjusted_Month_PnL'] = summary['Month_PnL'] + summary['Interest_PnL_Adjustment']

            # Rename Adjusted_Month_PnL to Final_Month_PnL
            summary.rename(columns={'Adjusted_Month_PnL': 'Final_Month_PnL'}, inplace=True)

            # Calculate Strategy_Return with the new denominator
            summary['Strategy_Return'] = round(summary['Final_Month_PnL'] / 3_000_000, 4)

            # Calculate equal-weighted returns (without cost adjustment)
            summary['Equal_Weighted_Return'] = summary['Month'].apply(
                lambda month: self.fetch_equal_weighted_returns_monthly(
                    symbols_data.get(str(month) + "-01", {}).keys(), str(month)
                )
            )

            # Calculate equal-weighted returns (cost-adjusted by 0.4%)
            summary['Equal_Weighted_Return_Cost_Adjusted'] = summary['Equal_Weighted_Return'] - 0.004

            # Calculate cumulative equal-weighted returns
            summary['Cumulative_Equal_Weighted_Return'] = round(
                (1 + summary['Equal_Weighted_Return']).cumprod() - 1, 4
            )

            # Calculate cumulative equal-weighted returns (cost-adjusted by 0.4%)
            summary['Cumulative_Equal_Weighted_Return_Cost_Adjusted'] = round(
                (1 + summary['Equal_Weighted_Return_Cost_Adjusted']).cumprod() - 1, 4
            )

            # Add cumulative returns with reinvestments
            summary['Strategy_Cumulative_Return_With_Reinvestments'] = round(
                (1 + summary['Strategy_Return']).cumprod() - 1, 4
            )

            # Merge the CNXSMALLCAP monthly returns into the summary
            summary = summary.merge(cnxsmallcap_returns, on='Month', how='left')

            # Calculate cumulative returns for both CNXSMALLCAP and the strategy
            summary['Cumulative_Strategy_Return'] = round((1 + summary['Strategy_Return']).cumprod() - 1, 4)
            summary['Cumulative_CNXSMALLCAP_Return'] = round((1 + summary['CNXSMALLCAP_Returns']).cumprod() - 1, 4)

            # Calculate drawdowns
            summary['Max_Cumulative_Strategy_Return'] = summary['Cumulative_Strategy_Return'].cummax()
            summary['Strategy_Drawdown'] = round(
                (summary['Cumulative_Strategy_Return'] - summary['Max_Cumulative_Strategy_Return'])
                / summary['Max_Cumulative_Strategy_Return'], 4
            )

            summary['Max_Cumulative_CNXSMALLCAP_Return'] = summary['Cumulative_CNXSMALLCAP_Return'].cummax()
            summary['CNXSMALLCAP_Drawdown'] = round(
                (summary['Cumulative_CNXSMALLCAP_Return'] - summary['Max_Cumulative_CNXSMALLCAP_Return'])
                / summary['Max_Cumulative_CNXSMALLCAP_Return'], 4
            )

            # Drop unnecessary columns
            summary = summary.drop(columns=[
                'Max_Cumulative_Strategy_Return',
                'Max_Cumulative_CNXSMALLCAP_Return'
            ], errors='ignore')

            # Calculate overall metrics and add as "Overall" row
            overall_metrics = self.calculate_metrics(tradebook_df)
            overall_metrics['Final_Month_PnL'] = summary['Final_Month_PnL'].sum()  # Add Overall Final Month PnL
            overall_row = pd.DataFrame(overall_metrics).T
            overall_row['Month'] = "Overall"

            # Add "Overall" row to the top of the DataFrame
            summary = pd.concat([overall_row, summary], ignore_index=True)

            # Reorder columns to make Month the first column and Final_Month_PnL the second column
            cols = ['Month', 'Final_Month_PnL'] + [col for col in summary.columns if col not in ['Month', 'Final_Month_PnL']]
            summary = summary[cols]

            self.logger.info("Summary generated successfully.")
            return summary

        except Exception as e:
            self.logger.error(f"Error generating summary: {e}")
            return pd.DataFrame()


    def calculate_metrics(self, group):
        """
        Calculate additional metrics for the tradebook summary.
        """
        self.logger.info("Calculating metrics for a trade group...")
        winning_trades = group[group['PnL'] > 0]
        losing_trades = group[group['PnL'] < 0]

        metrics = {
            'Month_PnL': round(group['PnL'].sum(), 2),
            'Month_Trades': len(group),
            'Unique_Scrips': group['Column'].nunique(),
            'Max_Loss': round(group['PnL'].min(), 2),
            'Max_Profit': round(group['PnL'].max(), 2),
            'Median_Profit': round(winning_trades['PnL'].median(), 2) if not winning_trades.empty else 0,
            'Median_Loss': round(losing_trades['PnL'].median(), 2) if not losing_trades.empty else 0,
            'Win_Rate': round(len(winning_trades) / len(group), 2) if len(group) > 0 else 0,
            'Profit_Factor': round(winning_trades['PnL'].sum() / abs(losing_trades['PnL'].sum()), 2) if not losing_trades.empty else float('inf'),
            'R_Multiple': round((winning_trades['Return'].mean() / abs(losing_trades['Return'].mean())), 2) if not losing_trades.empty else float('inf'),
        }
        self.logger.info(f"Metrics calculated: {metrics}")
        return pd.Series(metrics)



  df['As on Date'] = df['As on Date'].fillna(method='ffill')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Stock'] = df['Stock'].str.extract(r'^([A-Z&-]+)').fillna('')  # Extract first part
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Stock'] = 'NSE:' + df['Stock']  # Add NSE prefix


JSON file saved to symbols.json


In [3]:
"""

FINAL STEP ::: CALLING THE SPECIFIC INSTANCES

"""

log_file_path = f"logfile_{datetime.now(IST).strftime('%Y-%m-%d')}.log"

# Setup logger
logger = logging.getLogger("OrderBookGeneratorLogger")
logger.setLevel(logging.INFO)
console_handler = logging.StreamHandler()
logger.addHandler(console_handler)



# Instantiate the class
OBGenerator = OrderBookGenerator(
    logger=logger,
    log_file=log_file_path,
    symbols_json_path='/content/symbols.json',
    api_key = api_key,
    access_token=access_token
)

# Test if get_orderbook is recognized
if hasattr(OBGenerator, 'get_orderbook'):
    print("get_orderbook method exists!")
else:
    print("get_orderbook method is missing!")

# Generate the order book
try:
    df = OBGenerator.get_orderbook(max_months=max_month)
    print("Order book generated successfully!")
    print(df.head())
except AttributeError as e:
    print(f"AttributeError: {e}")
except Exception as e:
    print(f"Unexpected error: {e}")


# Ensure datetime columns are properly formatted
df["Entry Index"] = pd.to_datetime(df["Entry Index"])
df["Exit Index"] = pd.to_datetime(df["Exit Index"], errors='coerce')  # Allow for NaT for open trades

# The final DataFrame `df` contains the complete order book for the latest 12 months
df.to_csv('tradebook.csv')


# Create an instance of the class
summary_generator = TradeSummaryGenerator(logger)

# Example inputs
tradebook_df = pd.read_csv("tradebook.csv")  # Load the tradebook data
symbols_data = json.load(open("symbols.json"))  # Load the symbols.json
cnxsmallcap_returns = summary_generator.fetch_cnxsmallcap_returns()

# Generate the summary
summary = summary_generator.generate_summary(tradebook_df, symbols_data, cnxsmallcap_returns)

# Save the summary to a CSV
summary.to_csv("summary.csv", index=False)
logger.info("Summary saved to summary.csv.")



summary


2024-12-30T18:19:49.076896+05:30 - INFO - KiteConnect initialized successfully.
KiteConnect initialized successfully.
INFO:OrderBookGeneratorLogger:KiteConnect initialized successfully.
2024-12-30T18:19:49.094077+05:30 - INFO - Loaded symbols JSON from /content/symbols.json
Loaded symbols JSON from /content/symbols.json
INFO:OrderBookGeneratorLogger:Loaded symbols JSON from /content/symbols.json
2024-12-30T18:19:49.106551+05:30 - INFO - Processing data for the target month: 2024-12-01
Processing data for the target month: 2024-12-01
INFO:OrderBookGeneratorLogger:Processing data for the target month: 2024-12-01
2024-12-30T18:19:49.113560+05:30 - INFO - Fetching NSE instruments from KiteConnect...
Fetching NSE instruments from KiteConnect...
INFO:OrderBookGeneratorLogger:Fetching NSE instruments from KiteConnect...
2024-12-30T18:19:50.917229+05:30 - INFO - Fetched and saved NSE instrument dump.
Fetched and saved NSE instrument dump.
INFO:OrderBookGeneratorLogger:Fetched and saved NSE ins

get_orderbook method exists!


2024-12-30T18:19:53.703224+05:30 - INFO - Successfully fetched 6593 rows for NSE:PAYTM.
Successfully fetched 6593 rows for NSE:PAYTM.
INFO:OrderBookGeneratorLogger:Successfully fetched 6593 rows for NSE:PAYTM.
2024-12-30T18:19:53.717809+05:30 - INFO - Data successfully fetched for NSE:PAYTM with 6593 rows.
Data successfully fetched for NSE:PAYTM with 6593 rows.
INFO:OrderBookGeneratorLogger:Data successfully fetched for NSE:PAYTM with 6593 rows.
2024-12-30T18:19:58.210306+05:30 - INFO - Processing completed for NSE:PAYTM in target month 2024-12-01.
Processing completed for NSE:PAYTM in target month 2024-12-01.
INFO:OrderBookGeneratorLogger:Processing completed for NSE:PAYTM in target month 2024-12-01.
2024-12-30T18:19:58.218389+05:30 - INFO - Running OrderGenerator for symbol: NSE:FSL in month: 2024-12-01.
Running OrderGenerator for symbol: NSE:FSL in month: 2024-12-01.
INFO:OrderBookGeneratorLogger:Running OrderGenerator for symbol: NSE:FSL in month: 2024-12-01.
2024-12-30T18:19:58.22

Order book generated successfully!
   Exit Trade Id          Column   Size  Entry Order Id  \
0             12       NSE:PAYTM  184.0              21   
1             13       NSE:PAYTM  179.0              23   
2             12         NSE:FSL  468.0              22   
3              7  NSE:JUBLPHARMA  166.0              12   
4             18      NSE:KAYNES   10.0              34   

                Entry Index  Avg Entry Price  Entry Fees  Exit Order Id  \
0 2024-12-23 10:00:00+05:30       973.686600  268.737502             22   
1 2024-12-24 10:15:00+05:30       973.186350  261.300535             -1   
2 2024-12-20 09:15:00+05:30       372.386100  261.415042             23   
3 2024-12-03 12:15:00+05:30      1227.963675  305.762955             13   
4 2024-12-03 15:15:00+05:30      6273.135000   94.097025             35   

                 Exit Index  Avg Exit Price   Exit Fees           PnL  \
0 2024-12-24 09:15:00+05:30      954.122700  263.337865  -4131.832967   
1 2024-12-27 

2024-12-30T18:20:51.615655+05:30 - INFO - CNXSMALLCAP returns fetched using vbt.TVData.
CNXSMALLCAP returns fetched using vbt.TVData.
INFO:OrderBookGeneratorLogger:CNXSMALLCAP returns fetched using vbt.TVData.
2024-12-30T18:20:51.623449+05:30 - INFO - Generating summary from the tradebook...
Generating summary from the tradebook...
INFO:OrderBookGeneratorLogger:Generating summary from the tradebook...
2024-12-30T18:20:51.632264+05:30 - INFO - Creating 'Month' column from 'Entry Index'.
Creating 'Month' column from 'Entry Index'.
INFO:OrderBookGeneratorLogger:Creating 'Month' column from 'Entry Index'.
2024-12-30T18:20:51.648682+05:30 - INFO - Calculating metrics for a trade group...
Calculating metrics for a trade group...
INFO:OrderBookGeneratorLogger:Calculating metrics for a trade group...
2024-12-30T18:20:51.659364+05:30 - INFO - Metrics calculated: {'Month_PnL': -41159.04, 'Month_Trades': 9, 'Unique_Scrips': 5, 'Max_Loss': -13591.39, 'Max_Profit': -785.84, 'Median_Profit': 0, 'Med

Unnamed: 0,Month,Final_Month_PnL,Month_PnL,Month_Trades,Unique_Scrips,Max_Loss,Max_Profit,Median_Profit,Median_Loss,Win_Rate,...,Equal_Weighted_Return,Equal_Weighted_Return_Cost_Adjusted,Cumulative_Equal_Weighted_Return,Cumulative_Equal_Weighted_Return_Cost_Adjusted,Strategy_Cumulative_Return_With_Reinvestments,CNXSMALLCAP_Returns,Cumulative_Strategy_Return,Cumulative_CNXSMALLCAP_Return,Strategy_Drawdown,CNXSMALLCAP_Drawdown
0,Overall,18849.11,849.1,22.0,13.0,-13591.39,16025.95,9877.4,-3281.53,0.36,...,,,,,,,,,,
1,2024-11,-23159.04,-41159.04,9.0,5.0,-13591.39,-785.84,0.0,-2237.43,0.0,...,0.019,0.015,0.019,0.015,-0.0077,0.002599,-0.0077,0.0026,-0.0,0.0
2,2024-12,42008.15,42008.15,13.0,9.0,-13219.4,16025.95,9877.4,-4131.83,0.62,...,0.0549,0.0509,0.0749,0.0667,0.0062,-0.00059,0.0062,0.002,0.0,-0.2308
