In [1]:
# Linear Regression

In [1]:
import datetime as dt

import matplotlib.pyplot as plt
import mplfinance as mpf
import numpy as np
import pandas as pd
import pandas_market_calendars as mcal
import plotly.graph_objects as go
import polars as pl
from dash import Dash, dcc, html
from plotly.subplots import make_subplots

# Exchange calendar for "NSE"; get_expiry() queries it via nse.valid_days().
nse = mcal.get_calendar("NSE")

In [2]:
# Notebook-wide display settings: raise the row/column limits for both
# pandas and polars so wide/long frames are not truncated when displayed.
pd.set_option("display.max_rows", 25_000)
pd.set_option("display.max_columns", 500)
pl.Config.set_tbl_cols(500)
pl.Config.set_tbl_rows(10_000)

# Show pandas floats with four decimal places.
pd.options.display.float_format = "{:.4f}".format

In [3]:
import sys

sys.path.append("..")
from tooling.enums import AssetClass, Index, Spot, StrikeSpread
from tooling.fetch import fetch_option_data, fetch_spot_data
from tooling.filter import find_atm, option_tool

In [4]:
def get_expiry(f_today):
    """Return the expiry date for the week containing ``f_today``.

    The expiry is the nearest Thursday on or after ``f_today``. When that
    Thursday is not a valid day on the module-level ``nse`` calendar, the
    date is moved back one day at a time until a valid day is found.

    Args:
        f_today: A ``datetime.date``/``datetime.datetime``-like object that
            supports ``weekday()`` and ``timedelta`` arithmetic.

    Returns:
        The expiry date, of the same type as ``f_today``.
    """
    days_to_thursday = (3 - f_today.weekday()) % 7
    nearest_thursday = f_today + dt.timedelta(days=days_to_thursday)

    # Stepping back a single day is not enough when the day before Thursday is
    # also a holiday, so keep walking back. The search is bounded to one week
    # so a calendar that returns nothing cannot loop forever.
    for days_back in range(7):
        candidate = nearest_thursday - dt.timedelta(days=days_back)
        if not nse.valid_days(start_date=candidate, end_date=candidate).empty:
            return candidate

    # No valid day found in the whole week: keep the historical result
    # (the day before Thursday) rather than raising.
    return nearest_thursday - dt.timedelta(days=1)


def get_option_contract_name(symbol, strike, expiry, opt_type):
    """Build an option contract name from its components.

    Two expiry encodings are used:

    * If adding seven days to ``expiry`` lands in a different month (i.e. it
      is the last such expiry of its month), the expiry is encoded as
      two-digit year plus upper-cased month abbreviation, e.g. ``24JUN``.
    * Otherwise it is encoded as two-digit year, month number without
      padding, and zero-padded two-digit day, e.g. ``24606``.

    Args:
        symbol: Underlying symbol prefix.
        strike: Strike price, formatted with ``str()`` semantics.
        expiry: Expiry date (``datetime.date``-like).
        opt_type: Option type suffix appended verbatim.

    Returns:
        The concatenated contract name string.
    """
    rolls_into_next_month = (expiry + dt.timedelta(days=7)).month != expiry.month

    if rolls_into_next_month:
        expiry_code = expiry.strftime("%y%b").upper()
    else:
        # ``:02d`` reproduces the explicit "0" prefix for days 1-9.
        expiry_code = f"{expiry.year - 2000}{expiry.month}{expiry.day:02d}"

    return f"{symbol}{expiry_code}{strike}{opt_type}"

In [101]:
# Load the raw price data. Only one dataset is active at a time; the
# commented lines below are alternative input files. Note the variable keeps
# the name ``bnf_pandas`` regardless of which file is loaded.
bnf_pandas = pd.read_csv("../data/midcp.csv")
# bnf_pandas = pd.read_csv("../data/finnifty_1hr_tv (2).csv")
# bnf_pandas = pd.read_csv('../data/midcp_1hr_tv (4).csv')
# bnf_pandas = pd.read_csv('../data/finnifty_1hr_tv.csv')
# bnf_pandas = pd.read_csv('../data/bnf_fut_1hr_tv.csv')
# bnf_pandas = pd.read_csv('../data/gold_4hr_tv.csv')

In [102]:
# bnf.tail()

In [103]:
# If Stocks Data ...
# Parse the "datetime" column, strip any timezone information so the values
# are timezone-naive, and keep only rows from 2017 onwards.
bnf_pandas["datetime"] = pd.to_datetime(bnf_pandas["datetime"])
bnf_pandas["datetime"] = bnf_pandas["datetime"].dt.tz_localize(None)
bnf_pandas = bnf_pandas[bnf_pandas["datetime"].dt.year >= 2017]
# bnf_pandas.drop(columns=["datetime"], inplace=True)
# bnf_pandas

In [104]:
# Convert the cleaned pandas frame to a polars DataFrame; the print is a
# sanity check of the resulting type.
bnf = pl.DataFrame(bnf_pandas)
print(type(bnf))
# bnf

<class 'polars.dataframe.frame.DataFrame'>


In [105]:
# # For crude oil Data
# bnf1 = pd.DataFrame(bnf_pandas)
# bnf1 = bnf1.drop(columns=['Unnamed: 0', 'Unnamed: 0.1'])
# bnf1['datetime'] = pd.to_datetime(bnf1['datetime'])
# bnf1['index'] = bnf1['datetime']
# bnf1.rename(columns={'o': 'open', 'h': 'high', 'l': 'low', 'c': 'close'}, inplace=True)
# bnf = pl.DataFrame(bnf1)
# print(type(bnf))
# bnf

In [106]:
# bnf['datetime'] = pd.to_datetime(bnf['datetime'])
# bnf = bnf.drop(columns=['Unnamed: 0'])
# bnf.set_index(bnf['datetime'], inplace=True)
# bnf
# bnf = bnf.with_columns(pl.col('datetime').str.to_datetime(format='%Y-%m-%dT%H:%M:%S.%f'))
# print(bnf)
# bnf = bnf.with_columns(pl.col('datetime').cast(pl.DateTime))

# Copy 'datetime' into a regular column named "index" and drop the original
# 'datetime' column (this is a plain column, not a pandas-style index).
bnf = bnf.with_columns([pl.col("datetime").alias("index")]).drop("datetime")

# The timestamps now live in the "index" column only.
# bnf

In [107]:
# Re-create "datetime" as a copy of "index" so the frame carries both columns
# (resample() below groups on "datetime").
bnf = bnf.with_columns(pl.col("index").alias("datetime"))
# bnf

In [108]:
# Preview the last rows to confirm the columns and the final timestamp.
bnf.tail()

open,high,low,close,volume,index,datetime
f64,f64,f64,f64,i64,datetime[ns],datetime[ns]
11762.3,11763.25,11756.65,11758.2,0,2024-06-07 15:25:00,2024-06-07 15:25:00
11758.3,11758.9,11753.7,11755.85,0,2024-06-07 15:26:00,2024-06-07 15:26:00
11753.9,11757.2,11752.05,11756.4,0,2024-06-07 15:27:00,2024-06-07 15:27:00
11756.45,11757.4,11748.75,11753.7,0,2024-06-07 15:28:00,2024-06-07 15:28:00
11753.2,11753.85,11747.05,11748.6,0,2024-06-07 15:29:00,2024-06-07 15:29:00


In [109]:
def resample(
    data: pl.DataFrame, timeframe, offset: dt.timedelta | None = None
) -> pl.DataFrame:
    """Aggregate candles into ``timeframe``-sized OHLCV bars.

    Args:
        data: Frame with "datetime", "open", "high", "low", "close" and
            "volume" columns. It is flagged as sorted on "datetime" rather
            than actually sorted, so rows must already be in time order.
        timeframe: Window size, used for both ``every`` and ``period`` (the
            notebook passes a duration string such as ``"15m"``).
        offset: Window offset forwarded to ``group_by_dynamic``.

    Returns:
        One row per window, labelled with the window's left edge, holding the
        first open, max high, min low, last close and summed volume.
    """
    ohlcv_aggregations = [
        pl.col("open").first().alias("open"),
        pl.col("high").max().alias("high"),
        pl.col("low").min().alias("low"),
        pl.col("close").last().alias("close"),
        pl.col("volume").sum().alias("volume"),
    ]

    time_ordered = data.set_sorted("datetime")
    windows = time_ordered.group_by_dynamic(
        index_column="datetime",
        every=timeframe,
        period=timeframe,
        label="left",
        offset=offset,
    )
    return windows.agg(ohlcv_aggregations)

# ohlc_resampled = resample(bnf, '60m', pd.Timedelta(minutes=15))

# bnf_df = bnf.to_pandas()
# bnf_df.set_index(bnf_df['datetime'], inplace=True)
# ohlc_15min = bnf_df.resample('60min').agg({
#     'open': 'first',
#     'high': 'max',
#     'low': 'min',
#     'close': 'last',
# })

# ohlc_15min.dropna(inplace=True)

# ohlc_15min.reset_index(inplace=True)

# bnf_1hr = ohlc_15min
# bnf_1hr = ohlc_resampled
# bnf_final = bnf_1hr.to_pandas()
# bnf_final['datetime'] = pd.to_datetime(bnf_final['datetime'])
# bnf_final
# bnf_1hr

In [110]:
# bnf_final = bnf
# bnf_final

In [111]:
# # bnf_final = bnf_1hr
# bnf_final['datetime'] = pd.to_datetime(bnf_final['datetime'])
# bnf_final

In [112]:
import pandas as pd
import numpy as np

def _rolling_lr_endpoint(close, window):
    """Rolling least-squares line over ``window`` bars, evaluated at the last bar.

    For each window the closes are regressed on x = 0..window-1 and the
    fitted value at x = window-1 is returned. The first ``window - 1`` rows
    are NaN because the rolling window is not yet full.
    """
    x = np.arange(window)
    x_sum = np.sum(x)
    x_squared_sum = np.sum(x**2)
    denominator = window * x_squared_sum - x_sum**2

    y_rolling = close.rolling(window=window)
    y_sum = y_rolling.sum()
    xy_sum = y_rolling.apply(lambda y: np.dot(x, y), raw=True)

    slope = (window * xy_sum - x_sum * y_sum) / denominator
    intercept = (y_sum - slope * x_sum) / window
    return intercept + slope * (window - 1)


def linear_regression_vectorized(df, period, trailing):
    """Add two rolling linear-regression columns to ``df``.

    ``df`` is modified in place: "close" is cast to float, "LR" receives the
    regression end-point over ``period`` bars and "LR2" the end-point over
    ``trailing`` bars.

    Args:
        df: pandas DataFrame with a "close" column.
        period: Window length for the "LR" column (at least 2).
        trailing: Window length for the "LR2" column (at least 2).

    Returns:
        The same ``df`` object, with "LR" and "LR2" added.

    Raises:
        ValueError: If "close" is missing, or if either window is below 2
            (the regression denominator is zero for a single point, which
            previously produced a silent all-NaN column).
    """
    if 'close' not in df.columns:
        raise ValueError(
            "The DataFrame must contain a 'close' column. "
            f"Found columns: {list(df.columns)}"
        )
    if period < 2 or trailing < 2:
        raise ValueError(
            f"Regression windows must be at least 2 (period={period}, trailing={trailing})."
        )

    df['close'] = df['close'].astype(float)
    df['LR'] = _rolling_lr_endpoint(df['close'], period)
    df['LR2'] = _rolling_lr_endpoint(df['close'], trailing)

    return df

# data = linear_regression_vectorized(bnf.to_pandas(), 20)

In [113]:
# # Example usage
# df = pd.DataFrame({
#     'Date': pd.date_range(start='2023-01-01', periods=50, freq='D'),
#     'Close': np.random.uniform(100, 200, 50)
# }).

# df.set_index('Date', inplace=True)

# # Apply the optimized linear regression function
# regression_values = linear_regression_vectorized(df, period=14)
# df['Linear_Regression_Close'] = regression_values

# # Display the DataFrame with the new column
# print(df.tail(10))

In [114]:
def generate_signals1(df, candles_in_num):
    """Flag candles where "LR" has risen for ``candles_in_num`` candles in a row.

    ``df`` is modified in place: "close" is coerced to numeric (invalid
    values become NaN) and an integer "Buy_Signal" column is written, 1 where
    every one of the last ``candles_in_num`` "LR" values is greater than the
    one before it and 0 elsewhere.

    Args:
        df: pandas DataFrame with "close" and "LR" columns.
        candles_in_num: Number of consecutive rising "LR" steps required.

    Returns:
        The same ``df`` object with "Buy_Signal" added.
    """
    df["close"] = pd.to_numeric(df["close"], errors="coerce")

    lr = df["LR"]
    rising_streak = lr.gt(lr.shift(1))
    for lag in range(1, candles_in_num):
        # Require the step ending ``lag`` candles ago to have been a rise too.
        rising_streak = rising_streak & lr.shift(lag).gt(lr.shift(lag + 1))

    df["Buy_Signal"] = rising_streak.astype("int64")
    return df

In [115]:
def generate_signals2(df, candles_in_num):
    """Flag candles where "LR" has fallen for ``candles_in_num`` candles in a row.

    ``df`` is modified in place: the close column is coerced to numeric
    (invalid values become NaN) and an integer "Sell_Signal" column is
    written, 1 where every one of the last ``candles_in_num`` "LR" values is
    lower than the one before it and 0 elsewhere.

    The close column is "c" when present; otherwise "close" is used, so the
    function works on the same frame as ``generate_signals1`` (which runs
    before the o/h/l/c rename).

    Args:
        df: pandas DataFrame with an "LR" column and a "c" or "close" column.
        candles_in_num: Number of consecutive falling "LR" steps required.

    Returns:
        The same ``df`` object with "Sell_Signal" added.

    Raises:
        KeyError: If neither "c" nor "close" is present (reported for "c").
    """
    # Fall back to "close" only when "c" is absent, so frames that already
    # use the short name behave exactly as before.
    price_col = "close" if ("c" not in df.columns and "close" in df.columns) else "c"
    df[price_col] = pd.to_numeric(df[price_col], errors="coerce")

    df["Sell_Signal"] = 0

    # Generate signals using boolean masking
    sell_signal_mask = df["LR"] < df["LR"].shift(1)

    for i in range(1, candles_in_num):
        sell_signal_mask &= df["LR"].shift(i) < df["LR"].shift(i + 1)

    df.loc[sell_signal_mask, "Sell_Signal"] = 1

    return df

In [123]:
# Positional

def execute(
    df,
    n,
    portfolio_value=1_00_00_000,
    index_lev=6,
    lot_size=15,
    slippage_rate=0.0001,
):
    """Backtest the long-only signal/stop-loss state machine over ``df``.

    Candles are processed in order starting at row 1. While flat, a candle
    with ``Buy_Signal == 1`` arms a pending signal (entry = that candle's
    high, initial stop = lowest low of the look-back window); a later candle
    trading above the entry price opens the trade. While in a trade the
    position is closed by a gap below the initial stop, a touch of the
    initial stop, or - once trailing is active (a candle's low is above
    "LR2") - a close at or below max(initial stop, "LR2").

    Args:
        df: pandas DataFrame with columns "o", "h", "l", "c", "LR2",
            "Buy_Signal" and "datetime" (timestamp values).
        n: Extra look-back candles for the initial stop; the stop is the
            lowest low over rows ``i - n - 1 .. i`` of the signal candle.
        portfolio_value: Capital used for sizing and for "ROI%".
        index_lev: Leverage multiplier applied to ``portfolio_value``.
        lot_size: Quantity is rounded to a multiple of this.
        slippage_rate: Cost per unit as a fraction of entry + exit price.

    Returns:
        A pandas DataFrame with one row per closed trade (empty when no
        trade closed).
    """
    trade_book = []
    in_trade = False
    signal_entry_price = 100000  # placeholder while no signal is armed
    signal_initial_sl = 0
    already_signal_exists = False
    is_trailing_active = False
    remark = ""

    def lowest_low(start, stop):
        # A negative start would make iloc count from the END of the frame
        # and yield an empty/wrong window near the first rows, so clamp at 0.
        return df.iloc[max(start, 0):stop]["l"].min()

    for i in range(1, len(df)):
        candle = df.iloc[i]  # fetched once per bar instead of per field
        points = 0
        exited = False
        current_candle_open = candle["o"]
        current_candle_high = candle["h"]
        current_candle_low = candle["l"]
        current_candle_close = candle["c"]
        current_linear_regression = candle["LR2"]
        candle_time = candle["datetime"]

        if not in_trade:
            if candle["Buy_Signal"] == 1:
                if not already_signal_exists:
                    # Fresh buy signal: arm entry above this candle's high.
                    signal_entry_price = current_candle_high
                    signal_initial_sl = lowest_low(i - n - 1, i + 1)
                    signal_creation_time = candle_time
                    already_signal_exists = True
                elif current_candle_open > signal_entry_price:
                    # Gap-up open beyond the entry price: skip the trade.
                    already_signal_exists = False
                    signal_entry_price = 100000
                    signal_initial_sl = 0
                elif current_candle_high < signal_entry_price:
                    # Lower high on a new signal candle: re-arm at the lower price.
                    signal_entry_price = current_candle_high
                    signal_initial_sl = lowest_low(i - n - 1, i + 1)
                    signal_creation_time = candle_time
                elif current_candle_high > signal_entry_price:
                    # Entry triggered.
                    in_trade = True
                    entry_time = candle_time
                    entry_price = signal_entry_price
            else:
                previous_candle = df.iloc[i - 1]
                if previous_candle["Buy_Signal"] == 1:
                    # Signal ended on the previous candle: give it this one
                    # candle to trigger, otherwise discard it.
                    signal_entry_price = previous_candle["h"]
                    signal_initial_sl = lowest_low(i - n - 2, i)
                    signal_creation_time = previous_candle["datetime"]
                    already_signal_exists = True

                    if current_candle_high > signal_entry_price:
                        in_trade = True
                        entry_time = candle_time
                        entry_price = signal_entry_price
                    else:
                        already_signal_exists = False
                        signal_entry_price = 100000
                        signal_initial_sl = 0

        if in_trade:
            trade_entry_price = signal_entry_price
            trade_initial_sl = signal_initial_sl
            trade_final_sl = signal_initial_sl
            is_entry_candle = (
                candle_time.date() == entry_time.date()
                and candle_time.time() == entry_time.time()
            )

            if not is_trailing_active and current_candle_low > current_linear_regression:
                is_trailing_active = True

            if is_trailing_active:
                trade_final_sl = max(trade_initial_sl, current_linear_regression)
                # With trailing active an open exactly AT the stop counts as a gap.
                opened_beyond_sl = current_candle_open <= trade_initial_sl
            else:
                opened_beyond_sl = current_candle_open < trade_initial_sl

            if opened_beyond_sl:
                if is_entry_candle:
                    # On the entry candle the open precedes the entry, so only
                    # a close at/below the stop closes the trade.
                    if current_candle_close <= trade_initial_sl:
                        exited = True
                        points = trade_initial_sl - trade_entry_price
                        exit_price = trade_initial_sl
                        exit_time = candle_time
                        remark = "Initial SL hit"
                else:
                    exited = True
                    points = current_candle_open - trade_entry_price
                    exit_price = current_candle_open
                    exit_time = candle_time
                    remark = "Gap Open Outside ISL"
            elif current_candle_low <= trade_initial_sl:
                exited = True
                points = trade_initial_sl - trade_entry_price
                exit_price = trade_initial_sl
                exit_time = candle_time
                # The remark casing differs between the trailing and
                # non-trailing paths; kept so existing reports do not change.
                remark = "Initial SL hit" if is_trailing_active else "Initial SL Hit"
            elif is_trailing_active and current_candle_close <= trade_final_sl:
                exited = True
                points = current_candle_close - trade_entry_price
                exit_price = current_candle_close
                exit_time = candle_time
                remark = "TSL Hit"

            # An explicit flag (not ``if points``) so that a breakeven exit
            # with points == 0 is still recorded and the state is reset.
            if exited:
                max_qty = (
                    int(round(portfolio_value * index_lev / entry_price / lot_size))
                    * lot_size
                )
                risk_per_unit = abs(entry_price - trade_initial_sl)
                # Informational 1%-risk quantity; infinite when there is no
                # distance between entry and stop.
                rpt_qty = (
                    portfolio_value * 0.01 / risk_per_unit
                    if risk_per_unit
                    else float("inf")
                )
                qty = max_qty
                slippage = slippage_rate * (entry_price + exit_price)
                final_points = points - slippage

                trade_book.append(
                    {
                        "Signal Generated At": signal_creation_time,
                        "Trade Type": "LONG",
                        "Entry Date": entry_time.date(),
                        "Entry Time": entry_time.time(),
                        "Entry Price": entry_price,
                        "Initial SL": trade_initial_sl,
                        "Final SL": trade_final_sl,
                        "Exit Time": exit_time,
                        "Exit Price": exit_price,
                        "Points Captured": points,
                        "Slippages": slippage,
                        "After Costs": final_points,
                        "PnL": final_points * qty,
                        "Remarks": remark,
                        "Max Qty": max_qty,
                        "RPT Qty": rpt_qty,
                        'Qty': qty,
                        "Leverage": index_lev,
                        "ROI%": (final_points * qty / portfolio_value) * 100,
                        "Trade Year": entry_time.year,
                        "Trade Month": entry_time.month,
                    }
                )
                in_trade = False
                already_signal_exists = False
                remark = ""
                is_trailing_active = False

    trade_book_df = pd.DataFrame(trade_book)
    return trade_book_df

In [124]:
# Both names are bound to the same object as ``bnf``; no copy is made here.
bnf1 = bnf
bnf2 = bnf

In [125]:
def _roi_summary(trades):
    """Return (win rate %, avg winning ROI%, avg losing ROI%, max drawdown).

    The drawdown is the minimum of cumulative "ROI%" minus its running peak,
    so it is zero or negative (NaN for an empty set of trades).
    """
    roi = trades["ROI%"]
    equity_curve = roi.cumsum()
    max_drawdown = (equity_curve - equity_curve.cummax()).min()
    return (
        (roi > 0).mean() * 100,
        roi[roi > 0].mean(),
        roi[roi < 0].mean(),
        max_drawdown,
    )


def generate_stats(tb_expiry, variation, years=None):
    """Summarise a trade book per year plus an "Overall" row.

    Args:
        tb_expiry: Trade book DataFrame with "Trade Year" and "ROI%" columns,
            in chronological order (drawdown is computed on row order).
        variation: Label copied into the "Variation" column of every row.
        years: Iterable of years to report. Defaults to 2017-2024, the range
            previously hard-coded.

    Returns:
        A single-entry dict mapping the overall ROI/DD ratio to the stats
        DataFrame (one row per year, then "Overall").
    """
    years = list(range(2017, 2025) if years is None else years)

    stats_df8 = pd.DataFrame(
        index=years,
        columns=[
            "Total ROI",
            "Total Trades",
            "Win Rate",
            "Avg Profit% per Trade",
            "Avg Loss% per Trade",
            "Max Drawdown",
            "ROI/DD Ratio",
            "Variation",
        ],
    )

    # Use one trade set for every figure. Previously the overall ROI came from
    # the per-year rows while the other overall metrics used all trades, so
    # trades outside the reported years made the "Overall" row inconsistent.
    combined_df_sorted = tb_expiry[tb_expiry["Trade Year"].isin(years)]

    for year in years:
        year_trades = combined_df_sorted[combined_df_sorted["Trade Year"] == year]
        total_roi = year_trades["ROI%"].sum()
        win_rate, avg_profit, avg_loss, max_drawdown = _roi_summary(year_trades)

        stats_df8.loc[year] = [
            total_roi,
            len(year_trades),
            win_rate,
            avg_profit,
            avg_loss,
            max_drawdown,
            total_roi / abs(max_drawdown),
            variation,
        ]

    # Overall totals are summed from the yearly rows; the remaining metrics
    # are recomputed on the full in-scope trade sequence.
    overall_total_roi = stats_df8["Total ROI"].sum()
    overall_total_trades = stats_df8["Total Trades"].sum()
    (
        overall_win_rate,
        overall_avg_profit,
        overall_avg_loss,
        overall_max_drawdown,
    ) = _roi_summary(combined_df_sorted)
    overall_roi_dd_ratio = overall_total_roi / abs(overall_max_drawdown)

    stats_df8.loc["Overall"] = [
        overall_total_roi,
        overall_total_trades,
        overall_win_rate,
        overall_avg_profit,
        overall_avg_loss,
        overall_max_drawdown,
        overall_roi_dd_ratio,
        variation,
    ]
    return {overall_roi_dd_ratio : stats_df8}

In [126]:
# Convert from the polars source frame rather than from ``bnf1`` itself, so
# re-running this cell does not call .to_pandas() on an already-converted
# pandas frame.
bnf1 = bnf.to_pandas()

In [127]:
# # bnf2 = bnf2.to_pandas()
# data = linear_regression_vectorized(bnf1, 80)
# signals_df1 = generate_signals1(data, 1)
# # signals_df2 = generate_signals2(bnf2)

In [128]:
# data[data['Buy_Signal'] > 0].tail()

In [129]:
# bnf1.tail()

In [130]:
# Pipeline: resample to 15-minute candles, add the two regression columns
# (windows 45 and 25), flag buy signals that need 3 consecutive rises, then
# run the backtest with n=1.
# bnf1 is wrapped back into a polars frame here because resample() uses the polars API.
bnf2 = resample(pl.DataFrame(bnf1), "15m", "0m")
data = linear_regression_vectorized(bnf2.to_pandas(), 45, 25)
# print(data.tail(10).to_string())
signals_df1 = generate_signals1(data, 3)
# execute() reads the short column names o/h/l/c, so rename after the signals are generated.
signals_df1 = signals_df1.rename(columns={"open": "o", "high": "h", "low": "l", "close": "c"})
new_tb = execute(signals_df1, 1)

# new_tb = execute(signals_df1)
# Running drawdown: cumulative ROI% minus its running peak (zero or negative).
new_tb["DD%"] = new_tb["ROI%"].cumsum() - new_tb["ROI%"].cumsum().cummax()
tradebook_buy_side = new_tb
# new_tb.tail(25)

In [131]:
# tradebook_buy_side['DD%'] = (tradebook_buy_side['ROI%'].cumsum() - tradebook_buy_side['ROI%'].cumsum().cummax())
# tradebook_buy_side[tradebook_buy_side['Trade Year'] >= 2024]

In [132]:
# generate_stats returns a single-entry dict {overall ROI/DD ratio: stats table};
# unpack that one entry and display the table.
stats2 = generate_stats(new_tb, "...")
roi_overall, stats_overall = next(iter(stats2.items()))
stats_overall

Unnamed: 0,Total ROI,Total Trades,Win Rate,Avg Profit% per Trade,Avg Loss% per Trade,Max Drawdown,ROI/DD Ratio,Variation
2017,0.0,0,,,,,,...
2018,0.0,0,,,,,,...
2019,0.0,0,,,,,,...
2020,0.0,0,,,,,,...
2021,0.0,0,,,,,,...
2022,159.2771,275,33.0909,4.8657,-1.5408,-20.8362,7.6443,...
2023,158.4755,331,38.9728,2.9174,-1.0785,-17.8991,8.8538,...
2024,77.1665,135,42.2222,3.5096,-1.5754,-17.4239,4.4288,...
Overall,394.9191,741,37.3819,3.6793,-1.3454,-20.8362,18.9535,...


In [134]:
# Save the trade book (including the DD% column) to a CSV in the working directory.
tradebook_buy_side.to_csv('MIDCP_LR.csv')