In [37]:
import datetime
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
import re
import requests
import xml.etree.ElementTree as ET
from IPython.display import display, HTML
from scipy import interpolate
from yahoo_fin import options as op, stock_info as si

import logging


# Create a logger
logger = logging.getLogger(__name__)
logger.setLevel(logging.ERROR)

# Create a file handler
handler = logging.FileHandler('error_log.txt')
handler.setLevel(logging.ERROR)

# Create a formatter and add it to the handler
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)

# Add the handler to the logger
logger.addHandler(handler)

# Global flag for controlling print statements
enable_print = False

def custom_print(*args, **kwargs):
    global enable_print
    if enable_print:
        print(*args, **kwargs)


In [38]:
from yahoo_fin import options as op, stock_info as si
s = si.get_live_price("^GSPC")
print(s)
l = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]
l[:-2]
l[-2]

4780.93994140625


  return df.close[-1]


5.0

In [39]:
import mibian

# Define the parameters
underlyingPrice = si.get_live_price("^GSPC")
strikePrice = 1200
interestRate = 0.01
daysToExpiration = 2
optionPrice = 0.025 #3360.5 	
option_type = 'None'
volatility = 492.1875 #0.00010 

# Create an instance of the BS class with the parameters
if option_type == 'C':
    print('Call I.V')
    bs = mibian.BS([underlyingPrice, strikePrice, interestRate, daysToExpiration], callPrice=optionPrice)
elif option_type == 'P':
    print('Put I.V')
    bs = mibian.BS([underlyingPrice, strikePrice, interestRate, daysToExpiration], putPrice=optionPrice)
elif volatility != None:
    print('Volatility I.V')
    bs = mibian.BS([underlyingPrice, strikePrice, interestRate, daysToExpiration], volatility=volatility)    
    print(bs.callPrice, bs.putPrice)
else:
    print('Invalid option type')    

# Print the implied volatility
if volatility == None:
    print(bs.impliedVolatility)

Volatility I.V
3580.9556062697693 0.015007329453080726


  return df.close[-1]


In [40]:
import datetime
import pytz


def chicago_eod_four_PM(eod, near_term=True):
    # Create a datetime object for 4 PM on November 10
    # replace 2023 with the correct year
    # 14.17 as of 11th Nov 2020
    four_pm = datetime.datetime(
        eod['year'], eod['month'], eod['day'], eod['hour'], eod['minute'], 0)  # 10th Nov 2023

    # Convert the datetime object to Eastern Time
    eastern = pytz.timezone('US/Eastern')
    four_pm_et = eastern.localize(four_pm)
    custom_print("four_pm_et: ", four_pm_et)

    tomorrow = datetime.datetime(
        four_pm_et.year, four_pm_et.month, four_pm_et.day, tzinfo=four_pm_et.tzinfo) + datetime.timedelta(1)
    days = [four_pm_et + datetime.timedelta(index) for index in range(24, 38)]

    friday = next(day for day in days if day.weekday() == 4)
    next_term = -1 if near_term == True else 6
    days_to_expiration = (friday - four_pm_et).days + next_term
    expiration_day = datetime.datetime(
        four_pm_et.year, four_pm_et.month, four_pm_et.day,
        tzinfo=four_pm_et.tzinfo) + datetime.timedelta(days_to_expiration + 1)
    custom_print("days to expiration: ", days_to_expiration)
    custom_print("friday: ", friday)
    custom_print("expiration day: ", expiration_day)
    custom_print("expiration day - calculation day: ", expiration_day - four_pm_et)

    # minutes_to_settlement = 570 if friday.day in range(     #friday.day to expiration_day.day will cover near and next term
    #     15, 22) and friday.weekday() == 4 else 960

    minutes_to_settlement = 570 if expiration_day.day in range(
        15, 22) and friday.weekday() == 4 else 960

    minutes_to_midnight_today = (tomorrow - four_pm_et).seconds // 60
    minutes_to_expiration = days_to_expiration * 24 * 60

    custom_print(minutes_to_midnight_today, minutes_to_settlement, minutes_to_expiration)
    time_to_expiration = (minutes_to_midnight_today +
                          minutes_to_settlement + minutes_to_expiration)

    # time_to_expiration = (minutes_to_midnight_today +
    #                       minutes_to_settlement + minutes_to_expiration) / 525600

    return (time_to_expiration, expiration_day, four_pm_et, days_to_expiration)


yesterday = datetime.datetime(2023, 1, 16) - datetime.timedelta(days=0)
eod = {
    "year": yesterday.year,
    "month": yesterday.month,
    "day": yesterday.day,
    "hour": 16,
    "minute": 15,
    "second": 0
}
print("Time to expiration: ", chicago_eod_four_PM(
    eod, True)[1], chicago_eod_four_PM(eod, False)[1])


Time to expiration:  2023-02-10 00:00:00-05:00 2023-02-17 00:00:00-05:00


In [41]:
import json
import requests
import re


def scrape_us_treasury_yield_curve_and_transpose_it_to_minutes_scale(days = 0):
    """
    Retrieves and formats the US Treasury yield curve.
    No arguments.
    """
    calctime = datetime.datetime.now() - datetime.timedelta(days)
    year = calctime.year
    month = calctime.month
    custom_print(year, month)
    # year = datetime.datetime.now().year
    # month = datetime.datetime.now().month
    url = f"https://home.treasury.gov/resource-center/data-chart-center/interest-rates/pages/xmlview?data=daily_treasury_yield_curve&field_tdr_date_value_month={year}{month:02d}"
    response = requests.get(url)

    #print(json.dumps(response.content.decode(), indent=4))
    root = ET.fromstring(response.content)

    temp_us_treasury_dict = {}
    us_treasury_dict = {}

    for elt in root.iter():
        temp_us_treasury_dict[elt.tag[-8:]] = elt.text

    for key in temp_us_treasury_dict.keys():
        if (key.find("MONTH") + key.find("YEAR") + key.find("DATE") != -3):
            us_treasury_dict[re.sub(r'.*_', '', key)
                             ] = temp_us_treasury_dict[key]

    # Based on the average number of day in a month: 30.42
    minutes_in_a_month = 43804
    minutes_in_a_year = 525600

    # minute axis
    #x = [int(key[:-5])*minutes_in_a_month if "MONTH" in key else int(key[:-4])*minutes_in_a_year if "YEAR" in key else None for key in us_treasury_dict.keys()]
    x = [int(key[:-5])*minutes_in_a_month if "MONTH" in key else int(key[:-4]) *
         minutes_in_a_year if "YEAR" in key else None for key in us_treasury_dict.keys() if "MONTH" in key or "YEAR" in key]

    # yield axis
    y = [us_treasury_dict[key]
         for key in us_treasury_dict.keys() if "MONTH" in key or "YEAR" in key]
    custom_print("Yield curve:", us_treasury_dict)
    custom_print("Yield curve in minutes scale:", dict(zip(x, y)))

    return [x, y]

r = scrape_us_treasury_yield_curve_and_transpose_it_to_minutes_scale(days = 365)
r

[[43804,
  87608,
  131412,
  175216,
  262824,
  525600,
  1051200,
  1576800,
  2628000,
  3679200,
  5256000,
  10512000,
  15768000],
 ['4.58',
  '4.64',
  '4.70',
  '4.74',
  '4.80',
  '4.68',
  '4.21',
  '3.90',
  '3.63',
  '3.59',
  '3.52',
  '3.78',
  '3.65']]

In [42]:
import numpy as np
from scipy.interpolate import CubicSpline
from datetime import date
import math


def cubic_spline_risk_free_rate(minutes_to_expiration, days = 0):
    """
    Estimates the risk free rate (based on the US treasury yield) at a specific time to expiration
    :param <time_to_expiration>: integer ; time to expiration in day or minutes 
    :param <minute_data>: boolean ; indicates if the argument <time_to_expiration> is in minutes or days
    """

    # Extract the tenors and corresponding yields from the US Treasury yield curve
    yc = scrape_us_treasury_yield_curve_and_transpose_it_to_minutes_scale()
    tenors = yc[0]
    rates = yc[1]

    # Sort the data by tenor
    sorted_indices = np.argsort(tenors)
    # print(sorted_indices)
    #tenors_sorted = tenors[sorted_indices.tolist()]
    #rates_sorted = rates[sorted_indices.tolist()]

    tenors_sorted = [tenors[i] for i in sorted_indices]
    rates_sorted = [rates[i] for i in sorted_indices]

    # print(tenors_sorted)
    # print(rates_sorted)

    # Create a cubic spline interpolator
    cs = CubicSpline(tenors_sorted, rates_sorted,
                     extrapolate=True)  # Allow extrapolation

    # Specify expiration dates of near-term and next-term options (replace with actual dates)
    #near_term_expiration = date(2023, 12, 31)
    #next_term_expiration = date(2024, 3, 31)

    # Calculate time remaining until expiration in years
    #today = date.today()
    #time_to_near_term = (near_term_expiration - today).days / 365
    #time_to_next_term = (next_term_expiration - today).days / 365

    time_to_near_term = minutes_to_expiration[0]
    time_to_next_term = minutes_to_expiration[1]
    #time_to_near_term = 34,484
    #time_to_next_term = 44,954

    # Calculate interpolated/extrapolated yields using cubic spline
    yield_R1 = cs(time_to_near_term)
    yield_R2 = cs(time_to_next_term)
    # print(f"R1 (BEY) for {time_to_near_term} minutes: {yield_R1}")
    # print(f"R2 (BEY) for {time_to_next_term} minutes: {yield_R2}")

    # Convert BEY to APY using the correct formula
    APY_R1 = ((1 + yield_R1 / 2) ** 2) - 1
    APY_R2 = ((1 + yield_R2 / 2) ** 2) - 1

    r_near = np.log(1 + APY_R1)
    r_next = np.log(1 + APY_R2)

    # print(f"R1 (APY) for {time_to_near_term} minutes: {APY_R1}")
    # print(f"R2 (APY) for {time_to_next_term} minutes: {APY_R2}")

    # print(f"R1 (ln(APY+1)) for {time_to_near_term} minutes: {r_near}")
    # print(f"R2 (ln(APY+1)) for {time_to_next_term} minutes: {r_next}")

    # return [APY_R1, APY_R2]
    # return [r_near, r_next]
    return [yield_R1, yield_R2]


In [43]:
def add_strike_diff(df):
    # Create a new column 'strike_prev' that contains the previous 'strike' values
    df['strike_prev'] = df['strike'].shift(-1)

    # Create a new column 'strike_next' that contains the next 'strike' values
    df['strike_next'] = df['strike'].shift(1)

    # Create a new column 'strike_diff' that contains the differences between 'strike_next' and 'strike_prev'
    df['strike_diff'] = (df['strike_prev'] - df['strike_next'])/2

    # For the first and last 'strike', keep the original 'strike' values
    # df.loc[0, 'strike_diff'] = df.loc[1, 'strike'] - df.loc[0, 'strike']
    df.iloc[0, df.columns.get_loc('strike_diff')] = df.iloc[1, df.columns.get_loc('strike')] - \
        df.iloc[0, df.columns.get_loc('strike')]

    df.iloc[-1, df.columns.get_loc('strike_diff')] = df.iloc[-1, df.columns.get_loc('strike')] -\
        df.iloc[-2, df.columns.get_loc('strike')]
    # df.loc[df.index[-1], 'strike_diff'] = df.loc[df.index[-1],
    #                                              'strike'] - df.loc[df.index[-2], 'strike']
    return df


In [44]:
def remove_strikes(df, option_type = None):
    # Create new columns that contain the 'bid' values of the preceding and following rows
    df['bid_prev1'] = df['bid'].shift(1)
    df['bid_next1'] = df['bid'].shift(-1)

    # For 'Call' options, find the first index where 'bid' and 'bid_next1' are 0
    if option_type == 'Call' or df['option_type'].isin(['C', 'Call']).any():
        try:
            index_to_drop = df[(df['bid'] == 0) & (
                df['bid_next1'] == 0)].index[0]
            # Drop all rows from 'index_to_drop' to the end of the DataFrame
            df = df.loc[:index_to_drop-1]
        except IndexError:
            pass
        except KeyError as e:
        # Log the state of the DataFrame when the error occurs
            logger.info(f"Error: {e}")
            logger.info(f"DataFrame state: {df.to_string()}")
            logger.info(f"index_to_drop: {index_to_drop}")
            logger.info(f"last_index: {last_index}")
            #print(f"Error: {e}")
            #print(f"DataFrame state: {df.to_string()}")
            print(f"index_to_drop: {index_to_drop}")
            print(f"last_index: {last_index}")

    # For 'Put' options, find the first index where 'bid' and 'bid_prev1' are 0
    elif option_type == 'Put' or df['option_type'].isin(['P', 'Put']).any():
        try:
            index_to_drop = df[(df['bid'] == 0) & (
                df['bid_prev1'] == 0)].index[0]
            # Drop all rows from the start of the DataFrame to 'index_to_drop'
            last_index = df.index[-1]
            if index_to_drop < last_index:
                df = df.loc[index_to_drop+1:]
            else:
                df = df.loc[:last_index]
            #df = df.loc[index_to_drop+1:]
        except IndexError:
            pass
        except KeyError as e:
        # Log the state of the DataFrame when the error occurs
            logger.info(f"Error: {e}")
            logger.info(f"DataFrame state: {df.to_string()}")
            logger.info(f"index_to_drop: {index_to_drop}")
            logger.info(f"last_index: {last_index}")
            #print(f"Error: {e}")
            #print(f"DataFrame state: {df.to_string()}")
            print(f"index_to_drop: {index_to_drop}")
            print(f"last_index: {last_index}")

    # Drop the 'bid_prev1' and 'bid_next1' columns
    df = df.drop(columns=['bid_prev1', 'bid_next1'])

    # remove any row that has zeor bid
    df = df[df['bid'] != 0]

    ####display(HTML(df.to_html(index=False, border=0)))

    return df


In [45]:
def forward_and_atm(min_diff_strike, df_calls, df_puts, activation, near_term):
    """
    Calculate forward VIX by extraploating minimum strike price...
    """
    df_calls_copy = df_calls.copy().set_index('strike', inplace=False)
    df_puts_copy = df_puts.copy().set_index('strike', inplace=False)

    forward = min_diff_strike + \
        activation * (df_calls.loc[df_calls['strike'] == min_diff_strike, 'mid-quote'].values[0] -
                      df_puts.loc[df_puts['strike'] == min_diff_strike, 'mid-quote'].values[0])

    print("Forward: ", forward)
    common_strikes = df_calls_copy.index.intersection(df_puts_copy.index)

    lower_strikes = common_strikes[common_strikes < forward]
    atm_strike = lower_strikes.max()

    #atm_strike = df_calls[df_calls['strike'] < f_near]['strike'].max()

    row_calls = df_calls.loc[df_calls['strike']
                             == atm_strike].reset_index(drop=True)
    row_puts = df_puts.loc[df_puts['strike']
                           == atm_strike].reset_index(drop=True)
    average_values = (row_calls[['mid-quote', 'bid', 'ask', 'px_last']] +
                      row_puts[['mid-quote', 'bid', 'ask', 'px_last']]) / 2
    average_values['strike'] = atm_strike
    average_values['option_type'] = 'ATM Avg Put/Call'
    atm_df = average_values[['strike', 'bid', 'ask',
                             'option_type', 'px_last', 'mid-quote']]

    #display(HTML(atm_df.to_html(index=False, border=0)))
    return forward, atm_df, atm_strike


In [46]:
def find_min_diff_strike(df_calls, df_puts):
    # Set the strike price as the index in both dataframes
    df_calls_copy = df_calls.copy()
    df_puts_copy = df_puts.copy()

    df_calls_copy.set_index('strike', inplace=True)
    df_puts_copy.set_index('strike', inplace=True)

    # Calculate the difference between the mid-quote values

    mid_quote_diff = (df_calls_copy['mid-quote'] -
                      df_puts_copy['mid-quote']).abs()

    # Find the strike price with the minimum difference
    try:
        min_diff_strike = mid_quote_diff.idxmin()
    except ValueError:
        logger.error("Attempted to get argmin of an empty sequence.")
        logger.error(f"mid_quote_diff: {mid_quote_diff}")
        logger.error(f"df_calls_copy: {df_calls_copy} df_puts_copy: {df_puts_copy}")
        min_diff_strike = None

    custom_print("min_diff_strike: ", min_diff_strike)
    return min_diff_strike


In [47]:
def find_min_diff_strike_new(df_calls, df_puts, activation, near_term):

    ##display(HTML(df_puts.to_html(index=False, border=0)))
    ##display(HTML(df_calls.to_html(index=False, border=0)))

    # Set the strike price as the index in both dataframes
    df_calls_local = df_calls.copy()
    df_puts_local = df_puts.copy()

    df_calls_local['strike-copy'] = df_calls_local['strike']
    df_puts_local['strike-copy'] = df_puts_local['strike']

    df_calls_local.set_index('strike', inplace=True)
    df_puts_local.set_index('strike', inplace=True)

    # Find common strikes
    common_strikes = df_calls_local.index.intersection(df_puts_local.index)

    # Filter dataframes to only include common strikes
    df_calls_local = df_calls_local.loc[common_strikes]
    df_puts_local = df_puts_local.loc[common_strikes]

    # Filter out rows where mid-quote is zero in both dataframes
    non_zero_mask = ~((df_calls_local['mid-quote'] == 0)
                      & (df_puts_local['mid-quote'] == 0))
    df_calls_local = df_calls_local[non_zero_mask]
    df_puts_local = df_puts_local[non_zero_mask]

    # Calculate the difference between the mid-quote values
    mid_quote_diff = (
        df_calls_local['mid-quote'] - df_puts_local['mid-quote']).abs()

    # Find the strike price with the minimum difference
    try:
        min_diff_strike = mid_quote_diff.idxmin()
    except ValueError:
        logger.error("Attempted to get argmin of an empty sequence.")
        logger.error(f"mid_quote_diff: {mid_quote_diff}")
        logger.error(f"df_calls_copy: {df_calls_copy} df_puts_copy: {df_puts_copy}")
        min_diff_strike = None    
    custom_print("min_diff_strike: ", min_diff_strike)

    #######################################################################################
    forward = min_diff_strike + \
        activation * (df_calls.loc[df_calls['strike'] == min_diff_strike, 'mid-quote'].values[0] -
                      df_puts.loc[df_puts['strike'] == min_diff_strike, 'mid-quote'].values[0])

    custom_print("Forward: ", forward)

    atm_strike_another = df_calls_local.loc[df_calls_local['strike-copy']
                                            < forward, 'strike-copy'].max()
    custom_print("atm_strike-another: ", atm_strike_another)

    atm_strike = df_calls_local[df_calls_local['strike-copy']
                                < forward]['strike-copy'].max()

    custom_print("atm_strike: ", atm_strike)
    ###display(HTML(df_calls_local.to_html(index=False, border=0)))
    ###display(HTML(df_puts_local.to_html(index=False, border=0)))

    row_calls = df_calls.loc[df_calls['strike']
                             == atm_strike].reset_index(drop=True)
    row_puts = df_puts.loc[df_puts['strike']
                           == atm_strike].reset_index(drop=True)
    average_values = (row_calls[['mid-quote', 'bid', 'ask', 'px_last']] +
                      row_puts[['mid-quote', 'bid', 'ask', 'px_last']]) / 2
    average_values['strike'] = atm_strike
    average_values['option_type'] = 'ATM Avg Put/Call'
    atm_df = average_values[['strike', 'bid', 'ask',
                             'option_type', 'px_last', 'mid-quote']]

    #######display(HTML(atm_df.to_html(index=False, border=0)))
    return forward, atm_df, atm_strike
    ##############################################################################################

    # return min_diff_strike


In [48]:
def preapare_data(df):
    df = df.fillna(0)
    # Split the dataframe into calls and puts
    df_calls = df[df['option_type'] == 'C'].copy()
    df_puts = df[df['option_type'] == 'P'].copy()

    df_puts.loc[:, 'mid-quote'] = (df_puts['bid'] + df_puts['ask']) / 2
    df_calls.loc[:, 'mid-quote'] = (df_calls['bid'] + df_calls['ask']) / 2

    return df_calls, df_puts


In [49]:
import pandas as pd
from IPython.display import display, HTML


def chain_of_responsibility(df, r, t, near_term=True):

    df = df.fillna(0)

    df_calls, df_puts = preapare_data(df)

    # Remove the strikes with no bid price
    df_puts, df_calls = [remove_strikes(
        df_puts, 'Put'), remove_strikes(df_calls, 'Call')]

    # if near_term is False:
    #     display(HTML(df_puts.to_html(index=False, border=0)))
    #     display(HTML(df_calls.to_html(index=False, border=0)))

    # Find the min strike difference
    ######min_diff_strike = find_min_diff_strike_new(df_calls, df_puts, near_term)

    ###########################
    activation = np.exp(r*t)
    forward, df_atm, atm_strike = find_min_diff_strike_new(
        df_calls, df_puts, activation, near_term)
    ##########################

    ####print(forward, df_atm, atm_strike)

    # Calculate Forwrard
    # activation = np.exp(r*t)
    # forward, df_atm, atm_strike = forward_and_atm(
    #     min_diff_strike, df_calls.copy(), df_puts.copy(), activation, near_term)

    # activation = np.exp(r*t)
    # forward, df_atm, atm_strike = calculate_forward(
    #     min_diff_strike, df_calls, df_puts, activation, near_term)

    # Filter the dataframe to include only OTM strike
    df_puts = df_puts[df_puts['strike'] < atm_strike]
    df_calls = df_calls[df_calls['strike'] > atm_strike]

    # Remove the strikes with no bid price
    df_puts, df_calls = [remove_strikes(df_puts, 'Put'), remove_strikes(df_calls, 'Call')]

    # Combine data frames
    df_otm = pd.concat([df_puts, df_atm, df_calls], ignore_index=True)
    df_otm = df_otm.sort_values('strike')

    # Add adjescent strike difference
    df_otm = add_strike_diff(df_otm)

    # Contribution per Strike
    df_otm['strike_squared'] = df_otm['strike'] ** 2
    df_otm['strike_contribution'] = (df_otm['strike_diff'] * df_otm['mid-quote'] * activation)/df_otm['strike_squared']

    #display(HTML(df_otm.to_html(index=False, border=0)))
    total_contribution = 2 * df_otm['strike_contribution'].sum()/t
    #print("Total contribution", total_contribution)
    decay = ((forward/atm_strike - 1) ** 2)/t

    sigma_squared = (total_contribution - decay)
    ################################Per strike Quantity Contribution####################################
    df_otm['per_strike_qunatity_contribution'] = (df_otm['strike_diff'] * activation)/df_otm['strike_squared']
    df_otm['per_strike_quantity_term_adjustment'] = (2 * df_otm['per_strike_qunatity_contribution'])/t
    ###################################################################################################
    
    ######print("Sigma squared", sigma_squared)
    #display(HTML(df_otm.to_html(index=False, border=0)))

    return sigma_squared, df_otm, atm_strike


In [50]:
import requests
import json
import pandas as pd
from datetime import datetime
import numpy as np
from IPython.display import display, HTML

def create_dataset_from_data_stream(instrument_type, req_type, sec, start_date, end_date, expiry_date, ivl):
    # Define the base URL
    base_url = "http://127.0.0.1:25510/bulk_at_time"

    # Construct the request URL
    request_url = f"{base_url}/{instrument_type}/{req_type}"

    # Define the query parameters
    params = {
        "end_date": end_date,
        "exp": expiry_date,
        "ivl": ivl,
        "root": sec,
        "start_date": start_date
    }

    # Send the request
    response = requests.get(request_url, params=params)
    data = json.loads(response.text)
    df = pd.json_normalize(data['response'])
    tick_columns = ["ms_of_day", "bid_size", "bid_exchange", "bid", "bid_condition", "ask_size", "ask_exchange", "ask", "ask_condition", "date"]
    try:
        df[tick_columns] = df['ticks'].apply(lambda x: pd.Series(x[0]))
        df['strike'] = df['contract.strike']/1000
        df.sort_values(by=['strike'], inplace=True)
        df = df.rename(columns={'contract.right': 'option_type'})
        working_set = df[['strike', 'bid', 'ask', 'option_type']].copy()
        working_set.loc[:, 'px_last'] = 999
        ###working_set['px_last'] = 999
        

    except Exception as e:
        # Log the error message and the program state
        logger.error(f"Failed to fetch {response.url} on {start_date}")
        urls_with_error.append(f"Failed to fetch {response.url} on {start_date}")
        logger.error(f"Error: {str(e)}")
        logger.error(f"Expiry date: {expiry_date}")
        logger.error(f"Start date: {start_date}")
        logger.error(f"Program state: {df}")
        return None, response.url
        

    #working_set.to_csv('./historical/output.csv', index=False)
    #working_set.to_json('./historical/output.json', index=False)
    #display(HTML(working_set.to_html(index=False, border=0)))

    # Return the response
    return working_set

urls_with_error = []
trade_date, end_date, expiry_date = '20230214', '20230214', '20230215' 
#working_set = create_dataset_from_data_stream('option', 'quote', 'SPXW', trade_date, end_date, expiry_date, 57675000)

In [51]:
import datetime
# Time to expiration
import vix_utils

def spawn_backtest(vix_future_settlement_date, calc_date, run_on_data_stream = True, existing_df_near=None, existing_df_next=None):
    eod = {
        "year": vix_future_settlement_date.year,
        "month": vix_future_settlement_date.month,
        "day": vix_future_settlement_date.day,
        "hour": 16,
        "minute": 15,
        "second": 0
    }

    ###run_on_data_stream = True

    delta = abs(datetime.datetime.now() - calc_date)
    ####print(f"Time delta : {delta} ")
    custom_print("delta: ", delta)


    l_t1, l_t2 = chicago_eod_four_PM(
        eod, near_term=True), chicago_eod_four_PM(eod, near_term=False)

    start_date = calc_date.strftime('%Y%m%d')
    end_date =   calc_date.strftime('%Y%m%d')

    option_expiry_near = l_t1[1].strftime('%Y%m%d')
    option_expiry_next = l_t2[1].strftime('%Y%m%d')
    
    forward_days = (vix_future_settlement_date - calc_date).days * 1440

    n_t1 = l_t1[0] + forward_days
    n_t2 = l_t2[0] + forward_days

    ####print(f"n_t1 and n_t2 {n_t1} {n_t2} ")

    # extrapolated rates to expiration
    r_near, r_next = np.array(cubic_spline_risk_free_rate(
        [n_t1, n_t2], days = delta))/100
    custom_print("r_near and r_next", r_near, r_next, )
    custom_print(f"start_date : {start_date}, end_date : {end_date}, option_expiry_near : {option_expiry_near}, option_expiry_next : {option_expiry_next}")
    print(f"start_date : {start_date}, end_date : {end_date}, option_expiry_near : {option_expiry_near}, option_expiry_next : {option_expiry_next}")
    t_near = n_t1/525600
    t_next = n_t2/525600

    if(run_on_data_stream == True):
        if existing_df_near is None or existing_df_next is None:

            df_near = create_dataset_from_data_stream("option", "quote", "SPXW", start_date = start_date, 
                                                    end_date = start_date, expiry_date = option_expiry_near, ivl = 57675000)
            df_next = create_dataset_from_data_stream("option", "quote", "SPXW", start_date = start_date, 
                                                    end_date = start_date, expiry_date = option_expiry_next, ivl = 57675000)
        else:
            df_near = existing_df_near
            df_next = existing_df_next
        
        if df_near is None or df_next is None:
            error_urls.append(error_url)
            return None
    else:
        df_near = pd.read_csv('./dataset/dataset_20230317__atm_plus_1.csv')
        df_next = pd.read_csv('./dataset/dataset_20230324__atm_plus_1.csv')


    sigma_squared_near, df_vix_near, atm_near = chain_of_responsibility(df_near, r_near, t_near) 
    sigma_squared_next, df_vix_next, atm_next = chain_of_responsibility(df_next, r_next, t_next, False)
    
    atm_near_details = df_vix_near.loc[df_vix_near['strike'] == atm_near]
    atm_next_details = df_vix_next.loc[df_vix_next['strike'] == atm_next]                                        

    custom_print("Sigma squared near", sigma_squared_near)
    custom_print("Sigma squared next", sigma_squared_next)

    n_30 = 43200 + forward_days
    n_365 = 525600
    n_y_m = n_365/n_30

    tnear_sig_squared = sigma_squared_near * t_near
    tnext_sig_squared = sigma_squared_next * t_next


    near_dev = tnear_sig_squared * ((n_t2 - n_30)/(n_t2 - n_t1))
    next_dev = tnext_sig_squared * ((n_30 - n_t1)/(n_t2 - n_t1))
    tot_dev = near_dev + next_dev
    vix = np.sqrt(tot_dev * n_y_m) * 100

    print(f'Vix Forward on {start_date} - {vix}')
    #######print(f'atm_near - {atm_near}')
    ########print(f'atm_next - {atm_next}')
    
    df_near['mid_quote'] = (df_near['bid'] + df_near['ask']) / 2
    df_next['mid_quote'] = (df_next['bid'] + df_next['ask']) / 2
    df_vix_near = df_vix_near.rename(columns={'mid-quote': 'mid_quote'})
    df_vix_next = df_vix_next.rename(columns={'mid-quote': 'mid_quote'})

    ################################################
    df_vix_near['T1'] = t_near
    df_vix_near['(M_t2 - M_30)/(M_t2 - M_t1)'] = (n_t2 - n_30)/(n_t2 - n_t1)
    df_vix_near['M_365/M_30'] = n_y_m
    
    df_vix_near['per_strike_constants'] = df_vix_near['per_strike_quantity_term_adjustment'] * n_y_m * t_near * (n_t2 - n_30)/(n_t2 - n_t1)

    df_vix_next['T2'] = t_next
    df_vix_next['(M_30 - M_t1)/(M_t2 - M_t1)'] = (n_30 - n_t1)/(n_t2 - n_t1)
    df_vix_next['M_365/M_30'] = n_y_m

    df_vix_next['per_strike_constants'] = df_vix_next['per_strike_quantity_term_adjustment'] * n_y_m * t_next * (n_30 - n_t1)/(n_t2 - n_t1)

    ###############################################

    

    return vix, df_near, df_next, df_vix_near, df_vix_next, option_expiry_near, option_expiry_next


In [52]:
import os

def save_df_to_csv(df, path):
    # Extract directory from the path
    dir_path = os.path.dirname(path)

    # Check if the directory exists
    if not os.path.exists(dir_path):
        # If the directory doesn't exist, create it
        os.makedirs(dir_path)

    # Save the DataFrame to a CSV file
    df.to_csv(path, index=False)  

In [53]:
import os

def create_directory_structure(trade_date):
    # Define the directories to be created
    directories = [
        f'./result-set/{trade_date}',
        f'./result-set/{trade_date}/future',
        f'./result-set/{trade_date}/future/abridged',
        f'./result-set/{trade_date}/P&L',
        f'./result-set/{trade_date}/P&L/Delta-25&Above',
        f'./result-set/{trade_date}/spot'
        './result-set/P&L-across-days',
        './result-set/P&L-across-days/abridged'

        
    ]

    # Iterate over the directories
    for directory in directories:
        # Check if the directory does not already exist
        if not os.path.exists(directory):
            # Create the directory
            os.makedirs(directory)

In [54]:
import pandas as pd

def calculate_quantity_per_strike(row, vix_index):
    # Calculate the quantity per strike
    q  = (10000 *row.per_strike_constants)/(2*vix_index) 
    return q

In [91]:
def get_future_price_new(row, df2):
    if row['option_type'] == 'ATM Avg Put/Call':
        matching_rows = df2[df2['strike'] == row['strike']]
        if not matching_rows.empty:
            future_price = matching_rows['mid_quote'].mean()
            pl = row['quantity'] * (future_price - row['mid_quote'])
            return pd.Series([future_price, pl])
    else:
        matching_row = df2[(df2['strike'] == row['strike']) & (df2['option_type'] == row['option_type'])]
        if not matching_row.empty:
            future_price = matching_row['mid_quote'].values[0]
            pl = row['quantity'] * (future_price - row['mid_quote'])
            return pd.Series([future_price, pl])
    return pd.Series([None, None])

In [56]:
import vix_utils, logging, asyncio
import pandas as pd

vix_futures,vix_cash=await asyncio.gather(vix_utils.async_load_vix_term_structure(),vix_utils.async_get_vix_index_histories())

def vix_futures_calc(trade_date, settlement_date):
    filtered_df = vix_futures[(vix_futures['Trade Date'] == trade_date) & (vix_futures['Tenor_Monthly'] == 1.0)]
    settlement_df = vix_futures[(vix_futures['Trade Date'] == settlement_date) & (vix_futures['Tenor_Monthly'] == 1.0)]
    if not filtered_df.empty:
        vix_futures_on_trade_day = filtered_df['Close'].values[0]
        vix_futures_on_settlement_day = settlement_df['Close'].values[0]
        return vix_futures_on_trade_day, vix_futures_on_settlement_day    
    return None, None

In [57]:
import mibian
import datetime
import pandas as pd

def calc_implied_vol_delta_and_parity(row, underlying_price, days_to_expiry):
    interest_rate = 0.046
    delta_type = {'C': 'callDelta', 'P': 'putDelta', 'ATM Avg Put/Call': 'putDelta'}

    def calculate_mibian_bs(call_price, put_price, implied_vol=None):
        if implied_vol is None:
            ###print(f'Call Price: {call_price}, Put Price: {put_price}')
            c = mibian.BS([underlying_price, row['strike'], interest_rate, days_to_expiry], callPrice=call_price, putPrice=put_price)
            implied_vol = c.impliedVolatility
        c = mibian.BS([underlying_price, row['strike'], interest_rate, days_to_expiry], volatility=implied_vol)
        delta = getattr(c, delta_type[row['option_type']])
        return implied_vol, delta

    call_price, put_price = (row['mid_quote'], None) if row['option_type'] == 'C' else (None, row['mid_quote'])
    implied_vol, delta = calculate_mibian_bs(call_price, put_price)


    return pd.Series([implied_vol, delta])

In [58]:
def construct_atm_df(df):

    # Find the row where option_type is 'ATM Avg Put/Call'
    atm_avg_put_call_row = df[df['option_type'] == 'ATM Avg Put/Call']

    # Get the strike value from this row
    atm_avg_put_call_strike = atm_avg_put_call_row['strike'].values[0]

    # Find the rows in df where strike matches atm_avg_put_call_strike
    matching_rows = df[df['strike'] == atm_avg_put_call_strike]

    # Exclude the 'ATM Avg Put/Call' row from matching_rows
    new_df = matching_rows[matching_rows['option_type'] != 'ATM Avg Put/Call']
    
    return new_df

In [159]:
import pandas as pd

def filter_delta_across_terms(df, bound = 0.25):

    def find_unique_closest_strikes(df, option_type, target_deltas):
        """
        Finds unique closest strikes for given target deltas for the specified option type.
        """
        filtered_df = df[df['option_type'] == option_type]
        selected_strikes = pd.DataFrame()

        for delta in target_deltas:
            # Adjust for negative delta in case of Puts
            adjusted_delta = -abs(delta) if option_type == 'P' else abs(delta)

            # Exclude already selected strikes
            available_df = filtered_df[~filtered_df.index.isin(selected_strikes.index)]

            # Find the closest strike
            closest_idx = (available_df['delta'] - adjusted_delta).abs().idxmin()
            selected_strikes = pd.concat([selected_strikes, available_df.loc[[closest_idx]]])

        return selected_strikes

    target_deltas = [0.1, 0.25]
    atm_df = construct_atm_df(df)


    # Apply the function for Calls and Puts
    closest_strikes_calls = find_unique_closest_strikes(df, 'C', target_deltas)
    closest_strikes_puts = find_unique_closest_strikes(df, 'P', target_deltas)

    # Combine results
    closest_strikes = pd.concat([closest_strikes_calls, closest_strikes_puts])
    candidate_df = pd.concat([closest_strikes, atm_df])

    ##display(HTML(candidate_df.to_html(index=False, border=0)))
    return candidate_df

In [60]:
def add_left_overs_for_future_ref(df_final, df_near):

    filtered_df_near = df_final[df_final['option_type'] == 'ATM Avg Put/Call']
    filtered_df_near = filtered_df_near.rename(columns={'mid_quote': 'mid_quote_filtered', 'option_type': 'option_type_filtered'})

    matching_strikes_df_near = df_near[df_near['strike'].isin(filtered_df_near['strike'])]
    
    # Drop overlapping columns from filtered_df_near
    overlapping_columns = [col for col in filtered_df_near.columns if col in matching_strikes_df_near.columns and col != 'strike']
    filtered_df_near = filtered_df_near.drop(columns=overlapping_columns)
    df_final['px_last'] = 999

    # Merge matching_strikes_df_near and filtered_df_near
    merged_df = pd.merge(matching_strikes_df_near, filtered_df_near, on='strike', how='left')
    merged_df.drop(columns=['mid_quote_filtered', 'option_type_filtered'], inplace=True)

    # Check if columns are the same in df_final and merged_df
    if set(df_final.columns) != set(merged_df.columns):
        print("missing clumns merged_df: ", set(df_final.columns) - set(merged_df.columns))
        print("missing clumns df_final: ", set(merged_df.columns) - set(df_final.columns))
        raise ValueError("Columns in df_final and merged_df are not the same")

    # Append merged_df to df_final
    df_final = pd.concat([df_final, merged_df], ignore_index=True)

    return df_final

In [61]:
def add_vol_and_delta(df, trade_date, expiry_date):
    #####days_to_expiry = (datetime.datetime.strptime(expiry_date, "%Y%m%d") - datetime.datetime.strptime(trade_date, "%Y-%m-%d")).days
    
    days_to_expiry = (datetime.datetime.strptime(expiry_date, "%Y%m%d") - datetime.datetime.strptime(trade_date, "%Y%m%d")).days
    forward = df[df['option_type'] == 'ATM Avg Put/Call']['strike'].values[0]
    df[['Implied Vol', 'delta']] = df.apply(calc_implied_vol_delta_and_parity, args=(forward, days_to_expiry), axis=1)

    return df

In [129]:
def populate_quantity_prices_other_details(df_near, df_next, df_settlement_near, df_settlement_next, tuple_vix, tuple_vix_futures):
    
    df_near['quantity'] = df_near.apply(calculate_quantity_per_strike, axis=1, args=(tuple_vix[1],))
    df_next['quantity'] = df_next.apply(calculate_quantity_per_strike, axis=1, args=(tuple_vix[1],))

    df_near[['settlement_day_prices', 'per_strike_P&L']] = df_near.apply(get_future_price_new, args=(df_settlement_near,), axis=1)
    df_next[['settlement_day_prices', 'per_strike_P&L']] = df_next.apply(get_future_price_new, args=(df_settlement_next,), axis=1)
    
    df_near['vix_forward'], df_near['new_vix_forward'] = tuple_vix
    df_next['vix_forward'], df_next['new_vix_forward'] = tuple_vix
    
    df_near['vix_futures_on_trade_date'],df_near['vix_futures_on_settlement_date'] = tuple_vix_futures
    df_next['vix_futures_on_trade_date'],df_next['vix_futures_on_settlement_date'] = tuple_vix_futures

    return df_near, df_next

In [155]:
def calculate_pl_for_the_day(vix, df_near, df_next):

    vix_futures_on_trade_date = df_near['vix_futures_on_trade_date'].values[0]
    vix_futures_on_settlement_date = df_near['vix_futures_on_settlement_date'].values[0]
    
    if vix_futures_on_trade_date > vix :

        df_near['strategy']  = "Buy Basket sell Futures"
        df_near['future_P&L'] = vix_futures_on_trade_date - vix_futures_on_settlement_date
        ###df_near['P&L'] = (df_near['quantity'] * (df_near['settlement_day_prices'] - df_near['mid_quote'])).sum() +(vix_futures_on_trade_date - vix_futures_on_settlement_date)
        #####df_near['P&L'] = df_near['per_strike_P&L'].sum() +(vix_futures_on_trade_date - vix_futures_on_settlement_date)
        df_near['P&L'] = df_near['per_strike_P&L'].sum() + df_near['future_P&L'].iloc[0]
            
        df_next['strategy']  = "Buy Basket sell Futures"
        df_next['future_P&L'] = vix_futures_on_trade_date - vix_futures_on_settlement_date
        ###df_next['P&L'] = (df_next['quantity'] * (df_next['settlement_day_prices'] - df_next['mid_quote'])).sum() +(vix_futures_on_trade_date - vix_futures_on_settlement_date  )
        ####df_next['P&L'] = df_next['per_strike_P&L'].sum() +(vix_futures_on_trade_date - vix_futures_on_settlement_date  )
        df_next['P&L'] = df_next['per_strike_P&L'].sum() + df_next['future_P&L'].iloc[0]
            
    else:
    
        df_near['strategy']  = "Buy Vix futures sell Basket"
        df_near['per_strike_P&L'] = df_near['per_strike_P&L'] * -1
        df_near['future_P&L'] = vix_futures_on_settlement_date - vix_futures_on_trade_date
        ###df_near['P&L'] = (df_near['quantity'] * (df_near['mid_quote'] - df_near['settlement_day_prices'])).sum() + (vix_futures_on_settlement_date - vix_futures_on_trade_date)
        df_near['P&L'] = df_near['per_strike_P&L'].sum() + df_near['future_P&L'].iloc[0]
        
        
        df_next['strategy']  = "Buy Vix futures sell Basket"
        df_next['per_strike_P&L'] = df_next['per_strike_P&L'] * -1
        df_next['future_P&L'] = vix_futures_on_settlement_date - vix_futures_on_trade_date
        ###df_next['P&L'] = (df_next['quantity'] * (df_next['settlement_day_prices'] - df_next['mid_quote'])).sum() + (vix_futures_on_settlement_date - vix_futures_on_trade_date  )
        df_next['P&L'] = df_next['per_strike_P&L'].sum() + df_next['future_P&L'].iloc[0]

    combined_df = pd.concat([df_near, df_next], ignore_index=True)
    #####We are adding Future P&L twice in the combined_df so subsctracting it once
    combined_df['P&L'] = df_near['P&L'].iloc[0] + df_next['P&L'].iloc[0] - df_near['future_P&L'].iloc[0]
    
    ##### This is more correct way to calculate P&L  the other code is working so will not change as of now
    #######combined_df['P&L'] = combined_df['per_strike_P&L'].sum()  + combined_df['future_P&L'].values[0]
    
    return df_near, df_next, combined_df

In [133]:
def create_top_level_pl_df(combined_df, formatted_trade_date_str, basket_near_total, basket_next_total, vix):
    
    # Create a DataFrame with the top-level P&L information
    basket_pl = combined_df['per_strike_P&L'].sum()
    future_pl = combined_df['future_P&L'].values[0]
    total_pl = basket_pl + future_pl
    vix_forward = vix
    vix_futures_on_trade_day = combined_df['vix_futures_on_trade_date'].values[0]
    vix_futures_on_settlement_day = combined_df['vix_futures_on_settlement_date'].values[0]



    df_summary = pd.DataFrame({
                        'trade_date': [formatted_trade_date_str],
                        'near-contribution': [basket_near_total],
                        'next-contribution': [basket_next_total],
                        'vix-forward': [vix_forward],
                        'vix-futures-on-trade_day': [vix_futures_on_trade_day],
                        'vix-futures-on-settlement_day': [vix_futures_on_settlement_day],
                        'Basket-P&L': basket_pl,
                        'Future-P&L': future_pl,
                        'Total-P&L': total_pl 
                    })
    
    return df_summary
    

In [163]:
###Backtesting for a single date
import datetime

def run_backtest(year, month, day):
    try:
        ins_type = "future"
        
        v = vix_utils.vix_futures_expiry_date_from_trade_date(year, month, day, 1) if ins_type == "future" else datetime.datetime(year, month, day)
        on_day = datetime.datetime.combine(datetime.datetime(year, month, day), datetime.time(16, 15, 0))
        formatted_trade_date_str = on_day.strftime("%Y%m%d")
        vix_future_settlement_day = datetime.datetime.combine(v, datetime.time())
        print(f"Replicating Vix {ins_type} for {v} on {on_day}")
        
        create_directory_structure(on_day.strftime('%Y-%m-%d'))
        vix, df_near, df_next, df_vix_near, df_vix_next, option_expiry_near, option_expiry_next= spawn_backtest(vix_future_settlement_day, on_day)
        
        #######################################################
        df_per_strike_quantity_near = df_vix_near[['strike', 'option_type', 'bid', 'ask','mid_quote','per_strike_constants' ]].copy()
        df_per_strike_quantity_next = df_vix_next[['strike', 'option_type', 'bid', 'ask','mid_quote', 'per_strike_constants']].copy()
        
        observation_date = on_day.strftime('%Y-%m-%d')
        
        df_per_strike_quantity_near['vix_forward'] = vix
        df_per_strike_quantity_next['vix_forward'] = vix
        df_per_strike_quantity_near['quantity'] = df_per_strike_quantity_near.apply(calculate_quantity_per_strike, axis=1, args=(vix,))
        df_per_strike_quantity_next['quantity'] = df_per_strike_quantity_next.apply(calculate_quantity_per_strike, axis=1, args=(vix,))
        
        settlement_day_str = vix_future_settlement_day.date().strftime('%Y%m%d')

        df_settlement_near = create_dataset_from_data_stream("option", "quote", "SPXW", start_date = settlement_day_str, 
                                                end_date = settlement_day_str, expiry_date = option_expiry_near, ivl = 57675000)
        df_settlement_next = create_dataset_from_data_stream("option", "quote", "SPXW", start_date = settlement_day_str, 
                                                end_date = settlement_day_str, expiry_date = option_expiry_next, ivl = 57675000)
        
        df_settlement_near['mid_quote'] = (df_settlement_near['bid'] + df_settlement_near['ask']) / 2
        df_settlement_next['mid_quote'] = (df_settlement_next['bid'] + df_settlement_next['ask']) / 2
        
        df_per_strike_quantity_near[['settlement_day_prices', 'per_strike_P&L']] = df_per_strike_quantity_near.apply(get_future_price_new, args=(df_settlement_near,), axis=1)
        df_per_strike_quantity_next[['settlement_day_prices', 'per_strike_P&L']] = df_per_strike_quantity_next.apply(get_future_price_new, args=(df_settlement_next,), axis=1)

        
#####################################################################################################
        vix_futures_on_trade_date, vix_futures_on_settlement_date = vix_futures_calc(on_day.date().strftime('%Y-%m-%d'), vix_future_settlement_day.date().strftime('%Y-%m-%d'))
        print(f"Vix futures on {on_day.date().strftime('%Y-%m-%d')} {vix_futures_on_trade_date} and {vix_future_settlement_day.date().strftime('%Y-%m-%d')} {vix_futures_on_settlement_date}")
        tuple_vix_futures = (vix_futures_on_trade_date, vix_futures_on_settlement_date)
        # tuple_vix = (vix, vix)

        # df_per_strike_quantity_near, df_per_strike_quantity_near =  populate_quantity_prices_other_details(df_per_strike_quantity_near, 
        #                                         df_per_strike_quantity_next, df_settlement_near, df_settlement_next, tuple_vix, tuple_vix_futures)


        df_per_strike_quantity_near['vix_futures_on_trade_date'] = vix_futures_on_trade_date
        df_per_strike_quantity_near['vix_futures_on_settlement_date'] = vix_futures_on_settlement_date
        df_per_strike_quantity_next['vix_futures_on_trade_date'] = vix_futures_on_trade_date
        df_per_strike_quantity_next['vix_futures_on_settlement_date'] = vix_futures_on_settlement_date

        # if vix_futures_on_trade_date > vix :
        #     df_per_strike_quantity_near['strategy']  = "Buy Basket sell Futures"
        #     df_per_strike_quantity_near['future_P&L'] = vix_futures_on_trade_date - vix_futures_on_settlement_date
        #     df_per_strike_quantity_near['P&L'] = (df_per_strike_quantity_near['quantity'] * (df_per_strike_quantity_near['settlement_day_prices'] - df_per_strike_quantity_near['mid_quote'])).sum() +(vix_futures_on_trade_date - vix_futures_on_settlement_date)
            
        #     df_per_strike_quantity_next['strategy']  = "Buy Basket sell Futures"
        #     df_per_strike_quantity_next['future_P&L'] = vix_futures_on_trade_date - vix_futures_on_settlement_date
        #     df_per_strike_quantity_next['P&L'] = (df_per_strike_quantity_next['quantity'] * (df_per_strike_quantity_next['settlement_day_prices'] - df_per_strike_quantity_next['mid_quote'])).sum() +(vix_futures_on_trade_date - vix_futures_on_settlement_date  )
            
        #     # combined_df['strategy']  = "Buy Basket sell Futures"
        #     # combined_df['future_P&L'] = vix_futures_on_trade_date - vix_futures_on_settlement_date
        #     # combined_df['P&L'] = (combined_df['quantity'] * (combined_df['settlement_day_prices'] - combined_df['mid_quote'])).sum() +(vix_futures_on_trade_date - vix_futures_on_settlement_date  )
            
        # else:
        #     df_per_strike_quantity_near['strategy']  = "Buy Vix futures sell Basket"
        #     df_per_strike_quantity_near['per_strike_P&L'] = df_per_strike_quantity_near['per_strike_P&L'] * -1
        #     df_per_strike_quantity_near['future_P&L'] = vix_futures_on_settlement_date - vix_futures_on_trade_date
        #     df_per_strike_quantity_near['P&L'] = (df_per_strike_quantity_near['quantity'] * (df_per_strike_quantity_near['mid_quote'] - df_per_strike_quantity_near['settlement_day_prices'])).sum() + (vix_futures_on_settlement_date - vix_futures_on_trade_date)
            
            
        #     df_per_strike_quantity_next['strategy']  = "Buy Vix futures sell Basket"
        #     df_per_strike_quantity_next['per_strike_P&L'] = df_per_strike_quantity_next['per_strike_P&L'] * -1
        #     df_per_strike_quantity_next['future_P&L'] = vix_futures_on_settlement_date - vix_futures_on_trade_date
        #     df_per_strike_quantity_next['P&L'] = (df_per_strike_quantity_next['quantity'] * (df_per_strike_quantity_next['settlement_day_prices'] - df_per_strike_quantity_next['mid_quote'])).sum() + (vix_futures_on_settlement_date - vix_futures_on_trade_date  )
            
        df_per_strike_quantity_near, df_per_strike_quantity_next, _ = calculate_pl_for_the_day(vix, df_per_strike_quantity_near, df_per_strike_quantity_next)
        

        df_per_strike_quantity_near = add_vol_and_delta(df_per_strike_quantity_near,formatted_trade_date_str, option_expiry_near)
        df_per_strike_quantity_next = add_vol_and_delta(df_per_strike_quantity_next,formatted_trade_date_str, option_expiry_next)
        
        combined_df = pd.concat([df_per_strike_quantity_near, df_per_strike_quantity_next], ignore_index=True)
        
        combined_df['P&L'] = combined_df['per_strike_P&L'].sum()  + combined_df['future_P&L'].values[0]
    

        save_df_to_csv(df_per_strike_quantity_near, f'./result-set/{observation_date}/{ins_type}/dataset_{option_expiry_near}.csv')
        save_df_to_csv(df_per_strike_quantity_next, f'./result-set/{observation_date}/{ins_type}/dataset_{option_expiry_next}.csv')
        save_df_to_csv(combined_df, f'./result-set/{observation_date}/{ins_type}/consolidated_dataset_{on_day.date()}.csv')

        basket_near_total = df_per_strike_quantity_near['per_strike_P&L'].sum()
        basket_next_total = df_per_strike_quantity_next['per_strike_P&L'].sum()
        top_level_pl_df =  create_top_level_pl_df(combined_df, formatted_trade_date_str, basket_near_total, basket_next_total, vix)
        
        save_df_to_csv(top_level_pl_df, f'./result-set/P&L-across-days/top_level_pl_{formatted_trade_date_str}.csv')

        #######D not move this code up it's a hack for timebeing to get the abridged dataset#######################

        df_per_strike_quantity_near = add_left_overs_for_future_ref(df_per_strike_quantity_near, df_near)
        df_per_strike_quantity_next = add_left_overs_for_future_ref(df_per_strike_quantity_next, df_next)
        #####################################hack ends#########################################################


#######################################################New Block######################################################
        
        
        abridged_df_near = filter_delta_across_terms(df_per_strike_quantity_near).copy()
        abridged_df_next = filter_delta_across_terms(df_per_strike_quantity_next).copy()
        
        abridged_df_near = abridged_df_near[['strike', 'option_type', 'bid', 'ask', 'px_last']]
        abridged_df_next = abridged_df_next[['strike', 'option_type', 'bid', 'ask', 'px_last']]

        vix_new, df_abridged_near, df_abridged_next, df_vix_abridged_near, df_vix_abridged_next, option_expiry_near, option_expiry_next = spawn_backtest(vix_future_settlement_day, on_day, 
                                                                                                  existing_df_near=abridged_df_near, existing_df_next=abridged_df_next)
        
        df_vix_abridged_near = df_vix_abridged_near[['strike', 'option_type', 'bid', 'ask','mid_quote','per_strike_constants' ]].copy()
        df_vix_abridged_next = df_vix_abridged_next[['strike', 'option_type', 'bid', 'ask','mid_quote', 'per_strike_constants']].copy()
        tuple_vix_new = (vix, vix_new)
        
        df_vix_abridged_near, df_vix_abridged_next =  populate_quantity_prices_other_details(df_vix_abridged_near, df_vix_abridged_next, 
                                                                    df_settlement_near, df_settlement_next, tuple_vix_new, tuple_vix_futures)
        
        df_vix_abridged_near, df_vix_abridged_next, df_combined_abridged = calculate_pl_for_the_day(vix, df_vix_abridged_near, df_vix_abridged_next)
                                                                                                             
        
        save_df_to_csv(df_vix_abridged_near, f'./result-set/{observation_date}/{ins_type}/abridged/abridged_dataset_{option_expiry_near}.csv')
        save_df_to_csv(df_vix_abridged_next, f'./result-set/{observation_date}/{ins_type}/abridged/abridged_dataset_{option_expiry_next}.csv')
        save_df_to_csv(df_combined_abridged, f'./result-set/{observation_date}/{ins_type}/abridged/abridged_consolidated_dataset_{on_day.date()}.csv')

        basket_abridged_near_total = df_vix_abridged_near['per_strike_P&L'].sum()
        basket_abridged_next_total = df_vix_abridged_next['per_strike_P&L'].sum()

        top_level_pl_df =  create_top_level_pl_df(df_combined_abridged, formatted_trade_date_str, basket_abridged_near_total, basket_abridged_next_total, vix)
        
        save_df_to_csv(top_level_pl_df, f'./result-set/P&L-across-days/abridged/top_level_pl_{formatted_trade_date_str}.csv')
                                                    
    except Exception as e:
        print(f"Error occurred on {year}-{month}-{day}: {e}")
        return None

results = {}
error_urls = []
business_days = pd.bdate_range(start='2023-02-07', end='2023-02-07')
for day in business_days:
    result = run_backtest(day.year, day.month, day.day)

with open('error_urls.txt', 'w') as f:
    for url in error_urls:
        f.write(url + '\n')
        

Replicating Vix future for 2023-02-15 on 2023-02-07 16:15:00
start_date : 20230207, end_date : 20230207, option_expiry_near : 20230317, option_expiry_next : 20230324
Vix Forward on 20230207 - 19.793021206730558
Vix futures on 2023-02-07 19.49 and 2023-02-15 18.9
start_date : 20230207, end_date : 20230207, option_expiry_near : 20230317, option_expiry_next : 20230324
Vix Forward on 20230207 - 18.89229031785105


In [65]:
import pandas as pd

df_near =pd.read_csv('./result-set/2023-02-07/future/dataset_20230317.csv')
df_next = pd.read_csv('./result-set/2023-02-07/future/dataset_20230324.csv')
df_consolidated = pd.concat([df_near, df_next], ignore_index=True)

df_consolidated.to_csv('./result-set/2023-02-07/future/dataset_consolidated.csv', index=False)
