In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
from sklearn.datasets import make_blobs 
from sklearn.cluster import KMeans
plt.style.use('ggplot')


In [5]:
import numpy as np
import pandas as pd
from scipy.stats import norm

def black_scholes(S, K, T, r, sigma, option_type="call"):
    d1 = (np.log(S / K) + (r + 0.5 * sigma**2) * T) / (sigma * np.sqrt(T))
    d2 = d1 - sigma * np.sqrt(T)

    if option_type == "call":
        option_price = S * norm.cdf(d1) - K * np.exp(-r * T) * norm.cdf(d2)
    else:
        option_price = K * np.exp(-r * T) * norm.cdf(-d2) - S * norm.cdf(-d1)

    return option_price

def daily_hedging_pnl(data, daily_hedging_time):
    data['quote_datetime'] = pd.to_datetime(data['quote_datetime'])
    data['expiration'] = pd.to_datetime(data['expiration'])
    data.sort_values(by='quote_datetime', inplace=True)

    hedge_pnl = pd.DataFrame({'quote_datetime': data['quote_datetime']})
    unique_roots = data['root'].unique()

    for root in unique_roots:
        root_data = data[data['root'] == root]
        unique_expirations = root_data['expiration'].unique()

        for expiration in unique_expirations:
            expiration_data = root_data[root_data['expiration'] == expiration]
            unique_option_types = expiration_data['option_type'].unique()

            for option_type in unique_option_types:
                option_data = expiration_data[expiration_data['option_type'] == option_type]
                unique_strikes = option_data['strike'].unique()

                for strike in unique_strikes:
                    quote = option_data[option_data['strike'] == strike]
                    quote.reset_index(drop=True, inplace=True)

                    option_prices = []
                    for j in range(len(quote)):
                        if j % daily_hedging_time == 0:
                            S = (quote.at[j, 'bid'] + quote.at[j, 'ask']) / 2  # Average of bid and ask as the option price
                            T = (expiration - quote.at[j, 'quote_datetime']).days / 365.0
                            r = 0.01  # Risk-free rate (you can change this)
                            sigma = quote.at[j, 'implied_volatility']

                            option_price = black_scholes(S, strike, T, r, sigma, option_type)
                            option_prices.append(option_price)

                    pnl = option_prices - option_prices[0]  # Daily P&L is the difference from the initial price
                    print(f"Length of pnl: {len(pnl)}")
                    print(f"Length of hedge_pnl: {len(hedge_pnl)}")
                    hedge_pnl[f"{root} {expiration} {option_type} {strike} Strike"] = pnl

    return hedge_pnl

if __name__ == "__main__":
    # Load data from CSV or your preferred data source
    # Replace 'your_dataset.csv' with the actual file name and path
    data = pd.read_csv('C:/Users/my/Downloads/a.csv')

    # Define parameters
    daily_hedging_time = 7  # Number of days between each hedging trade

    # Backtest the hedging strategy
    hedge_pnl = daily_hedging_pnl(data, daily_hedging_time)

    # Save results to CSV
    hedge_pnl.to_csv("hedging_pnl_results.csv")

    print("Backtesting completed. Results saved to hedging_pnl_results.csv.")


  d1 = (np.log(S / K) + (r + 0.5 * sigma**2) * T) / (sigma * np.sqrt(T))


Length of pnl: 43
Length of hedge_pnl: 128092437


ValueError: Length of values (43) does not match length of index (128092437)

In [3]:
import numpy as np
import pandas as pd
from scipy.stats import norm

def black_scholes(S, K, T, r, sigma, option_type="call"):
    d1 = (np.log(S / K) + (r + 0.5 * sigma**2) * T) / (sigma * np.sqrt(T))
    d2 = d1 - sigma * np.sqrt(T)

    if option_type == "call":
        option_price = S * norm.cdf(d1) - K * np.exp(-r * T) * norm.cdf(d2)
    else:
        option_price = K * np.exp(-r * T) * norm.cdf(-d2) - S * norm.cdf(-d1)

    return option_price

def daily_hedging_pnl(data, daily_hedging_time):
    data['quote_datetime'] = pd.to_datetime(data['quote_datetime'])
    data['expiration'] = pd.to_datetime(data['expiration'])
    data.sort_values(by='quote_datetime', inplace=True)

    # Get unique options
    options = data[['root', 'expiration', 'strike', 'option_type']].drop_duplicates()

    hedge_pnl = pd.DataFrame(index=data['quote_datetime'].drop_duplicates())
    for i, option in options.iterrows():
        #underlying_symbol = option['underlying_symbol']
        root = option['root']
        expiration = option['expiration']
        strike = option['strike']
        option_type = option['option_type']

        option_data = data[(data['root'] == root) &
                           (data['expiration'] == expiration) &
                           (data['strike'] == strike) &
                           (data['option_type'] == option_type)]

        option_prices = []
        for j, quote in option_data.iterrows():
            if j % daily_hedging_time == 0:
                S = (quote['bid'] + quote['ask']) / 2  # Average of bid and ask as the option price
                T = (expiration - quote['quote_datetime']).days / 365.0
                r = 0.01  # Risk-free rate (you can change this)
                sigma = quote['implied_volatility']

                option_price = black_scholes(S, strike, T, r, sigma, option_type)
                option_prices.append(option_price)

        pnl = option_prices - option_prices[0]  # Daily P&L is the difference from the initial price
        hedge_pnl[f"{root} {expiration} {option_type} {strike} Strike"] = pnl

    return hedge_pnl

if __name__ == "__main__":
    # Load data from CSV or your preferred data source
    # Replace 'your_dataset.csv' with the actual file name and path
    data = pd.read_csv('C:/Users/my/Downloads/a.csv')

    # Define parameters
    daily_hedging_time = 7  # Number of days between each hedging trade

    # Backtest the hedging strategy
    hedge_pnl = daily_hedging_pnl(data, daily_hedging_time)

    # Save results to CSV
    hedge_pnl.to_csv("hedging_pnl_results.csv")

    print("Backtesting completed. Results saved to hedging_pnl_results.csv.")



  d1 = (np.log(S / K) + (r + 0.5 * sigma**2) * T) / (sigma * np.sqrt(T))


ValueError: Length of values (42) does not match length of index (26922)