In [None]:
import pandas as pd
import numpy as np
import mibian
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
def preprocess(df):
    """Parse the ``date`` column and order the rows chronologically.

    The frame is modified in place: ``date`` is replaced by its
    ``pd.to_datetime`` conversion and the rows are re-ordered by it (the
    original index labels travel with their rows).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``date`` column that ``pd.to_datetime`` can parse.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in, returned for convenient chaining.
    """
    parsed_dates = pd.to_datetime(df['date'])
    df['date'] = parsed_dates
    # Sort in place so the object handed back is the caller's own frame.
    df.sort_values('date', inplace=True)
    return df

# Load the ETH and BTC comparison tables and run each through preprocess
# (date parsing + chronological ordering).
eth = pd.read_csv('../data processing/deribit lyra comparison/eth.csv').pipe(preprocess)
btc = pd.read_csv('../data processing/deribit lyra comparison/btc.csv').pipe(preprocess)

In [None]:
def create_put_call_parity(dataset, time_delta_in_minutes):
    """Pair every call row with the nearest-in-time put on the same strike and expiry.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Needs the columns ``datetime`` (``%Y-%m-%d %H:%M:%S`` strings or
        datetimes), ``type`` (rows equal to ``'call'`` or ``'put'`` are used),
        ``strike`` and ``expiry``. The frame itself is left unmodified.
    time_delta_in_minutes : int or float
        Largest gap, in minutes, allowed between a call and the put it is
        paired with.

    Returns
    -------
    pandas.DataFrame
        One row per call that found a put inside the window, ordered by the
        call's ``datetime`` and carrying a fresh ``0..n-1`` index. ``datetime``,
        ``strike`` and ``expiry`` appear once; every other column appears twice
        with the suffixes ``_call`` and ``_put``.
    """
    # reset_index (without inplace) hands back a new frame, so all the work
    # below happens on a private copy: the caller keeps its own index and its
    # raw ``datetime`` column.
    quotes = dataset.reset_index(drop=True)
    quotes['datetime'] = pd.to_datetime(quotes['datetime'], format='%Y-%m-%d %H:%M:%S')

    # merge_asof refuses null merge keys, and a row without a timestamp cannot
    # be placed inside a time window anyway, so such rows are left out.
    quotes = quotes.dropna(subset=['datetime'])

    # merge_asof requires both sides to be sorted on the ``on`` key. A stable
    # sort keeps rows that share a timestamp in their incoming order, which
    # makes the pairing reproducible from run to run.
    calls = quotes[quotes['type'] == 'call'].sort_values(by='datetime', kind='mergesort')
    puts = quotes[quotes['type'] == 'put'].sort_values(by='datetime', kind='mergesort')

    # For each call, pick the put with the same strike/expiry whose timestamp is
    # closest (earlier or later), but only if it lies within the requested window.
    merged = pd.merge_asof(calls, puts, on='datetime', by=['strike', 'expiry'],
                           suffixes=('_call', '_put'),
                           tolerance=pd.Timedelta(f'{time_delta_in_minutes} minutes'),
                           direction='nearest')

    # Calls with no put inside the window come back with NaN in every ``_put``
    # column; ``type_put`` is used as the marker to discard them.
    merged = merged.dropna(subset=['type_put'])

    return merged.reset_index(drop=True)

# Pair ETH calls with puts quoted at most 30 minutes apart.
# NOTE: `eth` is rebound to the paired frame, in which `type` has become
# `type_call` / `type_put`; re-running this cell without reloading the data
# would therefore pass a frame that no longer has a plain `type` column.
eth = create_put_call_parity(dataset=eth, time_delta_in_minutes=30)

In [None]:
# Put-call parity formula: C - P = S - K * e^(-rT)

def calculate_put_call_disparities(dataset):
    """Compare observed call-minus-put prices with the put-call parity value.

    Five columns are added to ``dataset`` in place, in this order:
    ``theoretical_parity`` (S - K * exp(-r * T / 100)), ``observed_difference``
    (|Lyra call| - |Lyra put|), ``observed_difference_deribit`` (the same for
    the Deribit prices), ``disparity`` and ``deribit_disparity`` (observed
    difference minus the theoretical value).

    Parameters
    ----------
    dataset : pandas.DataFrame
        Paired call/put rows holding every source column listed in the
        column map below.

    Returns
    -------
    pandas.DataFrame
        A separate frame with the selected columns under short report labels
        (``trade_time``, ``C-P``, ``S-Ke^-rT``, ...).
    """
    spot = dataset['selected_spot_price_call']
    strike = dataset['strike']
    # The product r * T is divided by 100 -- presumably because the rate is
    # quoted in percent; TODO confirm against the data source.
    discount_exponent = -dataset['interest_rate_call'] * dataset['time_to_maturity_call'] / 100
    dataset['theoretical_parity'] = spot - strike * np.exp(discount_exponent)

    # Prices enter by magnitude only (abs) -- presumably some quotes are stored
    # with a sign; verify upstream.
    lyra_call = abs(dataset['lyra_price_call'])
    lyra_put = abs(dataset['lyra_price_put'])
    dataset['observed_difference'] = lyra_call - lyra_put

    deribit_call = abs(dataset['selected_deribit_price_call'])
    deribit_put = abs(dataset['selected_deribit_price_put'])
    dataset['observed_difference_deribit'] = deribit_call - deribit_put

    parity = dataset['theoretical_parity']
    dataset['disparity'] = dataset['observed_difference'] - parity
    dataset['deribit_disparity'] = dataset['observed_difference_deribit'] - parity

    # (source column, report label) pairs, in output order.
    column_map = [
        ('datetime', 'trade_time'),
        ('expiry', 'expiry'),
        ('strike', 'strike'),
        ('observed_difference', 'C-P'),
        ('observed_difference_deribit', 'Deribit-C-P'),
        ('theoretical_parity', 'S-Ke^-rT'),
        ('disparity', 'disparity'),
        ('deribit_disparity', 'deribit_disparity'),
        ('selected_spot_price_call', 'S'),
        ('interest_rate_call', 'r'),
        ('time_to_maturity_call', 'T'),
        ('lyra_price_call', 'C'),
        ('lyra_price_put', 'P'),
        ('lyra_IV_call', 'C_IV'),
        ('lyra_IV_put', 'P_IV'),
        ('selected_deribit_IV_call', 'Deribit_C_IV'),
        ('selected_deribit_IV_put', 'Deribit_P_IV'),
        ('selected_deribit_price_call', 'Deribit_C'),
        ('selected_deribit_price_put', 'Deribit_P'),
    ]
    source_names = [source for source, _ in column_map]
    report_labels = [label for _, label in column_map]

    discrepancies = dataset[source_names]
    discrepancies.columns = report_labels
    return discrepancies

# Build the ETH put-call parity report from the paired quotes and write it to CSV
# (without the positional index).
put_call_parity_eth = calculate_put_call_disparities(dataset=eth)
put_call_parity_eth.to_csv(
    '../data processing/put call parity/eth_put_call_parity.csv',
    index=False,
)