In [10]:
import nest_asyncio
nest_asyncio.apply()

import asyncio
import numpy as np
import pandas as pd
from ib_insync import *
import ta
import plotly.graph_objects as go
from scipy.stats import norm
from scipy.optimize import brentq

# --- Parameters ---
symbol = 'GOOG'
expiry = '20251219'
strike = 180
right = 'C'
durationStr = '30 D'
barSizeSetting = '1 min'
r = 0.044

SYMBOL = 'GOOG'
EXCHANGE = 'SMART'
CURRENCY = 'USD'
BAR_SIZE = '1 min'
DURATION = '30 D'
NUM_BARS = 8000

VOL_WINDOW = 30
VOL_THRESHOLD = 0.0001
REGIME_THRESHOLD = -0.01
ADX_PERIOD = 14
ADX_THRESHOLD = 20
SMA_PERIOD = 20
LOOKAHEAD = 4
K_NEIGHBORS = 8

OPTION_FEATURES = [ 'IV', 'Delta', 'Gamma']
STOCK_FEATURES = ['RSI', 'ADX', 'CCI', 'MACD', 'SMA']
COMBINED_FEATURES = OPTION_FEATURES + STOCK_FEATURES

# --- IB async fetchers ---
async def get_option_stock_data(symbol, expiry, strike, right, duration, barsize):
    ib = IB()
    await ib.connectAsync('127.0.0.1', 7497, clientId=np.random.randint(1000, 9999))

    stock = Stock(symbol, 'SMART', 'USD')
    opt = Option(symbol, expiry, strike, right, 'SMART')

    await ib.qualifyContractsAsync(stock)
    await ib.qualifyContractsAsync(opt)

    stock_bars = await ib.reqHistoricalDataAsync(stock, '', duration, barsize, 'TRADES', True, 1)
    opt_bars = await ib.reqHistoricalDataAsync(opt, '', duration, barsize, 'TRADES', True, 1)

    ib.disconnect()

    df_stock = util.df(stock_bars).set_index('date').rename(columns={'close': 'Stock Price'})
    df_opt = util.df(opt_bars).set_index('date').rename(columns={'close': 'Option Close'})

    df = df_stock[['Stock Price']].join(df_opt[['Option Close']], how='inner')
    return df


async def fetch_ibkr_stock(symbol, exchange, currency, bar_size, duration, num_bars):
    ib = IB()
    await ib.connectAsync('127.0.0.1', 7497, clientId=np.random.randint(1000, 9999))
    contract = Stock(symbol, exchange, currency)
    await ib.qualifyContractsAsync(contract)
    bars = await ib.reqHistoricalDataAsync(
        contract, '', duration, bar_size, 'TRADES', True, 1, False)
    df = util.df(bars).set_index('date').tail(num_bars)
    ib.disconnect()
    return df


# --- Lorentzian KNN and filters ---
def lorentzian_distance(a, b):
    return np.sum(np.log(1 + np.abs(a - b)))


def volatility_filter(df, vol_window, vol_thresh):
    atr = ta.volatility.AverageTrueRange(df['high'], df['low'], df['close'], window=vol_window).average_true_range()
    return (atr / df['close']) > vol_thresh


def regime_filter(df, threshold):
    return df['close'].pct_change().rolling(20).mean() > threshold


def adx_filter(df, adx_period, adx_threshold):
    return ta.trend.ADXIndicator(df['high'], df['low'], df['close'], window=adx_period).adx() > adx_threshold


def classify_lorentzian_knn_with_filters(df, features, sma_period,
                                         vol_window, vol_thresh,
                                         regime_thresh, adx_period, adx_thresh,
                                         n_neighbors=8, lookahead=4, max_bars_back=200):
    pred = np.full(len(df), np.nan)
    closes = df['close'].values
    feature_mat = df[features].values
    sma = df['SMA'].values

    vol_filter_vals = volatility_filter(df, vol_window, vol_thresh)
    regime_filt = regime_filter(df, regime_thresh)
    adx_filt = adx_filter(df, adx_period, adx_thresh)
    length = len(df)

    for idx in range(max_bars_back, length):
        if np.isnan(sma[idx]) or closes[idx] == 0:
            continue
        if not (vol_filter_vals.iloc[idx] and regime_filt.iloc[idx] and adx_filt.iloc[idx]):
            continue
        anchor_start = max(0, idx - max_bars_back)
        anchor_indices = np.arange(anchor_start, idx)
        anchor_feats = feature_mat[anchor_indices, :]
        target = feature_mat[idx]
        dists = np.array([lorentzian_distance(target, anchor_feats[j]) for j in range(anchor_feats.shape[0])])
        if len(dists) < n_neighbors:
            continue
        knn_indices = dists.argsort()[:n_neighbors]
        train_valid = anchor_indices[knn_indices] + lookahead < length
        selected = anchor_indices[knn_indices][train_valid]
        if len(selected) == 0:
            pred[idx] = 0
            continue
        y_train = (closes[selected + lookahead] > closes[selected]).astype(int) - (closes[selected + lookahead] < closes[selected]).astype(int)
        vote = y_train.sum()
        raw_signal = np.sign(vote) if vote != 0 else 0
        if raw_signal == 1 and closes[idx] > sma[idx]:
            pred[idx] = 1
        elif raw_signal == -1 and closes[idx] < sma[idx]:
            pred[idx] = -1
        else:
            pred[idx] = 0
    return pred


# --- Black-Scholes implied vol and Greeks ---
def d1(S, K, r, sigma, T):
    return (np.log(S / K) + (r + 0.5 * sigma**2) * T) / (sigma * np.sqrt(T))


def d2(S, K, r, sigma, T):
    return d1(S, K, r, sigma, T) - sigma * np.sqrt(T)


def bs_price(S, K, r, sigma, T, option_type):
    D1 = d1(S, K, r, sigma, T)
    D2 = d2(S, K, r, sigma, T)
    if option_type == 'C':
        return S * norm.cdf(D1) - K * np.exp(-r * T) * norm.cdf(D2)
    else:
        return K * np.exp(-r * T) * norm.cdf(-D2) - S * norm.cdf(-D1)


def bs_delta(S, K, r, sigma, T, option_type):
    D1 = d1(S, K, r, sigma, T)
    return norm.cdf(D1) if option_type == 'C' else norm.cdf(D1) - 1


def bs_gamma(S, K, r, sigma, T):
    D1 = d1(S, K, r, sigma, T)
    return norm.pdf(D1) / (S * sigma * np.sqrt(T))


def implied_volatility(market_price, S, K, r, T, option_type):
    def objective(sigma):
        return bs_price(S, K, r, sigma, T, option_type) - market_price
    try:
        return brentq(objective, 1e-4, 5.0, maxiter=500)
    except Exception:
        return np.nan


def calculate_greeks(row):
    S = row['Stock Price']
    K = strike
    price = row['Option Close']
    T = row['T']
    option_type = right
    iv = implied_volatility(price, S, K, r, T, option_type)
    if np.isnan(iv):
        return pd.Series([np.nan, np.nan, np.nan])
    delta = bs_delta(S, K, r, iv, T, option_type)
    gamma = bs_gamma(S, K, r, iv, T)
    return pd.Series([iv, delta, gamma])


# --- Main async pipeline ---
async def main():
    # Fetch option + underlying stock close price data for synchronized duration and barsize
    df_option = await get_option_stock_data(symbol, expiry, strike, right, durationStr, barSizeSetting)
    df_stock = await fetch_ibkr_stock(SYMBOL, EXCHANGE, CURRENCY, BAR_SIZE, DURATION, NUM_BARS)

    # Remove timezone for pandas join compatibility
    df_option.index = df_option.index.tz_localize(None)
    df_stock.index = df_stock.index.tz_localize(None)

    # Reindex option data to stock timestamps to synchronize, forward fill missing option prices
    df_option = df_option.reindex(df_stock.index)
    df_option['Option Close'] = df_option['Option Close'].ffill()
    df_option['Stock Price'] = df_option['Stock Price'].ffill()
    df_option.dropna(subset=['Option Close', 'Stock Price'], inplace=True)

    # Calculate time to expiry T
    expiry_date = pd.to_datetime(expiry, format='%Y%m%d')
    df_option['T'] = (expiry_date - df_option.index).total_seconds() / (365.25 * 24 * 3600)
    df_option = df_option[df_option['T'] > 0].copy()

    # Calculate IV and Greeks on option data
    df_option[['IV', 'Delta', 'Gamma']] = df_option.apply(calculate_greeks, axis=1)
    df_option['Option_Return'] = df_option['Option Close'].pct_change()

    # Stock technical features
    df_stock['RSI'] = ta.momentum.RSIIndicator(df_stock['close'], window=14).rsi()
    df_stock['ADX'] = ta.trend.ADXIndicator(df_stock['high'], df_stock['low'], df_stock['close'], window=14).adx()
    df_stock['CCI'] = ta.trend.CCIIndicator(df_stock['high'], df_stock['low'], df_stock['close'], window=20).cci()
    df_stock['SMA'] = ta.trend.SMAIndicator(df_stock['close'], window=SMA_PERIOD).sma_indicator()
    macd_ind = ta.trend.MACD(df_stock['close'], window_slow=26, window_fast=12, window_sign=9)
    df_stock['MACD'] = macd_ind.macd()
    df_stock['MACD_signal'] = macd_ind.macd_signal()
    df_stock['MACD_hist'] = macd_ind.macd_diff()

    # Keep OHLC columns for filters
    stock_required_cols = STOCK_FEATURES + ['high', 'low', 'close']
    df_stock.dropna(subset=stock_required_cols, inplace=True)

    # Combine option features with stock OHLC and indicators on timestamps
    df_features = df_option.join(df_stock[stock_required_cols], how='inner')
    df_features.dropna(subset=COMBINED_FEATURES, inplace=True)

    # Prepare 'close' & 'SMA' columns for KNN
    df_features['close'] = df_features['Option Close']
    df_features['SMA'] = df_features['Option Close'].rolling(window=SMA_PERIOD).mean()
    df_features.dropna(subset=['close', 'SMA'], inplace=True)

    # Run KNN
    df_features['raw_pred'] = classify_lorentzian_knn_with_filters(
        df_features, COMBINED_FEATURES, SMA_PERIOD,
        VOL_WINDOW, VOL_THRESHOLD,
        REGIME_THRESHOLD, ADX_PERIOD, ADX_THRESHOLD,
        n_neighbors=K_NEIGHBORS, lookahead=LOOKAHEAD, max_bars_back=200
    )

    # Plot option price and signals
    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=df_features.index, y=df_features['Option Close'],
        mode='lines', name='Option Close Price'
    ))
    fig.add_trace(go.Scatter(
        x=df_features.index[df_features['raw_pred'] == 1],
        y=df_features['Option Close'][df_features['raw_pred'] == 1],
        mode='markers',
        marker=dict(color='green', size=8, symbol='circle'),
        name='Predicted Long'
    ))
    fig.add_trace(go.Scatter(
        x=df_features.index[df_features['raw_pred'] == -1],
        y=df_features['Option Close'][df_features['raw_pred'] == -1],
        mode='markers',
        marker=dict(color='red', size=8, symbol='circle'),
        name='Predicted Short'
    ))

    fig.update_layout(
        title=f"Option Price for {symbol} with Combined Option+Stock Feature KNN Prediction",
        xaxis_title="Date/Time",
        yaxis_title="Option Close Price",
        template='plotly_white',
        hovermode='x unified',
        width=1100,
        height=650
    )
    fig.show()

    print("\nMost recent signals:")
    print(df_features[['Option Close', 'raw_pred']].tail(5))

    return df_features


if __name__ == '__main__':
    # Run exactly once, save result in df_features
    df_features = asyncio.run(main())

    # You can now use df_features variable here or export it
    # For example:
    print(df_features.head())




Most recent signals:
                     Option Close  raw_pred
date                                       
2025-07-22 15:55:00         23.66       NaN
2025-07-22 15:56:00         23.66       NaN
2025-07-22 15:57:00         23.66       NaN
2025-07-22 15:58:00         23.66       NaN
2025-07-22 15:59:00         23.66       NaN
                     Stock Price  Option Close         T        IV     Delta  \
date                                                                           
2025-06-23 10:24:00       167.41          10.9  0.488889  0.311011  0.449948   
2025-06-23 10:25:00       167.49          10.9  0.488887  0.310235  0.450479   
2025-06-23 10:26:00       167.53          10.9  0.488885  0.309847  0.450746   
2025-06-23 10:27:00       167.40          10.9  0.488883  0.311111  0.449881   
2025-06-23 10:28:00       167.41          10.9  0.488881  0.311014  0.449947   

                        Gamma  Option_Return        RSI        ADX  \
date                                   

In [11]:
# Calculate 4-bar forward returns
df_features['forward_return'] = df_features['close'].shift(-LOOKAHEAD) / df_features['close'] - 1

# Filter to bars where a prediction was made and the future is available
mask = df_features['raw_pred'].notna() & (df_features['raw_pred'] != 0) & df_features['forward_return'].notna()

# Evaluate correct direction
correct = np.where(
    (df_features['raw_pred'] == 1) & (df_features['forward_return'] > 0), 1,
    np.where((df_features['raw_pred'] == -1) & (df_features['forward_return'] < 0), 1, 0)
)
correct = correct[mask.values]

# Long/short statistics
long_mask = mask & (df_features['raw_pred'] == 1)
short_mask = mask & (df_features['raw_pred'] == -1)

long_returns = df_features.loc[long_mask, 'forward_return']
short_returns = df_features.loc[short_mask, 'forward_return']

print('\n---- STRATEGY PERFORMANCE METRICS OVER 4-BAR HORIZON ----')
print(f'Number of Predictions Scored: {mask.sum()}')
print(f'Percent Correct: {100 * correct.mean():.2f}%')
print(f'Long Signals:   {long_mask.sum()}  | Avg Return: {100 * long_returns.mean():.3f}%')
print(f'Short Signals:  {short_mask.sum()}  | Avg Return: {100 * short_returns.mean():.3f}%')
print('--------------------------------------------------------\n')

import pandas as pd
summary = pd.DataFrame({
    'Num Signals': [long_mask.sum(), short_mask.sum()],
    'Avg 4-Bar Ret %': [long_returns.mean() * 100, short_returns.mean() * 100]
}, index=['Long', 'Short'])
display(summary)



---- STRATEGY PERFORMANCE METRICS OVER 4-BAR HORIZON ----
Number of Predictions Scored: 1427
Percent Correct: 22.70%
Long Signals:   811  | Avg Return: 0.330%
Short Signals:  616  | Avg Return: -0.125%
--------------------------------------------------------



Unnamed: 0,Num Signals,Avg 4-Bar Ret %
Long,811,0.329884
Short,616,-0.125215
