In [1]:
import nest_asyncio
nest_asyncio.apply()

import asyncio
from ib_insync import *
import pandas as pd
import numpy as np
import ta
import plotly.graph_objects as go
from scipy.stats import norm
from scipy.optimize import brentq

# --- Parameters for options ---
symbol = 'JOE'
expiry = '20251219'
strike = 60
right = 'C'  # Call or Put
durationStr = '7 D'
barSizeSetting = '5 mins'
r = 0.044

# --- Parameters for KNN stock prediction ---
SYMBOL = 'SPY'
EXCHANGE = 'SMART'
CURRENCY = 'USD'
BAR_SIZE = '1 min'
DURATION = '28 D'
NUM_BARS = 8000
VOL_WINDOW = 30
VOL_THRESHOLD = 0.0001
REGIME_THRESHOLD = -0.01
ADX_PERIOD = 14
ADX_THRESHOLD = 20
SMA_PERIOD = 200
LOOKAHEAD = 4
K_NEIGHBORS = 8
FEATURES = ['RSI', 'ADX', 'CCI', 'MACD']

# --- Black-Scholes helper functions ---

def d1(S, K, r, sigma, T):
    return (np.log(S / K) + (r + 0.5 * sigma ** 2) * T) / (sigma * np.sqrt(T))

def d2(S, K, r, sigma, T):
    return d1(S, K, r, sigma, T) - sigma * np.sqrt(T)

def bs_price(S, K, r, sigma, T, option_type):
    D1 = d1(S, K, r, sigma, T)
    D2 = d2(S, K, r, sigma, T)
    if option_type == 'C':
        return S * norm.cdf(D1) - K * np.exp(-r * T) * norm.cdf(D2)
    else:
        return K * np.exp(-r * T) * norm.cdf(-D2) - S * norm.cdf(-D1)

def bs_delta(S, K, r, sigma, T, option_type):
    D1 = d1(S, K, r, sigma, T)
    return norm.cdf(D1) if option_type == 'C' else norm.cdf(D1) - 1

def bs_gamma(S, K, r, sigma, T):
    D1 = d1(S, K, r, sigma, T)
    return norm.pdf(D1) / (S * sigma * np.sqrt(T))

def bs_vega(S, K, r, sigma, T):
    D1 = d1(S, K, r, sigma, T)
    return S * norm.pdf(D1) * np.sqrt(T) / 100

def bs_theta(S, K, r, sigma, T, option_type):
    D1 = d1(S, K, r, sigma, T)
    D2 = d2(S, K, r, sigma, T)
    pdf_D1 = norm.pdf(D1)
    if option_type == 'C':
        theta = -S * pdf_D1 * sigma / (2 * np.sqrt(T)) - r * K * np.exp(-r * T) * norm.cdf(D2)
    else:
        theta = -S * pdf_D1 * sigma / (2 * np.sqrt(T)) + r * K * np.exp(-r * T) * norm.cdf(-D2)
    return theta / 365

def implied_volatility(market_price, S, K, r, T, option_type):
    def objective(sigma):
        return bs_price(S, K, r, sigma, T, option_type) - market_price
    try:
        return brentq(objective, 1e-4, 5.0, maxiter=500)
    except Exception:
        return np.nan

# --- Async Data Fetching ---
async def get_option_stock_data(symbol, expiry, strike, right, duration, barsize):
    ib = IB()
    await ib.connectAsync('127.0.0.1', 7497, clientId=np.random.randint(1000, 9999))

    stock = Stock(symbol, 'SMART', 'USD')
    opt = Option(symbol, expiry, strike, right, 'SMART')

    await ib.qualifyContractsAsync(stock)
    await ib.qualifyContractsAsync(opt)

    stock_bars = await ib.reqHistoricalDataAsync(stock, '', duration, barsize, 'TRADES', True, 1)
    df_stock = util.df(stock_bars).set_index('date').rename(columns={'close': 'Stock Price'})

    opt_bars = await ib.reqHistoricalDataAsync(opt, '', duration, barsize, 'TRADES', True, 1)
    df_opt = util.df(opt_bars).set_index('date').rename(columns={'close': 'Option Close'})

    ib.disconnect()

    df = df_stock[['Stock Price']].join(df_opt[['Option Close']], how='inner').dropna()
    return df

async def fetch_ibkr_stock(symbol, exchange, currency, bar_size, duration, num_bars):
    ib = IB()
    await ib.connectAsync('127.0.0.1', 7497, clientId=np.random.randint(1000, 9999))
    contract = Stock(symbol, exchange, currency)
    await ib.qualifyContractsAsync(contract)
    bars = await ib.reqHistoricalDataAsync(
        contract, '', duration, bar_size, 'TRADES', True, 1, False)
    df = util.df(bars).set_index('date').tail(num_bars)
    ib.disconnect()
    return df

# --- Filters & KNN functions ---
def lorentzian_distance(a, b):
    return np.sum(np.log(1 + np.abs(a - b)))

def volatility_filter(df, vol_window, vol_thresh):
    atr = ta.volatility.AverageTrueRange(df['high'], df['low'], df['close'], window=vol_window).average_true_range()
    return (atr / df['close']) > vol_thresh

def regime_filter(df, threshold):
    return df['close'].pct_change().rolling(20).mean() > threshold

def adx_filter(df, adx_period, adx_threshold):
    return ta.trend.ADXIndicator(df['high'], df['low'], df['close'], window=adx_period).adx() > adx_threshold

def classify_lorentzian_knn_with_filters(df, features, sma_period,
                                         vol_window, vol_thresh,
                                         regime_thresh, adx_period, adx_thresh,
                                         n_neighbors=8, lookahead=4, max_bars_back=200):
    pred = np.full(len(df), np.nan)
    closes = df['close'].values
    feature_mat = df[features].values
    sma = df['SMA'].values

    vol_filter = volatility_filter(df, vol_window, vol_thresh)
    regime_filt = regime_filter(df, regime_thresh)
    adx_filt = adx_filter(df, adx_period, adx_thresh)
    length = len(df)
    for idx in range(max_bars_back, length):
        if np.isnan(sma[idx]) or closes[idx] == 0:
            continue
        if not (vol_filter.iloc[idx] and regime_filt.iloc[idx] and adx_filt.iloc[idx]):
            continue
        anchor_start = max(0, idx - max_bars_back)
        anchor_indices = np.arange(anchor_start, idx)
        anchor_feats = feature_mat[anchor_indices, :]
        anchor_closes = closes[anchor_indices]
        target = feature_mat[idx]
        dists = np.array([lorentzian_distance(target, anchor_feats[j]) for j in range(anchor_feats.shape[0])])
        if len(dists) < n_neighbors:
            continue
        knn_indices = dists.argsort()[:n_neighbors]
        train_valid = anchor_indices[knn_indices] + lookahead < length
        selected = anchor_indices[knn_indices][train_valid]
        if len(selected) == 0:
            pred[idx] = 0
            continue
        y_train = (closes[selected + lookahead] > closes[selected]).astype(int) - (closes[selected + lookahead] < closes[selected]).astype(int)
        vote = y_train.sum()
        raw_signal = np.sign(vote) if vote != 0 else 0
        if raw_signal == 1 and closes[idx] > sma[idx]:
            pred[idx] = 1
        elif raw_signal == -1 and closes[idx] < sma[idx]:
            pred[idx] = -1
        else:
            pred[idx] = 0
    return pred

# --- Main async function fetching and processing both datasets ---
async def main():
    # Fetch option + underlying stock data for given symbol
    df_option = await get_option_stock_data(symbol, expiry, strike, right, durationStr, barSizeSetting)
    expiry_date = pd.to_datetime(expiry, format='%Y%m%d')
    df_option.index = df_option.index.tz_localize(None)
    df_option['T'] = (expiry_date - df_option.index).total_seconds() / (365.25 * 24 * 3600)
    df_option = df_option[df_option['T'] > 0].copy()

    # Calculate implied volatility and Greeks on option data
    def calculate_greeks(row):
        S = row['Stock Price']
        K = strike
        price = row['Option Close']
        T = row['T']
        option_type = right
        iv = implied_volatility(price, S, K, r, T, option_type)
        if np.isnan(iv):
            return pd.Series([np.nan] * 5)
        delta = bs_delta(S, K, r, iv, T, option_type)
        gamma = bs_gamma(S, K, r, iv, T)
        vega = bs_vega(S, K, r, iv, T)
        theta = bs_theta(S, K, r, iv, T, option_type)
        return pd.Series([iv, delta, gamma, vega, theta])
    df_option[['IV_calculated', 'Delta', 'Gamma', 'Vega', 'Theta']] = df_option.apply(calculate_greeks, axis=1)

    # Fetch stock data for KNN prediction
    df_stock = await fetch_ibkr_stock(SYMBOL, EXCHANGE, CURRENCY, BAR_SIZE, DURATION, NUM_BARS)

    # Feature engineering on stock data
    df_stock['RSI'] = ta.momentum.RSIIndicator(df_stock['close'], window=14).rsi()
    df_stock['ADX'] = ta.trend.ADXIndicator(df_stock['high'], df_stock['low'], df_stock['close'], window=14).adx()
    df_stock['CCI'] = ta.trend.CCIIndicator(df_stock['high'], df_stock['low'], df_stock['close'], window=20).cci()
    df_stock['SMA'] = ta.trend.SMAIndicator(df_stock['close'], window=SMA_PERIOD).sma_indicator()
    macd_ind = ta.trend.MACD(df_stock['close'], window_slow=26, window_fast=12, window_sign=9)
    df_stock['MACD'] = macd_ind.macd()
    df_stock['MACD_signal'] = macd_ind.macd_signal()
    df_stock['MACD_hist'] = macd_ind.macd_diff()
    df_stock = df_stock.dropna().reset_index()

    # Run KNN classification
    df_stock['raw_pred'] = classify_lorentzian_knn_with_filters(
        df_stock, FEATURES, SMA_PERIOD, VOL_WINDOW, VOL_THRESHOLD,
        REGIME_THRESHOLD, ADX_PERIOD, ADX_THRESHOLD,
        n_neighbors=K_NEIGHBORS, lookahead=LOOKAHEAD, max_bars_back=200
    )

    # Align or merge the stock KNN predictions with the option data by timestamps if possible,
    # or use the latest prediction as a proxy.
    latest_stock_pred = df_stock['raw_pred'].iloc[-1] if not df_stock.empty else np.nan
    df_option['KNN_Stock_Prediction'] = latest_stock_pred

    return df_option

# --- Dash app code (similar to your current app) ---
from dash import Dash, dcc, html, Input, Output, State

app = Dash(__name__)

columns = [
    'Stock Price', 'Option Close',
    'IV_calculated', 'Delta', 'Gamma', 'Vega', 'Theta',
    'KNN_Stock_Prediction'
]
y_names = {
    'Stock Price': 'Price ($)',
    'Option Close': 'Price ($)',
    'IV_calculated': 'Implied Volatility (%)',
    'Delta': 'Delta',
    'Gamma': 'Gamma',
    'Vega': 'Vega',
    'Theta': 'Theta',
    'KNN_Stock_Prediction': 'Stock Direction Prediction'
}

app.layout = html.Div([
    dcc.Interval(id='interval-component', interval=60000, n_intervals=0),  # e.g., update every minute
    *[dcc.Graph(id=f'graph-{col}') for col in columns]
])

@app.callback(
    [Output(f'graph-{col}', 'figure') for col in columns],
    Input('interval-component', 'n_intervals'),
    [State(f'graph-{col}', 'relayoutData') for col in columns]
)
def update_graphs(n_intervals, *relayouts):
    # Use global or cached df if needed, here assuming df from main() is global or stored appropriately.
    global df
    figures = []
    xrange = None
    for relayout in relayouts:
        if relayout and 'xaxis.range[0]' in relayout and 'xaxis.range[1]' in relayout:
            xrange = [relayout['xaxis.range[0]'], relayout['xaxis.range[1]']]
            break

    for col in columns:
        fig = go.Figure()
        if col == 'IV_calculated':
            yvals = 100 * df[col]
            y_axis_title = "Implied Volatility (%)"
        elif col == 'KNN_Stock_Prediction':
            yvals = df[col]  # discrete signals for stock direction
            y_axis_title = y_names[col]
        else:
            yvals = df[col]
            y_axis_title = y_names[col]

        fig.add_trace(go.Scatter(
            x=df.index, y=yvals,
            mode='lines+markers' if col == 'KNN_Stock_Prediction' else 'lines',
            name=col,
            line=dict(width=2)
        ))

        layout_kwargs = dict(
            title=f"{col} over Time",
            xaxis_title='Date/Time',
            yaxis_title=y_axis_title,
            template='plotly_white',
            width=900,
            height=500,
            hovermode='x unified'
        )
        if xrange:
            layout_kwargs['xaxis_range'] = xrange

        fig.update_layout(**layout_kwargs)
        figures.append(fig)
    return figures

# --- Run the async main and launch app ---
if __name__ == '__main__':
    df = asyncio.run(main())  # Get combined data with Greeks and KNN prediction
    app.run(debug=True)


Address already in use
Port 8050 is in use by another program. Either identify and stop that program, or start the server with a different port.


SystemExit: 1


To exit: use 'exit', 'quit', or Ctrl-D.



In [None]:
# --- EVALUATE PREDICTION ACCURACY AND RETURNS OVER 4-BAR HORIZON ---

# Calculate 4-bar forward returns
df['forward_return'] = df['close'].shift(-LOOKAHEAD) / df['close'] - 1

# Filter to bars where a prediction was made and the future is available
mask = df['raw_pred'].notna() & (df['raw_pred'] != 0) & df['forward_return'].notna()

# Evaluate correct direction
correct = np.where(
    (df['raw_pred'] == 1) & (df['forward_return'] > 0), 1,
    np.where((df['raw_pred'] == -1) & (df['forward_return'] < 0), 1, 0)
)
# Only keep for valid prediction bars
correct = correct[mask.values]

# Long/short statistics
long_mask = mask & (df['raw_pred'] == 1)
short_mask = mask & (df['raw_pred'] == -1)

long_returns = df.loc[long_mask, 'forward_return']
short_returns = df.loc[short_mask, 'forward_return']

# Display results
print('\n---- STRATEGY PERFORMANCE METRICS OVER 4-BAR HORIZON ----')
print(f'Number of Predictions Scored: {mask.sum()}')
print(f'Percent Correct: {100 * correct.mean():.2f}%')
print(f'Long Signals:   {long_mask.sum()}  | Avg Return: {100 * long_returns.mean():.3f}%')
print(f'Short Signals:  {short_mask.sum()}  | Avg Return: {100 * short_returns.mean():.3f}%')
print('--------------------------------------------------------\n')

# Optionally, display a Markdown table of results
import pandas as pd
summary = pd.DataFrame({
    'Num Signals': [long_mask.sum(), short_mask.sum()],
    'Avg 4-Bar Ret %': [long_returns.mean() * 100, short_returns.mean() * 100]
}, index=['Long', 'Short'])
display(summary)


NameError: name 'df' is not defined