In [9]:
import nest_asyncio
nest_asyncio.apply()
import asyncio
from ib_insync import *
import pandas as pd
import numpy as np
import ta
import plotly.graph_objects as go

"""
Filter parameters
-----------------
VOL_WINDOW:       ATR period (bars) for volatility calculation (consider higher for daily)
VOL_THRESHOLD:    Minimum ATR percent (ATR/close) for a bar to be considered volatile enough for trading (e.g., 0.01 = 1%)
REGIME_THRESHOLD: Rolling mean of returns must be above this (e.g., -0.1) to be considered "trending" regime
ADX_PERIOD:       ADX calculation period (bars)
ADX_THRESHOLD:    Minimum ADX value to confirm market is trending (20 = classic)
SMA_PERIOD:       SMA bars for trend filter (only long if above, short if below)
"""

# ========= CHART SETTINGS =========
# For intraday bars (e.g., 1-minute), use:
BAR_SIZE = '1 min'
DURATION = '14 D'
NUM_BARS = 4000

# For daily chart (uncomment the three lines below for daily bars):
# BAR_SIZE = '1 day'
# DURATION = '5 Y'
# NUM_BARS = 2000
# ================================

# Filter tuning (adjust to your needs)
VOL_WINDOW       = 30
VOL_THRESHOLD    = 0.0001   # e.g. 1% ATR for daily ~0.01, 0.0001 is permissive for minute bars
REGIME_THRESHOLD = -0.01
ADX_PERIOD       = 14
ADX_THRESHOLD    = 20
SMA_PERIOD       = 200

SYMBOL      = 'SPY'
EXCHANGE    = 'SMART'
CURRENCY    = 'USD'
LOOKAHEAD   = 4
K_NEIGHBORS = 8

# === Add or remove features here as desired ===
FEATURES = ['RSI', 'ADX', 'CCI', 'MACD']

async def fetch_ibkr_stock(symbol, exchange, currency, bar_size, duration, num_bars):
    """Fetches OHLCV bars from IBKR using specified bar size/duration."""
    ib = IB()
    await ib.connectAsync('127.0.0.1', 7497, clientId=np.random.randint(1000, 10000))
    contract = Stock(symbol, exchange, currency)
    await ib.qualifyContractsAsync(contract)
    bars = await ib.reqHistoricalDataAsync(
        contract=contract,
        endDateTime='',
        durationStr=duration,
        barSizeSetting=bar_size,
        whatToShow='TRADES',
        useRTH=True,
        formatDate=1,
        keepUpToDate=False
    )
    df = util.df(bars)
    df.set_index('date', inplace=True)
    df = df.tail(num_bars)
    ib.disconnect()
    return df

def lorentzian_distance(a, b):
    """Lorentzian distance metric for nearest neighbor calculations."""
    return np.sum(np.log(1 + np.abs(a - b)))

def volatility_filter(df, vol_window, vol_thresh):
    """ATR percent-based volatility filter. Returns Boolean mask."""
    atr = ta.volatility.AverageTrueRange(df['high'], df['low'], df['close'], window=vol_window).average_true_range()
    atr_pct = atr / df['close']
    return atr_pct > vol_thresh

def regime_filter(df, threshold):
    """Regime filter: rolling mean of returns must exceed threshold."""
    close_returns = df['close'].pct_change()
    regime = close_returns.rolling(window=20).mean()
    return regime > threshold

def adx_filter(df, adx_period, adx_threshold):
    """ADX trend filter: ADX must exceed threshold."""
    adx = ta.trend.ADXIndicator(df['high'], df['low'], df['close'], window=adx_period).adx()
    return adx > adx_threshold

def classify_lorentzian_knn_with_filters(
        df, features, sma_period,
        vol_window, vol_thresh,
        regime_thresh, adx_period, adx_thresh,
        n_neighbors=8, lookahead=4, max_bars_back=200):
    """
    L-KNN prediction with regime, volatility, ADX, and SMA filters.
    Now outputs a prediction for every bar, including the most recent bars.
    """
    pred = np.full(len(df), np.nan)
    closes = df['close'].values
    feature_mat = df[features].values
    sma = df['SMA'].values

    vol_filter = volatility_filter(df, vol_window, vol_thresh)
    regime_filt = regime_filter(df, regime_thresh)
    adx_filt = adx_filter(df, adx_period, adx_thresh)
    length = len(df)
    for idx in range(max_bars_back, length):  # Predict on every bar!
        if np.isnan(sma[idx]) or closes[idx] == 0:
            continue
        if not (vol_filter.iloc[idx] and regime_filt.iloc[idx] and adx_filt.iloc[idx]):
            continue
        anchor_start = max(0, idx - max_bars_back)
        anchor_indices = np.arange(anchor_start, idx)
        anchor_feats = feature_mat[anchor_indices, :]
        anchor_closes = closes[anchor_indices]
        target = feature_mat[idx]
        dists = np.array([lorentzian_distance(target, anchor_feats[j]) for j in range(anchor_feats.shape[0])])
        if len(dists) < n_neighbors:
            continue
        knn_indices = dists.argsort()[:n_neighbors]
        # For the last LOOKAHEAD bars, can't compute label, but can still predict.
        train_valid = anchor_indices[knn_indices] + lookahead < length
        selected = anchor_indices[knn_indices][train_valid]
        if len(selected) == 0:
            # For the latest, if not enough future bars, just output neutral (or previous).
            pred[idx] = 0
            continue
        y_train = (closes[selected + lookahead] > closes[selected]).astype(int) - \
                  (closes[selected + lookahead] < closes[selected]).astype(int)
        vote = y_train.sum()
        raw_signal = np.sign(vote) if vote != 0 else 0
        # SMA trend filter: only long if above SMA, short if below
        if raw_signal == 1 and closes[idx] > sma[idx]:
            pred[idx] = 1
        elif raw_signal == -1 and closes[idx] < sma[idx]:
            pred[idx] = -1
        else:
            pred[idx] = 0
    return pred

# ==== Main Pipeline ====
async def main():
    df = await fetch_ibkr_stock(SYMBOL, EXCHANGE, CURRENCY, BAR_SIZE, DURATION, NUM_BARS)

    # === Feature Engineering ===
    df['RSI'] = ta.momentum.RSIIndicator(df['close'], window=14).rsi()
    df['ADX'] = ta.trend.ADXIndicator(df['high'], df['low'], df['close'], window=14).adx()
    df['CCI'] = ta.trend.CCIIndicator(df['high'], df['low'], df['close'], window=20).cci()
    df['SMA'] = ta.trend.SMAIndicator(df['close'], window=SMA_PERIOD).sma_indicator()
    # ---- MACD Features ----
    macd_ind = ta.trend.MACD(df['close'], window_slow=26, window_fast=12, window_sign=9)
    df['MACD']        = macd_ind.macd()
    df['MACD_signal'] = macd_ind.macd_signal()
    df['MACD_hist']   = macd_ind.macd_diff()
    # -----------------------
    df = df.dropna().reset_index()

    # === ML + Filters ===
    df['raw_pred'] = classify_lorentzian_knn_with_filters(
        df, FEATURES, SMA_PERIOD, VOL_WINDOW, VOL_THRESHOLD,
        REGIME_THRESHOLD, ADX_PERIOD, ADX_THRESHOLD,
        n_neighbors=K_NEIGHBORS, lookahead=LOOKAHEAD, max_bars_back=200
    )

    # === Trade state logic ===
    df['trade_signal'] = np.nan
    prev_position = 0
    for i, row in df.iterrows():
        sig = row['raw_pred']
        if np.isnan(sig) or sig == 0:
            continue
        sig = int(sig)
        if sig != prev_position:
            df.at[i, 'trade_signal'] = sig
            prev_position = sig

    # === Plot Interactive Chart ===
    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=df['date'], y=df['close'],
        mode='lines', name='Close Price', line=dict(color='black', width=2)
    ))
    fig.add_trace(go.Scatter(
        x=df['date'][df['raw_pred'] == 1], y=df['close'][df['raw_pred'] == 1],
        mode='markers', name='Kernel Long', marker=dict(color='green', size=8, symbol='circle')
    ))
    fig.add_trace(go.Scatter(
        x=df['date'][df['raw_pred'] == -1], y=df['close'][df['raw_pred'] == -1],
        mode='markers', name='Kernel Short', marker=dict(color='red', size=8, symbol='circle')
    ))
    buys = df[df['trade_signal'] == 1]
    sells = df[df['trade_signal'] == -1]
    fig.add_trace(go.Scatter(
        x=buys['date'], y=buys['close'],
        mode='markers', marker=dict(symbol='triangle-up', color='green', size=15, line=dict(color='black', width=2)),
        name='Buy Signal'
    ))
    fig.add_trace(go.Scatter(
        x=sells['date'], y=sells['close'],
        mode='markers', marker=dict(symbol='triangle-down', color='red', size=15, line=dict(color='black', width=2)),
        name='Sell Signal'
    ))
    fig.update_layout(
        title=f"{SYMBOL} | Lorentzian KNN with MACD, Vol, Regime, ADX, SMA Filters",
        xaxis_title="Date/Time",
        yaxis_title="Price",
        hovermode='x unified',
        template='plotly_white',
        width=1100, height=650,
        legend=dict(x=0.01, y=0.99)
    )
    fig.show()

    # === Print predictions for the most recent 4 bars (including most recent bar) ===
    print("\nSignal Estimates for the Most Recent 4 Bars:")
    print(df[['date', 'close', 'raw_pred', 'trade_signal']].tail(4).to_string(index=False))

    return df

# --- Run ---
df_signals = asyncio.run(main())
df = df_signals


Signal Estimates for the Most Recent 4 Bars:
                     date  close  raw_pred  trade_signal
2025-07-17 15:56:00-04:00 627.98       NaN           NaN
2025-07-17 15:57:00-04:00 628.04       NaN           NaN
2025-07-17 15:58:00-04:00 628.19       NaN           NaN
2025-07-17 15:59:00-04:00 627.98       NaN           NaN


In [10]:
# --- EVALUATE PREDICTION ACCURACY AND RETURNS OVER 4-BAR HORIZON ---

# Calculate 4-bar forward returns
df['forward_return'] = df['close'].shift(-LOOKAHEAD) / df['close'] - 1

# Filter to bars where a prediction was made and the future is available
mask = df['raw_pred'].notna() & (df['raw_pred'] != 0) & df['forward_return'].notna()

# Evaluate correct direction
correct = np.where(
    (df['raw_pred'] == 1) & (df['forward_return'] > 0), 1,
    np.where((df['raw_pred'] == -1) & (df['forward_return'] < 0), 1, 0)
)
# Only keep for valid prediction bars
correct = correct[mask.values]

# Long/short statistics
long_mask = mask & (df['raw_pred'] == 1)
short_mask = mask & (df['raw_pred'] == -1)

long_returns = df.loc[long_mask, 'forward_return']
short_returns = df.loc[short_mask, 'forward_return']

# Display results
print('\n---- STRATEGY PERFORMANCE METRICS OVER 4-BAR HORIZON ----')
print(f'Number of Predictions Scored: {mask.sum()}')
print(f'Percent Correct: {100 * correct.mean():.2f}%')
print(f'Long Signals:   {long_mask.sum()}  | Avg Return: {100 * long_returns.mean():.3f}%')
print(f'Short Signals:  {short_mask.sum()}  | Avg Return: {100 * short_returns.mean():.3f}%')
print('--------------------------------------------------------\n')

# Optionally, display a Markdown table of results
import pandas as pd
summary = pd.DataFrame({
    'Num Signals': [long_mask.sum(), short_mask.sum()],
    'Avg 4-Bar Ret %': [long_returns.mean() * 100, short_returns.mean() * 100]
}, index=['Long', 'Short'])
display(summary)



---- STRATEGY PERFORMANCE METRICS OVER 4-BAR HORIZON ----
Number of Predictions Scored: 1065
Percent Correct: 55.49%
Long Signals:   627  | Avg Return: 0.005%
Short Signals:  438  | Avg Return: -0.012%
--------------------------------------------------------



Unnamed: 0,Num Signals,Avg 4-Bar Ret %
Long,627,0.004509
Short,438,-0.011529
