Do a random forest return predictor for long time equity screens, maybe including rates/ sectors / commodities / geoploitic indicators. 
Find the % of regimes in the last 30 days that have a higher volatility than the current IV 
Add trend in shannon entropy or/and regime percentages to evaluate recessions / hostile environments beginning. could also look at earnings timestamps affecting options prices 

Want to find stocks with a low hv/iv high regime % in highest stdev state, and high # of days with stdev above avg iv on options 

In [4]:
import yfinance as yf 
from yfinance import EquityQuery
import numpy as np
import pandas as pd
import time


In [5]:
import numpy as np
import pandas as pd
import yfinance as yf
from concurrent.futures import ThreadPoolExecutor, as_completed
import time
from hmmlearn.hmm import GaussianHMM
from scipy.stats import entropy
from statsmodels.tsa.stattools import adfuller
from yfinance import EquityQuery, screen
from tqdm import tqdm

# ---------------- CONFIG ----------------
num_stocks = 250
max_workers = 6  # To reduce rate limit risk
hv_window = 20
price_volume_threshold = 100_000_000
batch_delay = 1.0  # seconds between batches
option_retry_delay = 5  # seconds retry delay for options fetch
N = 252  # annualization factor for volatility

# ---------------- HELPERS ----------------

def find_closest_value(data_list, target_value):
    if len(data_list) == 0:
        return None
    return min(data_list, key=lambda x: abs(x - target_value))

def filter_special_expiry(expiry_list):
    return sorted(expiry_list)[:2]

def shannon_entropy(series, bins=10):
    hist, _ = np.histogram(series, bins=bins, density=True)
    hist = hist[hist > 0]
    return entropy(hist, base=2)

def hmm_regime_switch_prob_and_count(returns, n_states=2):
    returns = returns.values.reshape(-1, 1)
    model = GaussianHMM(n_components=n_states, covariance_type="diag", n_iter=100)
    model.fit(returns)
    hidden_states = model.predict(returns)
    last_state = hidden_states[-1]
    transmat = model.transmat_
    switch_prob = 1 - transmat[last_state, last_state]
    regime_switch_count = np.sum(hidden_states[1:] != hidden_states[:-1])
    return switch_prob, regime_switch_count

def fit_hmm_and_stats(returns, n_states=3):
    model = GaussianHMM(
        n_components=n_states,
        covariance_type="diag",
        n_iter=500,
        tol=1e-2,
        random_state=42,
        verbose=False
    )
    X = returns.values.reshape(-1, 1)
    model.fit(X)
    hidden_states = model.predict(X)
    means = model.means_.flatten()
    stds = np.sqrt(np.array([np.diag(cov)[0] for cov in model.covars_]))
    return hidden_states, means, stds

def pct_regimes_higher_than_iv(hidden_states, stds, avg_iv):
    last30 = hidden_states[-30:]
    stds_annualized = stds * np.sqrt(N)
    last30_vols_annualized = stds_annualized[last30]
    pct_higher = np.mean(last30_vols_annualized > avg_iv) * 100
    return pct_higher

def hmm_analysis(returns, avg_iv):
    if returns is None or avg_iv is None:
        return {
            "hmm_mean_0": None, "hmm_std_0": None,
            "hmm_mean_1": None, "hmm_std_1": None,
            "hmm_mean_2": None, "hmm_std_2": None,
            "pct_last30_in_highest_stdev_state": None,
            "highest_stdev_state": None,
            "pct_regimes_last30_higher_than_avg_iv": None
        }
    hidden_states, means, stds = fit_hmm_and_stats(returns, n_states=3)
    high_stdev_state = int(np.argmax(stds))
    last30 = hidden_states[-30:]
    pct_high = np.mean(last30 == high_stdev_state) * 100
    pct_regimes_higher = pct_regimes_higher_than_iv(hidden_states, stds, avg_iv)
    return {
        "hmm_mean_0": means[0], "hmm_std_0": stds[0],
        "hmm_mean_1": means[1], "hmm_std_1": stds[1],
        "hmm_mean_2": means[2], "hmm_std_2": stds[2],
        "pct_last30_in_highest_stdev_state": pct_high,
        "highest_stdev_state": high_stdev_state,
        "pct_regimes_last30_higher_than_avg_iv": pct_regimes_higher
    }

def hmm_regime_minute_analysis(ticker):
    # Download last 7 days 1-min data
    try:
        df = yf.download(ticker, period="7d", interval="1m", progress=False)
        closes = df['Close'].dropna()
        if len(closes) < 1000:
            return np.nan, np.nan, np.nan, np.nan, np.nan
        log_returns = np.log(closes).diff().dropna()
        simple_returns = closes.pct_change().dropna()

        ent = shannon_entropy(log_returns)
        switch_prob, switch_count = hmm_regime_switch_prob_and_count(log_returns)
        adf_pvalue = adfuller(closes)[1]
        avg_return = simple_returns.mean()
        return ent, switch_prob*100, switch_count, adf_pvalue, avg_return
    except Exception:
        return np.nan, np.nan, np.nan, np.nan, np.nan

def get_options(symbol, current_price, max_retries=2, retry_delay=option_retry_delay):
    for attempt in range(max_retries):
        try:
            stock = yf.Ticker(symbol)
            options_chain = stock.options
            if not options_chain:
                return pd.DataFrame(columns=['strike', 'impliedVolatility', 'openInterest', 'symbol'])
            expiries = filter_special_expiry(options_chain)
            options_data = []
            if current_price is None:
                return pd.DataFrame(columns=['strike', 'impliedVolatility', 'openInterest', 'symbol'])
            for expiry in expiries:
                chain = stock.option_chain(expiry)
                for opt_type, df in [('call', chain.calls), ('put', chain.puts)]:
                    if not df.empty:
                        closest_strike = find_closest_value(df['strike'], current_price)
                        filtered = df[df['strike'] == closest_strike].copy()
                        filtered['type'] = opt_type
                        filtered['expiry'] = expiry
                        filtered['symbol'] = symbol
                        options_data.append(filtered)
            if options_data:
                result = pd.concat(options_data, ignore_index=True)
                result = result[result['openInterest'] > 500]
                return result[['strike', 'impliedVolatility', 'openInterest', 'symbol']]
            return pd.DataFrame(columns=['strike', 'impliedVolatility', 'openInterest', 'symbol'])
        except Exception:
            if attempt < max_retries - 1:
                time.sleep(retry_delay)
                continue
            else:
                return pd.DataFrame(columns=['strike', 'impliedVolatility', 'openInterest', 'symbol'])
    return pd.DataFrame(columns=['strike', 'impliedVolatility', 'openInterest', 'symbol'])

def fetch_symbol_data_all(symbol):
    try:
        ticker = yf.Ticker(symbol)
        hist = ticker.history(period="130d")  # 130d covers all needed windows + 7d intraday fetches separately handled
        closes = hist['Close'].dropna()
        volumes = hist['Volume'].dropna()
        if len(closes) < hv_window + 1 or len(closes) < 121 or len(volumes) < 2:
            return None
        # Previous day
        prev_day = hist.iloc[-2]
        price = float(prev_day['Close'])
        volume = int(prev_day['Volume'])
        if price * volume < price_volume_threshold:
            return None

        # Historical volatility (20 days)
        log_returns = np.log(closes / closes.shift(1)).dropna()
        hv = log_returns[-hv_window:].std() * np.sqrt(252)

        # Last 120 returns for HMM
        returns_120 = np.log(closes / closes.shift(1)).dropna()[-120:]
        if len(returns_120) < 120:
            returns_120 = None

        # Options data
        options_df = get_options(symbol, price)
        if options_df.empty:
            return None
        avg_iv = options_df['impliedVolatility'].mean()
        ratio = avg_iv / hv if hv > 0 else None
        mean_oi = options_df['openInterest'].mean()

        # Apply HMM for returns
        hmm_stats = hmm_analysis(returns_120, avg_iv)

        # 7-day 1m intraday regime analysis
        ent, switch_prob, switch_count, adf_pvalue, avg_return = hmm_regime_minute_analysis(symbol)

        return {
            'symbol': symbol.upper(),
            'price': price,
            'volume': volume,
            'historical_volatility': hv,
            'avg_iv': avg_iv,
            'iv/hv_ratio': ratio,
            'mean_openInterest': mean_oi,
            # HMM daily regimes stats
            **hmm_stats,
            # Intraday minute data stats
            'ShannonEntropy': ent,
            'Regimeswitchprob': switch_prob,
            'Regimeswitchcount': switch_count,
            'ADF_Pvalue': adf_pvalue,
            'AvgReturn_7d': avg_return,
            # Store returns for reference (optional, can be removed to reduce memory)
            #'returns_120': returns_120,
        }
    except Exception as e:
        print(f"Error fetching data for {symbol}: {e}")
        return None

# ---------------- YFINANCE SCREENER QUERY ----------------
screen_query = EquityQuery("and", [
    EquityQuery("is-in", ["region", "us"]),
    EquityQuery("is-in", ["sector",
        "Technology", "Financial Services", "Consumer Cyclical",
        "Communication Services", "Basic Materials", "Industrials"
    ]),
    EquityQuery("GTE", ["eodprice", 0.50]),
    EquityQuery("GTE", ["avgdailyvol3m", 100000]),
    EquityQuery("LTE", ["short_percentage_of_float.value", 100])
])

# ---------------- BATCH PROCESS ----------------
def process_batch(offset, filterdf, options):
    screener = screen(
        screen_query,
        sortField="avgdailyvol3m",
        sortAsc=False,
        size=num_stocks,
        offset=offset
    )
    stocks = screener.get('quotes', [])
    if not stocks:
        return filterdf, options, offset, False

    symbols = [s['symbol'] for s in stocks]
    results = []
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = {executor.submit(fetch_symbol_data_all, sym): sym for sym in symbols}
        for future in as_completed(futures):
            data = future.result()
            if data:
                results.append(data)

    if not results:
        return filterdf, options, offset + num_stocks, True

    # Build DataFrame for symbol data (excluding options which is kept separate)
    batch_df = pd.DataFrame([{k:v for k,v in r.items() if k != "options_df"} for r in results])

    # Concatenate all options data into options DataFrame
    options_dfs = [r.get('options_df') for r in results if r.get('options_df') is not None]
    if options_dfs:
        all_options = pd.concat(options_dfs, ignore_index=True)
        options = pd.concat([options, all_options], ignore_index=True)

    filterdf = pd.concat([filterdf, batch_df], ignore_index=True)

    # Pause between batches to reduce rate limiting
    time.sleep(batch_delay)

    return filterdf, options, offset + num_stocks, True

# ---------------- MAIN ----------------
offset = 0
filterdf = pd.DataFrame()
options = pd.DataFrame()

while True:
    try:
        filterdf, options, offset, has_more = process_batch(offset, filterdf, options)
        print(f"Processed batch, total rows so far: {len(filterdf)}")
        if not has_more:
            break
    except Exception as e:
        if e.__class__.__name__ == "YFRateLimitError":
            print("Rate limit reached. Ending batch processing loop early.")
            break
        else:
            raise  # re-raise unknown exceptions



print("\nFinal results:")
print(filterdf)


  df = yf.download(ticker, period="7d", interval="1m", progress=False)
  df = yf.download(ticker, period="7d", interval="1m", progress=False)
Model is not converging.  Current: 14118.92868313863 is not greater than 14121.723691475716. Delta is -2.7950083370851644
Model is not converging.  Current: 14119.231218238974 is not greater than 14121.73907270127. Delta is -2.5078544622956542
Model is not converging.  Current: 14119.083992568101 is not greater than 14121.694030093648. Delta is -2.6100375255464314
Model is not converging.  Current: 14120.533357183856 is not greater than 14121.21657769436. Delta is -0.6832205105038156
Model is not converging.  Current: 14118.680847180081 is not greater than 14121.673123718501. Delta is -2.992276538419901
Model is not converging.  Current: 14119.730559821657 is not greater than 14121.674938138518. Delta is -1.9443783168608206
  df = yf.download(ticker, period="7d", interval="1m", progress=False)
  df = yf.download(ticker, period="7d", interval="1m"

Processed batch, total rows so far: 133


  df = yf.download(ticker, period="7d", interval="1m", progress=False)
Model is not converging.  Current: 248.63020257530323 is not greater than 248.63291733598487. Delta is -0.002714760681641337
Model is not converging.  Current: 323.36532438568594 is not greater than 323.51399071866723. Delta is -0.14866633298129273
  df = yf.download(ticker, period="7d", interval="1m", progress=False)
Model is not converging.  Current: 13248.958365662758 is not greater than 13250.404745703914. Delta is -1.4463800411558623
  df = yf.download(ticker, period="7d", interval="1m", progress=False)
  df = yf.download(ticker, period="7d", interval="1m", progress=False)
Model is not converging.  Current: 14124.84824042614 is not greater than 14126.103888294514. Delta is -1.2556478683745809
Model is not converging.  Current: 338.23075167390994 is not greater than 338.25733665211567. Delta is -0.026584978205733023
  df = yf.download(ticker, period="7d", interval="1m", progress=False)
Model is not converging.  

Processed batch, total rows so far: 211


Model is not converging.  Current: 303.42101419203556 is not greater than 303.65803839033595. Delta is -0.23702419830038934
  df = yf.download(ticker, period="7d", interval="1m", progress=False)
  df = yf.download(ticker, period="7d", interval="1m", progress=False)
  df = yf.download(ticker, period="7d", interval="1m", progress=False)
  df = yf.download(ticker, period="7d", interval="1m", progress=False)
  df = yf.download(ticker, period="7d", interval="1m", progress=False)
Model is not converging.  Current: 26709.78486938255 is not greater than 26713.14027037087. Delta is -3.355400988319161
Model is not converging.  Current: 26710.24799015914 is not greater than 26712.098137819856. Delta is -1.8501476607161749
Model is not converging.  Current: 26707.352899888472 is not greater than 26714.951335323254. Delta is -7.5984354347820044
Model is not converging.  Current: 26709.657257972365 is not greater than 26713.31011558699. Delta is -3.652857614626555
  df = yf.download(ticker, period="

Processed batch, total rows so far: 255


  df = yf.download(ticker, period="7d", interval="1m", progress=False)
Model is not converging.  Current: 12634.228007426229 is not greater than 12644.066200901168. Delta is -9.838193474939544
  df = yf.download(ticker, period="7d", interval="1m", progress=False)
Model is not converging.  Current: 12309.879129990262 is not greater than 12312.235316735756. Delta is -2.3561867454936873
  df = yf.download(ticker, period="7d", interval="1m", progress=False)
  df = yf.download(ticker, period="7d", interval="1m", progress=False)
  df = yf.download(ticker, period="7d", interval="1m", progress=False)
  df = yf.download(ticker, period="7d", interval="1m", progress=False)
Model is not converging.  Current: 11939.511889106952 is not greater than 11940.102353175864. Delta is -0.5904640689113876
  df = yf.download(ticker, period="7d", interval="1m", progress=False)
Model is not converging.  Current: 10430.981198420463 is not greater than 10433.960260875036. Delta is -2.979062454573068
  df = yf.dow

Processed batch, total rows so far: 273


Model is not converging.  Current: 293.6942808309349 is not greater than 293.7923925210471. Delta is -0.098111690112205
  df = yf.download(ticker, period="7d", interval="1m", progress=False)
Model is not converging.  Current: 11291.668457665673 is not greater than 11293.105687570234. Delta is -1.4372299045608088
  df = yf.download(ticker, period="7d", interval="1m", progress=False)
  df = yf.download(ticker, period="7d", interval="1m", progress=False)
Model is not converging.  Current: 8814.32123127461 is not greater than 8819.903110057252. Delta is -5.58187878264107
Model is not converging.  Current: 8815.148869125083 is not greater than 8815.413688994562. Delta is -0.2648198694787425
  df = yf.download(ticker, period="7d", interval="1m", progress=False)
Model is not converging.  Current: 7045.695921900121 is not greater than 7045.853370436104. Delta is -0.15744853598334885
Model is not converging.  Current: 269.72504440313344 is not greater than 269.7538513173699. Delta is -0.0288069

Processed batch, total rows so far: 278


  df = yf.download(ticker, period="7d", interval="1m", progress=False)
Model is not converging.  Current: 7648.103023576429 is not greater than 7648.704427866738. Delta is -0.6014042903088921


Processed batch, total rows so far: 279
Processed batch, total rows so far: 279
Processed batch, total rows so far: 279
Error fetching data for UI: Too Many Requests. Rate limited. Try after a while.
Error fetching data for UONEK: Too Many Requests. Rate limited. Try after a while.
Error fetching data for BALY: Too Many Requests. Rate limited. Try after a while.
Error fetching data for MUA: Too Many Requests. Rate limited. Try after a while.
Error fetching data for VRTS: Too Many Requests. Rate limited. Try after a while.
Error fetching data for YRD: Too Many Requests. Rate limited. Try after a while.
Error fetching data for COFS: Too Many Requests. Rate limited. Try after a while.
Error fetching data for BTT: Too Many Requests. Rate limited. Try after a while.
Error fetching data for HVT: Too Many Requests. Rate limited. Try after a while.
Error fetching data for QCRH: Too Many Requests. Rate limited. Try after a while.
Error fetching data for SNCR: Too Many Requests. Rate limited. Tr

In [6]:
# Show all rows and columns
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
filterdf["Shannon/IV"] = filterdf["ShannonEntropy"] / filterdf["avg_iv"]
display(filterdf)


Unnamed: 0,symbol,price,volume,historical_volatility,avg_iv,iv/hv_ratio,mean_openInterest,hmm_mean_0,hmm_std_0,hmm_mean_1,hmm_std_1,hmm_mean_2,hmm_std_2,pct_last30_in_highest_stdev_state,highest_stdev_state,pct_regimes_last30_higher_than_avg_iv,ShannonEntropy,Regimeswitchprob,Regimeswitchcount,ADF_Pvalue,AvgReturn_7d,Shannon/IV
0,PLTR,181.020004,53472900,0.431344,0.335212,0.777133,10985.0,0.016163,0.028561,0.001052,0.042268,-0.033786,0.108194,0.0,2,100.0,1.224542,0.01433604,0.0,0.057119,Ticker NVDA -0.000001 dtype: float64,3.653039
1,F,11.44,52469200,0.219591,0.241707,1.100714,12697.25,0.001775,0.021355,0.003446,0.023175,-0.009879,0.063742,0.0,2,100.0,1.224542,0.01390039,0.0,0.057119,Ticker NVDA -0.000001 dtype: float64,5.066227
2,NVDA,182.020004,129554000,0.24741,0.310615,1.255466,35840.5,0.009817,0.060145,-0.022765,0.042011,0.007424,0.02178,0.0,0,100.0,1.224542,0.0146745,0.0,0.057119,Ticker NVDA -0.000001 dtype: float64,3.942316
3,TSLA,335.579987,75000700,0.442691,0.270698,0.611484,9084.0,0.000377,0.03957,0.003978,0.037661,-0.026392,0.118798,0.0,2,100.0,1.224542,0.01823175,0.0,0.057119,Ticker NVDA -0.000001 dtype: float64,4.523641
4,SOFI,23.290001,52370500,0.472104,0.309821,0.656257,4887.25,0.012986,0.034474,0.001962,0.033463,-0.024042,0.09083,0.0,2,100.0,1.224542,0.01327218,0.0,0.057119,Ticker NVDA -0.000001 dtype: float64,3.952412
5,INTC,23.860001,188052500,0.561838,0.495122,0.881254,19366.25,0.004966,0.030052,-0.007702,0.043408,0.126959,0.109599,0.0,2,50.0,1.224542,0.01594031,0.0,0.057119,Ticker NVDA -0.000001 dtype: float64,2.473211
6,AMD,180.949997,66308800,0.473977,0.280891,0.592626,11897.25,0.007949,0.033387,-0.003158,0.034929,0.168632,0.123665,0.0,2,100.0,0.256628,0.1823108,10.0,0.884304,Ticker AAL 0.000043 dtype: float64,0.913622
7,APLD,14.55,20751300,1.192506,0.595707,0.499542,2259.0,-0.444996,0.100522,0.209798,0.114317,-0.004794,0.058884,3.333333,1,100.0,0.256628,0.1811576,10.0,0.884304,Ticker AAL 0.000043 dtype: float64,0.430796
8,NIO,4.45,43446300,0.80514,0.658207,0.817506,19078.5,0.00807,0.041421,-0.012753,0.035139,0.095685,0.071445,6.666667,2,6.666667,0.256628,0.1813345,10.0,0.884304,Ticker AAL 0.000043 dtype: float64,0.38989
9,AAPL,232.779999,51916300,0.289689,0.170571,0.588807,9604.75,-0.050356,0.062713,0.010414,0.100058,0.000481,0.019666,0.0,1,100.0,0.256628,0.1822639,10.0,0.884304,Ticker AAL 0.000043 dtype: float64,1.504524


In [8]:
import datetime
filterdf["AvgReturn_7d"] = 0 
today = datetime.datetime.today().strftime('%Y-%m-%d')
filename = f'/Users/nshaffer/Desktop/equity_vol_screen_{today}.csv'
filterdf.to_csv(filename, index=False)
print(f"Results saved to {filename}")

Results saved to /Users/nshaffer/Desktop/equity_vol_screen_2025-08-15.csv


In [7]:
import nest_asyncio
nest_asyncio.apply()
import asyncio
from ib_insync import *
import pandas as pd
import numpy as np
import ta

# ===== CONFIG =====
BAR_SIZE = '1 day'
DURATION = '5 Y'
NUM_BARS = 2000
VOL_WINDOW = 30
VOL_THRESHOLD = 0.001
REGIME_THRESHOLD = -0.1
ADX_PERIOD = 14
ADX_THRESHOLD = 20
SMA_PERIOD = 200
EXCHANGE = 'SMART'
CURRENCY = 'USD'
LOOKAHEAD = 4
K_NEIGHBORS = 8
FEATURES = ['RSI', 'ADX', 'CCI', 'MACD']


# Preload IBKR connection for reuse
ib = IB()

async def fetch_ibkr_stock(symbol):
    contract = Stock(symbol, EXCHANGE, CURRENCY)
    await ib.qualifyContractsAsync(contract)
    bars = await ib.reqHistoricalDataAsync(
        contract=contract,
        endDateTime='',
        durationStr=DURATION,
        barSizeSetting=BAR_SIZE,
        whatToShow='TRADES',
        useRTH=True,
        formatDate=1
    )
    df = util.df(bars).set_index('date').tail(NUM_BARS)
    return symbol, df


def lorentzian_distance_matrix(target, anchors):
    """Vectorized Lorentzian distance calculation."""
    return np.sum(np.log1p(np.abs(anchors - target)), axis=1)


def volatility_filter(df):
    atr = ta.volatility.AverageTrueRange(
        df['high'], df['low'], df['close'], window=VOL_WINDOW
    ).average_true_range()
    return (atr / df['close']).to_numpy() > VOL_THRESHOLD


def regime_filter(df):
    returns = df['close'].pct_change()
    return returns.rolling(window=20).mean().to_numpy() > REGIME_THRESHOLD


def adx_filter(df):
    adx = ta.trend.ADXIndicator(
        df['high'], df['low'], df['close'], window=ADX_PERIOD
    ).adx()
    return adx.to_numpy() > ADX_THRESHOLD


def classify_lorentzian_knn_with_filters(df):
    closes = df['close'].values
    feature_mat = df[FEATURES].values
    sma = df['SMA'].values

    vol_mask = volatility_filter(df)
    regime_mask = regime_filter(df)
    adx_mask = adx_filter(df)

    valid_filter = vol_mask & regime_mask & adx_mask

    pred = np.full(len(df), np.nan)
    length = len(df)

    for idx in range(500, length):
        if not valid_filter[idx] or np.isnan(sma[idx]) or closes[idx] == 0:
            continue

        anchor_start = max(0, idx - 500)
        anchors = feature_mat[anchor_start:idx]
        if anchors.shape[0] < K_NEIGHBORS:
            continue

        dists = lorentzian_distance_matrix(feature_mat[idx], anchors)
        knn_indices = np.argsort(dists)[:K_NEIGHBORS]
        anchor_indices = np.arange(anchor_start, idx)[knn_indices]

        valid_idx = anchor_indices + LOOKAHEAD < length
        selected = anchor_indices[valid_idx]
        if len(selected) == 0:
            continue

        y_train = np.sign(closes[selected + LOOKAHEAD] - closes[selected])
        vote = y_train.sum()
        raw_signal = np.sign(vote) if vote != 0 else 0

        if raw_signal == 1 and closes[idx] > sma[idx]:
            pred[idx] = 1
        elif raw_signal == -1 and closes[idx] < sma[idx]:
            pred[idx] = -1
        else:
            pred[idx] = 0

    return pred


async def process_symbols_with_knn(filterdf):
    await ib.connectAsync('127.0.0.1', 7497, clientId=np.random.randint(1000, 10000))
    
    # Fetch all data concurrently
    tasks = [fetch_ibkr_stock(sym) for sym in filterdf['symbol']]
    results = await asyncio.gather(*tasks)

    for symbol, df in results:
        print(f"Processing {symbol}...")
        df['RSI'] = ta.momentum.RSIIndicator(df['close'], window=14).rsi()
        df['ADX'] = ta.trend.ADXIndicator(df['high'], df['low'], df['close'], window=14).adx()
        df['CCI'] = ta.trend.CCIIndicator(df['high'], df['low'], df['close'], window=20).cci()
        df['SMA'] = ta.trend.SMAIndicator(df['close'], window=SMA_PERIOD).sma_indicator()
        macd_ind = ta.trend.MACD(df['close'])
        df['MACD'] = macd_ind.macd()

        df = df.dropna().reset_index(drop=True)

        preds = classify_lorentzian_knn_with_filters(df)
        last_8 = preds[-8:]
        
        pct_buy = np.sum(last_8 == 1) / len(last_8) * 100
        pct_sell = np.sum(last_8 == -1) / len(last_8) * 100

        filterdf.loc[filterdf['symbol'] == symbol, 'pct_buy_last_8d'] = pct_buy
        filterdf.loc[filterdf['symbol'] == symbol, 'pct_sell_last_8d'] = pct_sell

    ib.disconnect()
    return filterdf


if __name__ == '__main__':
    # Example dataframe

    df_results = asyncio.run(process_symbols_with_knn(filterdf))
    print(df_results)


API connection failed: ConnectionRefusedError(61, "Connect call failed ('127.0.0.1', 7497)")
Make sure API port on TWS/IBG is open


ConnectionRefusedError: [Errno 61] Connect call failed ('127.0.0.1', 7497)

In [None]:
filterdf

Unnamed: 0,language,region,quoteType,typeDisp,quoteSourceName,triggerable,customPriceAlertConfidence,currency,shortName,bid,ask,regularMarketChangePercent,exchange,fiftyTwoWeekHigh,fiftyTwoWeekLow,averageAnalystRating,dividendYield,hasPrePostMarketData,firstTradeDateMilliseconds,priceHint,postMarketChangePercent,postMarketTime,postMarketPrice,postMarketChange,regularMarketChange,regularMarketTime,regularMarketPrice,regularMarketDayHigh,regularMarketDayRange,regularMarketDayLow,regularMarketVolume,regularMarketPreviousClose,bidSize,askSize,market,messageBoardId,fullExchangeName,longName,financialCurrency,regularMarketOpen,averageDailyVolume3Month,averageDailyVolume10Day,corporateActions,fiftyTwoWeekLowChange,fiftyTwoWeekLowChangePercent,fiftyTwoWeekRange,fiftyTwoWeekHighChange,fiftyTwoWeekHighChangePercent,fiftyTwoWeekChangePercent,dividendDate,earningsTimestamp,earningsTimestampStart,earningsTimestampEnd,earningsCallTimestampStart,earningsCallTimestampEnd,isEarningsDateEstimate,trailingAnnualDividendRate,trailingPE,dividendRate,trailingAnnualDividendYield,marketState,epsTrailingTwelveMonths,epsForward,epsCurrentYear,priceEpsCurrentYear,sharesOutstanding,bookValue,fiftyDayAverage,fiftyDayAverageChange,fiftyDayAverageChangePercent,twoHundredDayAverage,twoHundredDayAverageChange,twoHundredDayAverageChangePercent,marketCap,forwardPE,priceToBook,sourceInterval,exchangeDataDelayedBy,exchangeTimezoneName,exchangeTimezoneShortName,gmtOffSetMilliseconds,esgPopulated,tradeable,cryptoTradeable,displayName,symbol,ipoExpectedDate,prev_day_price,prev_day_volume,price_volume,historical_volatility,avg_iv,iv/hv_ratio,mean_openInterest_x,mean_openInterest_y,mean_openInterest,hmm_mean_0,hmm_std_0,hmm_mean_1,hmm_std_1,hmm_mean_2,hmm_std_2,pct_last30_in_highest_stdev_state,highest_stdev_state,pct_regimes_last30_higher_than_avg_iv,ShannonEntropy,Regimeswitchprob,Regimeswitchcount,ADF_Pvalue,AvgReturn_7d,Shannon/IV,pct_buy_last_8d,pct_sell_last_8d
0,en-US,US,EQUITY,Equity,Nasdaq Real Time Price,True,HIGH,USD,NVIDIA Corporation,173.61,182.83,-2.33316,NMS,183.3,86.62,1.4 - Strong Buy,0.02,True,917015400000,2,-0.70228,1754093000.0,172.5,-1.22,-4.14999,1754078401,173.72,176.54,170.89 - 176.54,170.89,202636888,177.87,4,1,us_market,finmb_32307,NasdaqGS,NVIDIA Corporation,USD,174.09,192781287,161557370,[],87.1,1.005541,86.62 - 183.3,-9.580002,-0.052264,72.94176,1751501000.0,1756325000.0,1756325000.0,1756325000.0,1756328000.0,1756328000.0,False,0.04,56.220066,0.04,0.000225,CLOSED,3.09,4.12,4.30288,40.372963,24387600384,3.438,154.5116,19.208405,0.124317,134.7648,38.9552,0.289061,4236614041600,42.16505,50.529377,15,0,America/New_York,EDT,-14400000,False,False,False,NVIDIA,NVDA,,177.869995,221685400,39431181015.55176,0.253826,0.394598,1.5546,9096.0,9096.0,9096.0,0.006675,0.031073,-0.004445,0.032405,0.110727,0.120742,0.0,2,100.0,0.833454,0.1,6,0.218884,Ticker NVDA 0.000003 dtype: float64,2.112159,100.0,0.0
1,en-US,US,EQUITY,Equity,Nasdaq Real Time Price,True,HIGH,USD,"Lucid Group, Inc.",2.4,2.47,-1.62601,NMS,4.43,1.93,3.1 - Hold,,True,1600435800000,4,0.206606,1754093000.0,2.425,0.005,-0.04,1754078401,2.42,2.44,2.32 - 2.44,2.32,91282600,2.46,488,575,us_market,finmb_83747444,NasdaqGS,"Lucid Group, Inc.",USD,2.375,152561575,131673640,[],0.49,0.253886,1.93 - 4.43,-2.01,-0.453725,-19.333332,,1754424000.0,1754424000.0,1754424000.0,1754429000.0,1754429000.0,False,0.0,,,0.0,CLOSED,-1.19,-0.88,-0.8878,-2.725839,3072480000,1.044,2.3908,0.0292,0.012214,2.516975,-0.096975,-0.038528,7435401728,-2.75,2.318008,15,0,America/New_York,EDT,-14400000,False,False,False,Lucid,LCID,2021-07-26,2.46,65248900,160512296.489048,1.334923,1.16016,0.869084,28425.25,28425.25,28425.25,0.003372,0.051561,-0.011354,0.040461,0.30257,0.106913,3.333333,2,3.333333,1.378179,0.14,7,0.780178,Ticker LCID -0.000083 dtype: float64,1.187921,25.0,25.0
2,en-US,US,EQUITY,Equity,Nasdaq Real Time Price,True,HIGH,USD,"Tesla, Inc.",302.56,302.84,-1.82956,NMS,488.54,182.0,2.7 - Hold,,True,1277818200000,2,-0.522095,1754093000.0,301.05,-1.58002,-5.639984,1754078401,302.63,309.31,297.82 - 309.31,297.82,88358015,308.27,4,1,us_market,finmb_27444752,NasdaqGS,"Tesla, Inc.",USD,306.205,107359361,100896010,[],120.630005,0.662802,182.0 - 488.54,-185.91,-0.380542,52.16714,,1753301000.0,1761163000.0,1761163000.0,1753306000.0,1753306000.0,True,0.0,179.071,,0.0,CLOSED,1.69,3.24,1.67868,180.27856,3225449984,23.981,322.9978,-20.367798,-0.063059,321.5904,-18.960388,-0.058958,976117956608,93.40432,12.619574,15,0,America/New_York,EDT,-14400000,False,False,False,Tesla,TSLA,,308.269989,85270900,26286459406.185917,0.532152,0.508855,0.956221,1248.25,1248.25,1248.25,-0.002173,0.042375,0.000958,0.040056,-0.017269,0.132887,0.0,2,100.0,0.758217,0.07,5,0.755459,Ticker TSLA -0.000007 dtype: float64,1.490045,0.0,0.0
3,en-US,US,EQUITY,Equity,Delayed Quote,False,LOW,USD,Ford Motor Company,10.79,10.81,-2.25836,NYQ,11.97,8.44,3.0 - Hold,5.55,True,76253400000,2,-0.184838,1754093000.0,10.8,-0.02,-0.25,1754078402,10.82,10.92,10.6822 - 10.92,10.6822,68916067,11.07,280,488,us_market,finmb_106335,NYSE,Ford Motor Company,USD,10.92,89785446,70396340,"[{'header': 'Dividend', 'message': 'F announce...",2.38,0.281991,8.44 - 11.97,-1.150001,-0.096074,11.431515,1748822000.0,1753906000.0,1761768000.0,1761768000.0,1753909000.0,1753909000.0,True,0.6,13.871795,0.6,0.054201,CLOSED,0.78,1.75,1.14611,9.440629,3909009920,11.321,10.8888,-0.0688,-0.006318,10.3356,0.4844,0.046867,43062083584,6.182857,0.955746,15,0,America/New_York,EDT,-14400000,False,False,False,,F,,11.07,101090900,1119076232.149506,0.254434,0.348639,1.370252,20423.75,20423.75,20423.75,0.001827,0.02131,0.002969,0.023229,-0.009944,0.063649,0.0,2,50.0,0.711742,0.08,4,0.829486,Ticker F -0.000015 dtype: float64,2.041484,0.0,0.0
4,en-US,US,EQUITY,Equity,Nasdaq Real Time Price,True,HIGH,USD,Intel Corporation,19.28,19.34,-2.47475,NMS,27.55,17.67,3.0 - Hold,,True,322151400000,2,-0.104603,1754093000.0,19.2898,-0.020199,-0.49,1754078400,19.31,19.55,18.965 - 19.55,18.965,86145589,19.8,18,18,us_market,finmb_21127,NasdaqGS,Intel Corporation,USD,19.472,81354487,99272340,[],1.639999,0.092813,17.67 - 27.55,-8.24,-0.299093,-3.978121,1725149000.0,1753387000.0,1753387000.0,1753387000.0,1753391000.0,1753391000.0,False,0.125,,,0.006313,CLOSED,-4.77,0.97,0.12594,153.32698,4376999936,22.363,21.526,-2.216,-0.102945,21.733,-2.423,-0.111489,84519870464,19.907215,0.86348,15,0,America/New_York,EDT,-14400000,False,False,False,Intel,INTC,,19.799999,90665200,1795170890.827942,0.46638,0.452642,0.970544,20577.75,20577.75,20577.75,0.067438,0.07315,-0.02066,0.065088,-0.003633,0.029751,0.0,0,100.0,1.009857,0.16,9,0.257073,Ticker INTC -0.000071 dtype: float64,2.231026,0.0,62.5
5,en-US,US,EQUITY,Equity,Nasdaq Real Time Price,True,HIGH,USD,Palantir Technologies Inc.,154.12,154.42,-2.576572,NMS,160.89,21.23,3.0 - Hold,,True,1601472600000,2,0.576782,1754093000.0,155.16,0.889801,-4.080002,1754078401,154.27,158.19,151.0576 - 158.19,151.0576,59911569,158.35,6,6,us_market,finmb_43580005,NasdaqGS,Palantir Technologies Inc.,USD,155.05,79287858,49283940,[],133.04001,6.266604,21.23 - 160.89,-6.619995,-0.041146,540.3902,,1754338000.0,1754338000.0,1754338000.0,1754341000.0,1754341000.0,False,0.0,670.73914,,0.0,CLOSED,0.23,0.47,0.58178,265.16898,2262909952,2.299,139.8546,14.415405,0.103074,96.59565,57.674355,0.59707,364063293440,328.23404,67.10309,15,0,America/New_York,EDT,-14400000,False,False,False,Palantir,PLTR,2024-11-26,158.350006,45342600,7180000986.749268,0.313068,1.025088,3.274335,2143.25,2143.25,2143.25,0.015767,0.028933,0.001592,0.040863,-0.024689,0.081732,3.333333,2,3.333333,0.442637,0.21,10,0.027734,Ticker PLTR 0.000002 dtype: float64,0.431804,25.0,0.0
6,en-US,US,EQUITY,Equity,Nasdaq Real Time Price,True,HIGH,USD,"SoFi Technologies, Inc.",21.0,22.79,-5.97874,NMS,25.11,6.01,2.8 - Hold,,True,1609770600000,2,-0.375882,1754093000.0,21.1502,-0.0798,-1.35,1754078401,21.23,21.76,20.6 - 21.76,20.6,89014566,22.58,1,2,us_market,finmb_141582707,NasdaqGS,"SoFi Technologies, Inc.",USD,21.7,71582688,93615510,[],15.219999,2.532446,6.01 - 25.11,-3.880001,-0.15452,224.61774,,1753792000.0,1761655000.0,1761655000.0,1753790000.0,1753790000.0,True,0.0,42.46,,0.0,CLOSED,0.5,0.29,0.31217,68.00781,1196179968,6.162,17.4368,3.7932,0.21754,14.5902,6.639799,0.455086,25394900992,73.206894,3.44531,15,0,America/New_York,EDT,-14400000,False,False,False,SoFi,SOFI,2021-06-01,22.58,120095300,2711751864.837456,0.470831,0.636234,1.3513,2317.0,2317.0,2317.0,0.008951,0.111168,-0.066168,0.057826,0.006413,0.034212,0.0,0,0.0,0.891635,0.2,10,0.138222,Ticker SOFI -0.000003 dtype: float64,1.401426,75.0,0.0
7,en-US,US,EQUITY,Equity,Nasdaq Real Time Price,True,HIGH,USD,"American Airlines Group, Inc.",11.01,11.07,-4.00348,NMS,19.1,8.5,2.1 - Buy,,True,1127827800000,2,0.0,1754093000.0,11.03,0.0,-0.46,1754078401,11.03,11.21,10.9 - 11.21,10.9,71082347,11.49,2,31,us_market,finmb_168569,NasdaqGS,American Airlines Group Inc.,USD,11.21,61488524,73583360,[],2.53,0.297647,8.5 - 19.1,-8.070001,-0.422513,18.857754,1582070000.0,1753360000.0,1761223000.0,1761223000.0,1753360000.0,1753360000.0,True,0.0,13.130953,,0.0,CLOSED,0.84,2.02,0.43114,25.583336,659828992,-5.865,11.53,-0.5,-0.043365,13.292,-2.262,-0.170178,7277913600,5.460396,-1.880648,15,0,America/New_York,EDT,-14400000,False,False,False,American Airlines,AAL,,11.49,84751000,973788970.602036,0.643543,0.469244,0.729156,3532.5,3532.5,3532.5,0.128325,0.097144,-0.072552,0.058652,-0.001553,0.027718,3.333333,0,10.0,1.052653,0.05,3,0.656404,Ticker AAL -0.00002 dtype: float64,2.243297,0.0,37.5
8,en-US,US,EQUITY,Equity,Delayed Quote,True,HIGH,USD,Apple Inc.,192.07,213.25,-2.500362,NMS,260.1,169.21,1.9 - Buy,0.51,True,345479400000,2,-0.039531,1754093000.0,202.3,-0.080002,-5.190002,1754078402,202.38,213.58,201.5 - 213.58,201.5,97203859,207.57,1,1,us_market,finmb_24937,NasdaqGS,Apple Inc.,USD,210.89,53580620,55084450,"[{'header': 'Dividend', 'message': 'AAPL annou...",33.17,0.196029,169.21 - 260.1,-57.72,-0.221915,-3.2924,1747267000.0,1753992000.0,1761854000.0,1761854000.0,1753996000.0,1753996000.0,True,1.01,30.663637,1.04,0.004866,CLOSED,6.6,8.31,7.38723,27.395926,14840399872,4.431,205.277,-2.896988,-0.014113,221.5797,-19.199692,-0.086649,3003400323072,24.35379,45.67366,15,0,America/New_York,EDT,-14400000,False,False,False,Apple,AAPL,,207.570007,80698400,16750567479.052734,0.143408,0.321967,2.245113,2029.75,2029.75,2029.75,0.001622,0.021692,-0.00443,0.023309,0.017797,0.110934,0.0,2,100.0,0.482193,0.04,2,0.994828,Ticker AAPL -0.000021 dtype: float64,1.497647,0.0,37.5
9,en-US,US,EQUITY,Equity,Nasdaq Real Time Price,True,HIGH,USD,"Advanced Micro Devices, Inc.",171.58,171.83,-2.61471,NMS,182.5,76.48,1.7 - Buy,,True,322151400000,2,-0.588232,1754093000.0,170.69,-1.00999,-4.61,1754078400,171.7,174.395,166.82 - 174.395,166.82,74849357,176.31,6,7,us_market,finmb_168864,NasdaqGS,"Advanced Micro Devices, Inc.",USD,170.14,51417843,61966810,[],95.21999,1.245031,76.48 - 182.5,-10.800003,-0.059178,27.354992,,1754424000.0,1754424000.0,1754424000.0,1754428000.0,1754428000.0,False,0.0,125.32846,,0.0,CLOSED,1.37,5.1,3.97856,43.15632,1621400064,35.817,137.7562,33.9438,0.246405,123.5056,48.194397,0.39022,278394372096,33.666668,4.793813,15,0,America/New_York,EDT,-14400000,False,False,False,Advanced Micro Devices,AMD,,176.309998,71765300,12652939867.791748,0.38335,0.767916,2.003173,1359.5,1359.5,1359.5,-0.058313,0.056973,0.133844,0.137063,0.005515,0.028071,0.0,1,0.0,0.351099,0.19,10,0.326865,Ticker AMD 0.000029 dtype: float64,0.457211,75.0,0.0


In [None]:
import datetime
filterdf["AvgReturn_7d"] = 0 
today = datetime.datetime.today().strftime('%Y-%m-%d')
filename = f'/Users/nshaffer/Desktop/equity_vol_screen_{today}.csv'
filterdf.to_csv(filename, index=False)
print(f"Results saved to {filename}")


Results saved to /Users/nshaffer/Desktop/equity_vol_screen_2025-08-02.csv
