Do a random forest return predictor for long time equity screens, maybe including rates/ sectors / commodities / geoploitic indicators. 
Find the % of regimes in the last 30 days that have a higher volatility than the current IV 
Add trend in shannon entropy or/and regime percentages to evaluate recessions / hostile environments beginning. could also look at earnings timestamps affecting options prices 

Want to find stocks with a low hv/iv high regime % in highest stdev state, and high # of days with stdev above avg iv on options 

In [3]:
import yfinance as yf 
from yfinance import EquityQuery
import numpy as np
import pandas as pd
import time


In [4]:
from yfinance import EquityQuery, screen
import yfinance as yf
import pandas as pd
import numpy as np
import datetime
import time
num_stocks = 250  # Number of stocks to process in each batch

# --- Helper Functions ---


def find_closest_value(data_list, target_value):
    if len(data_list) == 0:
        return None
    return min(data_list, key=lambda x: abs(x - target_value))


def filter_special_expiry(expiry_list):
    """Return the two soonest (earliest) expiry dates from the list."""
    sorted_expiries = sorted(expiry_list)
    return sorted_expiries[:2]


def get_prev_day_price_volume(symbol):
    """Fetch the most recent full day's close price and volume for the symbol."""
    try:
        data = yf.Ticker(symbol).history(period="5d")
        if data.shape[0] < 2:
            return None, None
        prev_day = data.iloc[-2]
        price = float(prev_day['Close'])
        volume = int(prev_day['Volume'])
        return price, volume
    except Exception as e:
        print(f"Error fetching previous day price/volume for {symbol}: {e}")
        return None, None


def get_historical_volatility(symbol, window=20):
    try:
        data = yf.Ticker(symbol).history(period=f"{window*2}d")
        if data.shape[0] < window + 1:
            return None
        data = data['Close'].dropna()
        log_returns = np.log(data / data.shift(1)).dropna()
        vol = log_returns[-window:].std() * np.sqrt(252)
        return vol
    except Exception as e:
        print(f"Error calculating historical volatility for {symbol}: {e}")
        return None


def get_options(symbol, max_retries=3, retry_delay=60):
    for attempt in range(max_retries):
        try:
            stock = yf.Ticker(symbol)
            options_chain = stock.options
            if not options_chain:
                return pd.DataFrame(columns=['strike', 'impliedVolatility', 'openInterest', 'symbol'])
            filtered_expiries = filter_special_expiry(options_chain)
            options_data = []
            current_price, _ = get_prev_day_price_volume(symbol)
            if current_price is None:
                continue
            for expiry in filtered_expiries:
                chain = stock.option_chain(expiry)
                for opt_type, df in [('call', chain.calls), ('put', chain.puts)]:
                    if not df.empty:
                        closest_strike = find_closest_value(df['strike'], current_price)
                        filtered = df[df['strike'] == closest_strike].copy()
                        filtered['type'] = opt_type
                        filtered['expiry'] = expiry
                        filtered['symbol'] = symbol
                        options_data.append(filtered)
            if options_data:
                result = pd.concat(options_data, ignore_index=True)
                result = result[result['openInterest'] > 500]
                return result[['strike', 'impliedVolatility', 'openInterest', 'symbol']]
            else:
                return pd.DataFrame(columns=['strike', 'impliedVolatility', 'openInterest', 'symbol'])
        except Exception as e:
            print(f"Error fetching options for {symbol}: {e}")
            if attempt < max_retries - 1:
                time.sleep(retry_delay)
                continue
            else:
                print(f"Max retries reached for {symbol}.")
                return pd.DataFrame(columns=['strike', 'impliedVolatility', 'openInterest', 'symbol'])
    return pd.DataFrame(columns=['strike', 'impliedVolatility', 'openInterest', 'symbol'])


# --- Screen Query ---
screen_query = EquityQuery("and", [
    EquityQuery("is-in", ["region", "us"]),
    EquityQuery("is-in", ["sector",
        "Technology",
        "Financial Services",
        "Consumer Cyclical",
        "Communication Services",
        "Basic Materials",
        "Industrials"
    ]),
    EquityQuery("GTE", ["eodprice", 0.50]),
    EquityQuery("GTE", ["avgdailyvol3m", 100000]),
    EquityQuery("LTE", ["short_percentage_of_float.value", 100])
])


# --- Batch Processing Function ---
def process_batch(offset, filterdf, options):
    batch_size = num_stocks
    screener = screen(
        screen_query,
        sortField="avgdailyvol3m",
        sortAsc=False,
        size=batch_size,
        offset=offset
    )
    stocks = screener.get('quotes', [])
    if not stocks:
        print("No more stocks to load.")
        return filterdf, options, offset


    current_filterdf = pd.DataFrame(stocks)


    # --- Add price*volume filter for most recent full day ---
    current_filterdf['prev_day_price'] = None
    current_filterdf['prev_day_volume'] = None
    current_filterdf['price_volume'] = None


    for idx, row in current_filterdf.iterrows():
        symbol = row['symbol']
        price, volume = get_prev_day_price_volume(symbol)
        current_filterdf.at[idx, 'prev_day_price'] = price
        current_filterdf.at[idx, 'prev_day_volume'] = volume
        if price is not None and volume is not None:
            current_filterdf.at[idx, 'price_volume'] = price * volume


    current_filterdf = current_filterdf[current_filterdf['price_volume'] > 100_000_000]


    if current_filterdf.empty:
        print("No stocks passed the price*volume filter in this batch.")
        offset += batch_size
        return filterdf, options, offset


    current_filterdf['historical_volatility'] = None
    for idx, row in current_filterdf.iterrows():
        symbol = row['symbol']
        hv = get_historical_volatility(symbol, window=20)
        current_filterdf.at[idx, 'historical_volatility'] = hv


    filterstocks = current_filterdf['symbol'].tolist()
    options_list = []
    for symbol in filterstocks:
        options_data = get_options(symbol)
        options_list.append(options_data)


    if options_list:
        current_options = pd.concat(options_list, ignore_index=True)
        options = pd.concat([options, current_options], ignore_index=True)
    else:
        current_options = pd.DataFrame(columns=['strike', 'impliedVolatility', 'openInterest', 'symbol'])


    if not current_options.empty:
        qualifying_symbols = set(current_options['symbol'].str.upper().unique())
        current_filterdf = current_filterdf[current_filterdf['symbol'].str.upper().isin(qualifying_symbols)]
    else:
        current_filterdf = current_filterdf.iloc[0:0]


    if current_filterdf.empty:
        print("No stocks passed the open interest filter in this batch.")
        offset += batch_size
        return filterdf, options, offset


    if not options.empty and 'symbol' in options.columns and 'impliedVolatility' in options.columns:
        avg_iv = options.groupby('symbol')['impliedVolatility'].mean().reset_index()
        avg_iv.columns = ['symbol', 'avg_iv']
    else:
        avg_iv = pd.DataFrame(columns=['symbol', 'avg_iv'])


    current_filterdf['symbol'] = current_filterdf['symbol'].astype(str).str.upper()
    avg_iv['symbol'] = avg_iv['symbol'].astype(str).str.upper()
    current_filterdf = current_filterdf.merge(avg_iv, on='symbol', how='left')


    if 'avg_iv' in current_filterdf.columns and 'historical_volatility' in current_filterdf.columns:
        current_filterdf["iv/hv_ratio"] = current_filterdf["avg_iv"] / current_filterdf["historical_volatility"]
    else:
        current_filterdf["iv/hv_ratio"] = None


    filterdf = pd.concat([filterdf, current_filterdf], ignore_index=True)

    # --- ADD: Append mean openInterest per symbol ---
    if not options.empty and 'symbol' in options.columns and 'openInterest' in options.columns:
        mean_open_interest = options.groupby('symbol')['openInterest'].mean().reset_index()
        mean_open_interest.rename(columns={'openInterest': 'mean_openInterest'}, inplace=True)
        mean_open_interest['symbol'] = mean_open_interest['symbol'].astype(str).str.upper()
        filterdf['symbol'] = filterdf['symbol'].astype(str).str.upper()
        filterdf = filterdf.merge(mean_open_interest, on='symbol', how='left')

    offset += batch_size
    return filterdf, options, offset


# --- Main loop ---
offset = 0
filterdf = pd.DataFrame()
options = pd.DataFrame()


filterdf, options, offset = process_batch(offset, filterdf, options)
print("Initial batch:")
print(filterdf)


while True:
    user_input = input("Process next 100 stocks? (y/n): ")
    if user_input.lower() != 'y':
        break
    filterdf, options, offset = process_batch(offset, filterdf, options)
    print("Updated results:")
    print(filterdf)


Initial batch:
    language region quoteType typeDisp         quoteSourceName  triggerable  \
0      en-US     US    EQUITY   Equity  Nasdaq Real Time Price         True   
1      en-US     US    EQUITY   Equity  Nasdaq Real Time Price         True   
2      en-US     US    EQUITY   Equity  Nasdaq Real Time Price         True   
3      en-US     US    EQUITY   Equity           Delayed Quote        False   
4      en-US     US    EQUITY   Equity  Nasdaq Real Time Price         True   
..       ...    ...       ...      ...                     ...          ...   
128    en-US     US    EQUITY   Equity           Delayed Quote         True   
129    en-US     US    EQUITY   Equity           Delayed Quote        False   
130    en-US     US    EQUITY   Equity           Delayed Quote         True   
131    en-US     US    EQUITY   Equity           Delayed Quote        False   
132    en-US     US    EQUITY   Equity           Delayed Quote        False   

    customPriceAlertConfidence curre

  current_options = pd.concat(options_list, ignore_index=True)


Updated results:
    language region quoteType typeDisp         quoteSourceName  triggerable  \
0      en-US     US    EQUITY   Equity  Nasdaq Real Time Price         True   
1      en-US     US    EQUITY   Equity  Nasdaq Real Time Price         True   
2      en-US     US    EQUITY   Equity  Nasdaq Real Time Price         True   
3      en-US     US    EQUITY   Equity           Delayed Quote        False   
4      en-US     US    EQUITY   Equity  Nasdaq Real Time Price         True   
..       ...    ...       ...      ...                     ...          ...   
203    en-US     US    EQUITY   Equity  Nasdaq Real Time Price         True   
204    en-US     US    EQUITY   Equity  Nasdaq Real Time Price         True   
205    en-US     US    EQUITY   Equity           Delayed Quote        False   
206    en-US     US    EQUITY   Equity           Delayed Quote        False   
207    en-US     US    EQUITY   Equity  Nasdaq Real Time Price         True   

    customPriceAlertConfidence cur

  current_options = pd.concat(options_list, ignore_index=True)


Updated results:
    language region quoteType typeDisp         quoteSourceName  triggerable  \
0      en-US     US    EQUITY   Equity  Nasdaq Real Time Price         True   
1      en-US     US    EQUITY   Equity  Nasdaq Real Time Price         True   
2      en-US     US    EQUITY   Equity  Nasdaq Real Time Price         True   
3      en-US     US    EQUITY   Equity           Delayed Quote        False   
4      en-US     US    EQUITY   Equity  Nasdaq Real Time Price         True   
..       ...    ...       ...      ...                     ...          ...   
247    en-US     US    EQUITY   Equity           Delayed Quote        False   
248    en-US     US    EQUITY   Equity           Delayed Quote        False   
249    en-US     US    EQUITY   Equity  Nasdaq Real Time Price        False   
250    en-US     US    EQUITY   Equity  Nasdaq Real Time Price         True   
251    en-US     US    EQUITY   Equity           Delayed Quote        False   

    customPriceAlertConfidence cur

In [5]:
import yfinance as yf
import numpy as np
from hmmlearn.hmm import GaussianHMM
from tqdm import tqdm

def get_last_120_returns(symbol):
    """Get last 120 days of daily log returns for a symbol."""
    try:
        data = yf.Ticker(symbol).history(period="130d")
        closes = data['Close'].dropna()
        if len(closes) < 121:
            return None
        returns = np.log(closes / closes.shift(1)).dropna()
        return returns[-120:]
    except Exception as e:
        print(f"Error fetching data for {symbol}: {e}")
        return None

def fit_hmm_and_stats(returns, n_states=3):
    """Fit HMM to returns, return means, stds, and state sequence."""
    model = GaussianHMM(
        n_components=n_states,
        covariance_type="diag",
        n_iter=500,
        tol=1e-2,
        random_state=42,
        verbose=False
    )
    X = returns.values.reshape(-1, 1)
    model.fit(X)
    hidden_states = model.predict(X)
    means = model.means_.flatten()
    stds = np.sqrt(np.array([np.diag(cov)[0] for cov in model.covars_]))
    return hidden_states, means, stds

def hmm_analysis(symbol):
    returns = get_last_120_returns(symbol)
    if returns is None:
        return {
            "hmm_mean_0": None,
            "hmm_std_0": None,
            "hmm_mean_1": None,
            "hmm_std_1": None,
            "hmm_mean_2": None,
            "hmm_std_2": None,
            "pct_last30_in_highest_stdev_state": None,
            "highest_stdev_state": None
        }
    hidden_states, means, stds = fit_hmm_and_stats(returns, n_states=3)
    # Identify the state with the highest stdev
    high_stdev_state = int(np.argmax(stds))
    last30 = hidden_states[-30:]
    pct_high = np.mean(last30 == high_stdev_state) * 100
    return {
        "hmm_mean_0": means[0],
        "hmm_std_0": stds[0],
        "hmm_mean_1": means[1],
        "hmm_std_1": stds[1],
        "hmm_mean_2": means[2],
        "hmm_std_2": stds[2],
        "pct_last30_in_highest_stdev_state": pct_high,
        "highest_stdev_state": high_stdev_state
    }

# --- Apply to filterdf ---

filterdf['symbol'] = filterdf['symbol'].astype(str).str.upper()

for col in [
    'hmm_mean_0','hmm_std_0',
    'hmm_mean_1','hmm_std_1',
    'hmm_mean_2','hmm_std_2',
    'pct_last30_in_highest_stdev_state',
    'highest_stdev_state'
]:
    filterdf[col] = None

for idx, row in tqdm(filterdf.iterrows(), total=len(filterdf)):
    symbol = row['symbol']
    hmm_stats = hmm_analysis(symbol)
    for col in hmm_stats:
        filterdf.at[idx, col] = hmm_stats[col]

# filterdf now has the HMM columns for states 0, 1, 2, and the % of last 30 closes in the highest stdev state.


  6%|▌         | 15/252 [00:02<00:42,  5.53it/s]Model is not converging.  Current: 287.0377515827018 is not greater than 287.0511136833532. Delta is -0.013362100651420405
  6%|▋         | 16/252 [00:03<00:42,  5.61it/s]Model is not converging.  Current: 294.8717833392088 is not greater than 294.89808216496533. Delta is -0.02629882575655529
  9%|▉         | 23/252 [00:04<00:42,  5.38it/s]Model is not converging.  Current: 185.9532831123019 is not greater than 185.95818812590238. Delta is -0.004905013600478014
 10%|▉         | 24/252 [00:04<00:46,  4.93it/s]Model is not converging.  Current: 326.71725611600357 is not greater than 326.7295324945106. Delta is -0.012276378507010577
 10%|▉         | 25/252 [00:04<00:47,  4.83it/s]Model is not converging.  Current: 309.7562995228235 is not greater than 310.04481635080475. Delta is -0.28851682798125466
 13%|█▎        | 33/252 [00:06<00:52,  4.14it/s]Model is not converging.  Current: 278.295377800982 is not greater than 278.2955804369564. Delt

In [6]:
N = 252  # Or use 261, depending on your convention

def pct_regimes_higher_than_iv(hidden_states, stds, avg_iv):
    """% of regimes in last 30 days with annualized stdev higher than avg_iv."""
    last30 = hidden_states[-30:]
    # Annualize regime stdevs
    stds_annualized = stds * np.sqrt(N)
    last30_vols_annualized = stds_annualized[last30]
    pct_higher = np.mean(last30_vols_annualized > avg_iv) * 100
    return pct_higher

filterdf['pct_regimes_last30_higher_than_avg_iv'] = None

for idx, row in tqdm(filterdf.iterrows(), total=len(filterdf)):
    symbol = row['symbol']
    avg_iv = row['avg_iv']
    returns = get_last_120_returns(symbol)
    if returns is None or avg_iv is None:
        filterdf.at[idx, 'pct_regimes_last30_higher_than_avg_iv'] = None
        continue
    hidden_states, means, stds = fit_hmm_and_stats(returns, n_states=3)
    pct_higher = pct_regimes_higher_than_iv(hidden_states, stds, avg_iv)
    filterdf.at[idx, 'pct_regimes_last30_higher_than_avg_iv'] = pct_higher


  6%|▌         | 15/252 [00:03<01:02,  3.82it/s]Model is not converging.  Current: 287.0377515827018 is not greater than 287.0511136833532. Delta is -0.013362100651420405
  6%|▋         | 16/252 [00:03<00:56,  4.21it/s]Model is not converging.  Current: 294.8717833392088 is not greater than 294.89808216496533. Delta is -0.02629882575655529
  9%|▉         | 23/252 [00:04<00:45,  5.04it/s]Model is not converging.  Current: 185.9532831123019 is not greater than 185.95818812590238. Delta is -0.004905013600478014
 10%|▉         | 24/252 [00:05<00:48,  4.71it/s]Model is not converging.  Current: 326.7172728914914 is not greater than 326.72954902323426. Delta is -0.012276131742851248
 10%|▉         | 25/252 [00:05<00:50,  4.50it/s]Model is not converging.  Current: 309.7563004089375 is not greater than 310.0448165227642. Delta is -0.2885161138267449
 13%|█▎        | 33/252 [00:07<00:46,  4.69it/s]Model is not converging.  Current: 278.295377800982 is not greater than 278.2955804369564. Delta 

In [7]:
import numpy as np
import pandas as pd
import yfinance as yf
from scipy.stats import entropy
from hmmlearn.hmm import GaussianHMM
from statsmodels.tsa.stattools import adfuller

def shannon_entropy(series, bins=10):
    hist, _ = np.histogram(series, bins=bins, density=True)
    hist = hist[hist > 0]
    return entropy(hist, base=2)

def hmm_regime_switch_prob_and_count(returns, n_states=2):
    returns = returns.values.reshape(-1, 1)
    model = GaussianHMM(n_components=n_states, covariance_type="diag", n_iter=100)
    model.fit(returns)
    hidden_states = model.predict(returns)
    last_state = hidden_states[-1]
    transmat = model.transmat_
    switch_prob = 1 - transmat[last_state, last_state]
    regime_switch_count = np.sum(hidden_states[1:] != hidden_states[:-1])
    return switch_prob, regime_switch_count

entropy_list = []
regimeswitchprob_list = []
regimeswitchcount_list = []
adf_pvalue_list = []
avg_return_list = []

for ticker in filterdf['symbol']:
    try:
        # Download last 7 days of 1-minute data
        df = yf.download(ticker, period="7d", interval="1m", progress=False)
        closes = df['Close'].dropna()
        # Make sure you have enough data (e.g., at least 1000 minutes)
        if len(closes) < 1000:
            entropy_list.append(np.nan)
            regimeswitchprob_list.append(np.nan)
            regimeswitchcount_list.append(np.nan)
            adf_pvalue_list.append(np.nan)
            avg_return_list.append(np.nan)
            continue
        log_returns = np.log(closes).diff().dropna()         # For entropy, HMM, etc.
        simple_returns = closes.pct_change().dropna()        # For avg return only

        # Shannon entropy (log returns)
        ent = shannon_entropy(log_returns)
        entropy_list.append(ent)
        # HMM regime switch probability and count (log returns)
        switch_prob, switch_count = hmm_regime_switch_prob_and_count(log_returns)
        regimeswitchprob_list.append(round(switch_prob * 100, 2))  # as percentage
        regimeswitchcount_list.append(int(switch_count))
        # ADF test p-value on last 7 days of prices (not returns)
        adf_pvalue = adfuller(closes)[1]
        adf_pvalue_list.append(adf_pvalue)
        # Average return over last 7 days (mean of simple returns)
        avg_return = simple_returns.mean()
        avg_return_list.append(avg_return)
    except Exception as e:
        entropy_list.append(np.nan)
        regimeswitchprob_list.append(np.nan)
        regimeswitchcount_list.append(np.nan)
        adf_pvalue_list.append(np.nan)
        avg_return_list.append(np.nan)


filterdf['ShannonEntropy'] = entropy_list
filterdf['Regimeswitchprob'] = regimeswitchprob_list
filterdf['Regimeswitchcount'] = regimeswitchcount_list
filterdf['ADF_Pvalue'] = adf_pvalue_list
filterdf['AvgReturn_7d'] = avg_return_list


YF.download() has changed argument auto_adjust default to True


Model is not converging.  Current: 14108.051894007485 is not greater than 14108.598752185002. Delta is -0.5468581775166967
Model is not converging.  Current: 12574.45874764933 is not greater than 12575.06601993501. Delta is -0.607272285678846
Model is not converging.  Current: 13725.64182530201 is not greater than 13726.821696479099. Delta is -1.179871177089808
Model is not converging.  Current: 13998.65453609887 is not greater than 13999.487971467925. Delta is -0.8334353690552234
Model is not converging.  Current: 13862.188349305332 is not greater than 13863.78143890813. Delta is -1.5930896027985
Model is not converging.  Current: 14222.459950255055 is not greater than 14222.559724149436. Delta is -0.09977389438063256
Model is not converging.  Current: 13695.030180988953 is not greater than 13695.707342445203. Delta is -0.6771614562494506
Model is not converging.  Current: 12972.706996138313 is not greater than 12973.724380494541. Delta is -1.0173843562279217
Model is not converging. 

In [8]:
# Show all rows and columns
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
filterdf["Shannon/IV"] = filterdf["ShannonEntropy"] / filterdf["avg_iv"]
display(filterdf)


Unnamed: 0,language,region,quoteType,typeDisp,quoteSourceName,triggerable,customPriceAlertConfidence,currency,shortName,bid,ask,regularMarketChangePercent,exchange,fiftyTwoWeekHigh,fiftyTwoWeekLow,averageAnalystRating,dividendYield,hasPrePostMarketData,firstTradeDateMilliseconds,priceHint,postMarketChangePercent,postMarketTime,postMarketPrice,postMarketChange,regularMarketChange,regularMarketTime,regularMarketPrice,regularMarketDayHigh,regularMarketDayRange,regularMarketDayLow,regularMarketVolume,regularMarketPreviousClose,bidSize,askSize,market,messageBoardId,fullExchangeName,longName,financialCurrency,regularMarketOpen,averageDailyVolume3Month,averageDailyVolume10Day,corporateActions,fiftyTwoWeekLowChange,fiftyTwoWeekLowChangePercent,fiftyTwoWeekRange,fiftyTwoWeekHighChange,fiftyTwoWeekHighChangePercent,fiftyTwoWeekChangePercent,dividendDate,earningsTimestamp,earningsTimestampStart,earningsTimestampEnd,earningsCallTimestampStart,earningsCallTimestampEnd,isEarningsDateEstimate,trailingAnnualDividendRate,trailingPE,dividendRate,trailingAnnualDividendYield,marketState,epsTrailingTwelveMonths,epsForward,epsCurrentYear,priceEpsCurrentYear,sharesOutstanding,bookValue,fiftyDayAverage,fiftyDayAverageChange,fiftyDayAverageChangePercent,twoHundredDayAverage,twoHundredDayAverageChange,twoHundredDayAverageChangePercent,marketCap,forwardPE,priceToBook,sourceInterval,exchangeDataDelayedBy,exchangeTimezoneName,exchangeTimezoneShortName,gmtOffSetMilliseconds,esgPopulated,tradeable,cryptoTradeable,displayName,symbol,ipoExpectedDate,prev_day_price,prev_day_volume,price_volume,historical_volatility,avg_iv,iv/hv_ratio,mean_openInterest_x,mean_openInterest_y,mean_openInterest,hmm_mean_0,hmm_std_0,hmm_mean_1,hmm_std_1,hmm_mean_2,hmm_std_2,pct_last30_in_highest_stdev_state,highest_stdev_state,pct_regimes_last30_higher_than_avg_iv,ShannonEntropy,Regimeswitchprob,Regimeswitchcount,ADF_Pvalue,AvgReturn_7d,Shannon/IV
0,en-US,US,EQUITY,Equity,Nasdaq Real Time Price,True,HIGH,USD,NVIDIA Corporation,173.61,182.83,-2.33316,NMS,183.3,86.62,1.4 - Strong Buy,0.02,True,917015400000,2,-0.70228,1754093000.0,172.5,-1.22,-4.14999,1754078401,173.72,176.54,170.89 - 176.54,170.89,202636888,177.87,4,1,us_market,finmb_32307,NasdaqGS,NVIDIA Corporation,USD,174.09,192781287,161557370,[],87.1,1.005541,86.62 - 183.3,-9.580002,-0.052264,72.94176,1751501000.0,1756325000.0,1756325000.0,1756325000.0,1756328000.0,1756328000.0,False,0.04,56.220066,0.04,0.000225,CLOSED,3.09,4.12,4.30288,40.372963,24387600384,3.438,154.5116,19.208405,0.124317,134.7648,38.9552,0.289061,4236614041600,42.16505,50.529377,15,0,America/New_York,EDT,-14400000,False,False,False,NVIDIA,NVDA,,177.869995,221685400,39431181015.55176,0.253826,0.394598,1.5546,9096.0,9096.0,9096.0,0.006675,0.031073,-0.004445,0.032405,0.110727,0.120742,0.0,2,100.0,0.833454,0.1,6,0.218884,Ticker NVDA 0.000003 dtype: float64,2.112159
1,en-US,US,EQUITY,Equity,Nasdaq Real Time Price,True,HIGH,USD,"Lucid Group, Inc.",2.4,2.47,-1.62601,NMS,4.43,1.93,3.1 - Hold,,True,1600435800000,4,0.206606,1754093000.0,2.425,0.005,-0.04,1754078401,2.42,2.44,2.32 - 2.44,2.32,91282600,2.46,488,575,us_market,finmb_83747444,NasdaqGS,"Lucid Group, Inc.",USD,2.375,152561575,131673640,[],0.49,0.253886,1.93 - 4.43,-2.01,-0.453725,-19.333332,,1754424000.0,1754424000.0,1754424000.0,1754429000.0,1754429000.0,False,0.0,,,0.0,CLOSED,-1.19,-0.88,-0.8878,-2.725839,3072480000,1.044,2.3908,0.0292,0.012214,2.516975,-0.096975,-0.038528,7435401728,-2.75,2.318008,15,0,America/New_York,EDT,-14400000,False,False,False,Lucid,LCID,2021-07-26,2.46,65248900,160512296.489048,1.334923,1.16016,0.869084,28425.25,28425.25,28425.25,0.003372,0.051561,-0.011354,0.040461,0.30257,0.106913,3.333333,2,3.333333,1.378179,0.14,7,0.780178,Ticker LCID -0.000083 dtype: float64,1.187921
2,en-US,US,EQUITY,Equity,Nasdaq Real Time Price,True,HIGH,USD,"Tesla, Inc.",302.56,302.84,-1.82956,NMS,488.54,182.0,2.7 - Hold,,True,1277818200000,2,-0.522095,1754093000.0,301.05,-1.58002,-5.639984,1754078401,302.63,309.31,297.82 - 309.31,297.82,88358015,308.27,4,1,us_market,finmb_27444752,NasdaqGS,"Tesla, Inc.",USD,306.205,107359361,100896010,[],120.630005,0.662802,182.0 - 488.54,-185.91,-0.380542,52.16714,,1753301000.0,1761163000.0,1761163000.0,1753306000.0,1753306000.0,True,0.0,179.071,,0.0,CLOSED,1.69,3.24,1.67868,180.27856,3225449984,23.981,322.9978,-20.367798,-0.063059,321.5904,-18.960388,-0.058958,976117956608,93.40432,12.619574,15,0,America/New_York,EDT,-14400000,False,False,False,Tesla,TSLA,,308.269989,85270900,26286459406.185917,0.532152,0.508855,0.956221,1248.25,1248.25,1248.25,-0.002173,0.042375,0.000958,0.040056,-0.017269,0.132887,0.0,2,100.0,0.758217,0.07,5,0.755459,Ticker TSLA -0.000007 dtype: float64,1.490045
3,en-US,US,EQUITY,Equity,Delayed Quote,False,LOW,USD,Ford Motor Company,10.79,10.81,-2.25836,NYQ,11.97,8.44,3.0 - Hold,5.55,True,76253400000,2,-0.184838,1754093000.0,10.8,-0.02,-0.25,1754078402,10.82,10.92,10.6822 - 10.92,10.6822,68916067,11.07,280,488,us_market,finmb_106335,NYSE,Ford Motor Company,USD,10.92,89785446,70396340,"[{'header': 'Dividend', 'message': 'F announce...",2.38,0.281991,8.44 - 11.97,-1.150001,-0.096074,11.431515,1748822000.0,1753906000.0,1761768000.0,1761768000.0,1753909000.0,1753909000.0,True,0.6,13.871795,0.6,0.054201,CLOSED,0.78,1.75,1.14611,9.440629,3909009920,11.321,10.8888,-0.0688,-0.006318,10.3356,0.4844,0.046867,43062083584,6.182857,0.955746,15,0,America/New_York,EDT,-14400000,False,False,False,,F,,11.07,101090900,1119076232.149506,0.254434,0.348639,1.370252,20423.75,20423.75,20423.75,0.001827,0.02131,0.002969,0.023229,-0.009944,0.063649,0.0,2,50.0,0.711742,0.08,4,0.829486,Ticker F -0.000015 dtype: float64,2.041484
4,en-US,US,EQUITY,Equity,Nasdaq Real Time Price,True,HIGH,USD,Intel Corporation,19.28,19.34,-2.47475,NMS,27.55,17.67,3.0 - Hold,,True,322151400000,2,-0.104603,1754093000.0,19.2898,-0.020199,-0.49,1754078400,19.31,19.55,18.965 - 19.55,18.965,86145589,19.8,18,18,us_market,finmb_21127,NasdaqGS,Intel Corporation,USD,19.472,81354487,99272340,[],1.639999,0.092813,17.67 - 27.55,-8.24,-0.299093,-3.978121,1725149000.0,1753387000.0,1753387000.0,1753387000.0,1753391000.0,1753391000.0,False,0.125,,,0.006313,CLOSED,-4.77,0.97,0.12594,153.32698,4376999936,22.363,21.526,-2.216,-0.102945,21.733,-2.423,-0.111489,84519870464,19.907215,0.86348,15,0,America/New_York,EDT,-14400000,False,False,False,Intel,INTC,,19.799999,90665200,1795170890.827942,0.46638,0.452642,0.970544,20577.75,20577.75,20577.75,0.067438,0.07315,-0.02066,0.065088,-0.003633,0.029751,0.0,0,100.0,1.009857,0.16,9,0.257073,Ticker INTC -0.000071 dtype: float64,2.231026
5,en-US,US,EQUITY,Equity,Nasdaq Real Time Price,True,HIGH,USD,Palantir Technologies Inc.,154.12,154.42,-2.576572,NMS,160.89,21.23,3.0 - Hold,,True,1601472600000,2,0.576782,1754093000.0,155.16,0.889801,-4.080002,1754078401,154.27,158.19,151.0576 - 158.19,151.0576,59911569,158.35,6,6,us_market,finmb_43580005,NasdaqGS,Palantir Technologies Inc.,USD,155.05,79287858,49283940,[],133.04001,6.266604,21.23 - 160.89,-6.619995,-0.041146,540.3902,,1754338000.0,1754338000.0,1754338000.0,1754341000.0,1754341000.0,False,0.0,670.73914,,0.0,CLOSED,0.23,0.47,0.58178,265.16898,2262909952,2.299,139.8546,14.415405,0.103074,96.59565,57.674355,0.59707,364063293440,328.23404,67.10309,15,0,America/New_York,EDT,-14400000,False,False,False,Palantir,PLTR,2024-11-26,158.350006,45342600,7180000986.749268,0.313068,1.025088,3.274335,2143.25,2143.25,2143.25,0.015767,0.028933,0.001592,0.040863,-0.024689,0.081732,3.333333,2,3.333333,0.442637,0.21,10,0.027734,Ticker PLTR 0.000002 dtype: float64,0.431804
6,en-US,US,EQUITY,Equity,Nasdaq Real Time Price,True,HIGH,USD,"SoFi Technologies, Inc.",21.0,22.79,-5.97874,NMS,25.11,6.01,2.8 - Hold,,True,1609770600000,2,-0.375882,1754093000.0,21.1502,-0.0798,-1.35,1754078401,21.23,21.76,20.6 - 21.76,20.6,89014566,22.58,1,2,us_market,finmb_141582707,NasdaqGS,"SoFi Technologies, Inc.",USD,21.7,71582688,93615510,[],15.219999,2.532446,6.01 - 25.11,-3.880001,-0.15452,224.61774,,1753792000.0,1761655000.0,1761655000.0,1753790000.0,1753790000.0,True,0.0,42.46,,0.0,CLOSED,0.5,0.29,0.31217,68.00781,1196179968,6.162,17.4368,3.7932,0.21754,14.5902,6.639799,0.455086,25394900992,73.206894,3.44531,15,0,America/New_York,EDT,-14400000,False,False,False,SoFi,SOFI,2021-06-01,22.58,120095300,2711751864.837456,0.470831,0.636234,1.3513,2317.0,2317.0,2317.0,0.008951,0.111168,-0.066168,0.057826,0.006413,0.034212,0.0,0,0.0,0.891635,0.2,10,0.138222,Ticker SOFI -0.000003 dtype: float64,1.401426
7,en-US,US,EQUITY,Equity,Nasdaq Real Time Price,True,HIGH,USD,"American Airlines Group, Inc.",11.01,11.07,-4.00348,NMS,19.1,8.5,2.1 - Buy,,True,1127827800000,2,0.0,1754093000.0,11.03,0.0,-0.46,1754078401,11.03,11.21,10.9 - 11.21,10.9,71082347,11.49,2,31,us_market,finmb_168569,NasdaqGS,American Airlines Group Inc.,USD,11.21,61488524,73583360,[],2.53,0.297647,8.5 - 19.1,-8.070001,-0.422513,18.857754,1582070000.0,1753360000.0,1761223000.0,1761223000.0,1753360000.0,1753360000.0,True,0.0,13.130953,,0.0,CLOSED,0.84,2.02,0.43114,25.583336,659828992,-5.865,11.53,-0.5,-0.043365,13.292,-2.262,-0.170178,7277913600,5.460396,-1.880648,15,0,America/New_York,EDT,-14400000,False,False,False,American Airlines,AAL,,11.49,84751000,973788970.602036,0.643543,0.469244,0.729156,3532.5,3532.5,3532.5,0.128325,0.097144,-0.072552,0.058652,-0.001553,0.027718,3.333333,0,10.0,1.052653,0.05,3,0.656404,Ticker AAL -0.00002 dtype: float64,2.243297
8,en-US,US,EQUITY,Equity,Delayed Quote,True,HIGH,USD,Apple Inc.,192.07,213.25,-2.500362,NMS,260.1,169.21,1.9 - Buy,0.51,True,345479400000,2,-0.039531,1754093000.0,202.3,-0.080002,-5.190002,1754078402,202.38,213.58,201.5 - 213.58,201.5,97203859,207.57,1,1,us_market,finmb_24937,NasdaqGS,Apple Inc.,USD,210.89,53580620,55084450,"[{'header': 'Dividend', 'message': 'AAPL annou...",33.17,0.196029,169.21 - 260.1,-57.72,-0.221915,-3.2924,1747267000.0,1753992000.0,1761854000.0,1761854000.0,1753996000.0,1753996000.0,True,1.01,30.663637,1.04,0.004866,CLOSED,6.6,8.31,7.38723,27.395926,14840399872,4.431,205.277,-2.896988,-0.014113,221.5797,-19.199692,-0.086649,3003400323072,24.35379,45.67366,15,0,America/New_York,EDT,-14400000,False,False,False,Apple,AAPL,,207.570007,80698400,16750567479.052734,0.143408,0.321967,2.245113,2029.75,2029.75,2029.75,0.001622,0.021692,-0.00443,0.023309,0.017797,0.110934,0.0,2,100.0,0.482193,0.04,2,0.994828,Ticker AAPL -0.000021 dtype: float64,1.497647
9,en-US,US,EQUITY,Equity,Nasdaq Real Time Price,True,HIGH,USD,"Advanced Micro Devices, Inc.",171.58,171.83,-2.61471,NMS,182.5,76.48,1.7 - Buy,,True,322151400000,2,-0.588232,1754093000.0,170.69,-1.00999,-4.61,1754078400,171.7,174.395,166.82 - 174.395,166.82,74849357,176.31,6,7,us_market,finmb_168864,NasdaqGS,"Advanced Micro Devices, Inc.",USD,170.14,51417843,61966810,[],95.21999,1.245031,76.48 - 182.5,-10.800003,-0.059178,27.354992,,1754424000.0,1754424000.0,1754424000.0,1754428000.0,1754428000.0,False,0.0,125.32846,,0.0,CLOSED,1.37,5.1,3.97856,43.15632,1621400064,35.817,137.7562,33.9438,0.246405,123.5056,48.194397,0.39022,278394372096,33.666668,4.793813,15,0,America/New_York,EDT,-14400000,False,False,False,Advanced Micro Devices,AMD,,176.309998,71765300,12652939867.791748,0.38335,0.767916,2.003173,1359.5,1359.5,1359.5,-0.058313,0.056973,0.133844,0.137063,0.005515,0.028071,0.0,1,0.0,0.351099,0.19,10,0.326865,Ticker AMD 0.000029 dtype: float64,0.457211


In [9]:
import nest_asyncio
nest_asyncio.apply()
import asyncio
from ib_insync import *
import pandas as pd
import numpy as np
import ta
import plotly.graph_objects as go

"""
Filter parameters
-----------------
VOL_WINDOW:       ATR period (bars) for volatility calculation (consider higher for daily)
VOL_THRESHOLD:    Minimum ATR percent (ATR/close) for a bar to be considered volatile enough for trading (e.g., 0.01 = 1%)
REGIME_THRESHOLD: Rolling mean of returns must be above this (e.g., -0.1) to be considered "trending" regime
ADX_PERIOD:       ADX calculation period (bars)
ADX_THRESHOLD:    Minimum ADX value to confirm market is trending (20 = classic)
SMA_PERIOD:       SMA bars for trend filter (only long if above, short if below)
"""

# ========= CHART SETTINGS =========
BAR_SIZE = '1 day'      # IBKR bar size for daily candles
DURATION = '5 Y'        # How far back to get data (e.g., '5 Y' = 5 years)
NUM_BARS = 2000         # Approximate daily bars in 5 years
# ================================

# Filter tuning (adjust to your needs)
VOL_WINDOW       = 30        # ATR lookback period (bars); for daily, 14-30 is common
VOL_THRESHOLD    = 0.001     # Minimum ATR% for volatility filter (e.g., 0.01=1%)
REGIME_THRESHOLD = -0.1      # Regime filter threshold
ADX_PERIOD       = 14        # ADX lookback window (bars)
ADX_THRESHOLD    = 20        # Minimum ADX to allow signals (trend filter)
SMA_PERIOD       = 200       # SMA filter period (bars)

EXCHANGE    = 'SMART'
CURRENCY    = 'USD'
LOOKAHEAD   = 4
K_NEIGHBORS = 8

FEATURES = ['RSI', 'ADX', 'CCI', 'MACD']

filterdf['symbol'] = filterdf['symbol'].str.replace('-', ' ')

# ==== Data Fetch Function ====
async def fetch_ibkr_stock(symbol, exchange, currency, bar_size, duration, num_bars):
    """Fetches OHLCV bars from IBKR using specified bar size/duration."""
    ib = IB()
    await ib.connectAsync('127.0.0.1', 7497, clientId=np.random.randint(1000, 10000))
    contract = Stock(symbol, exchange, currency)
    await ib.qualifyContractsAsync(contract)
    bars = await ib.reqHistoricalDataAsync(
        contract=contract,
        endDateTime='',
        durationStr=duration,
        barSizeSetting=bar_size,
        whatToShow='TRADES',
        useRTH=True,
        formatDate=1,
        keepUpToDate=False
    )
    df = util.df(bars)
    df.set_index('date', inplace=True)
    df = df.tail(num_bars)
    ib.disconnect()
    return df


# ==== ML Functions and Filters ====
def lorentzian_distance(a, b):
    return np.sum(np.log(1 + np.abs(a - b)))


def volatility_filter(df, vol_window, vol_thresh):
    atr = ta.volatility.AverageTrueRange(df['high'], df['low'], df['close'], window=vol_window).average_true_range()
    atr_pct = atr / df['close']
    return atr_pct > vol_thresh


def regime_filter(df, threshold):
    close_returns = df['close'].pct_change()
    regime = close_returns.rolling(window=20).mean()
    return regime > threshold


def adx_filter(df, adx_period, adx_threshold):
    adx = ta.trend.ADXIndicator(df['high'], df['low'], df['close'], window=adx_period).adx()
    return adx > adx_threshold


def classify_lorentzian_knn_with_filters(
        df, features, sma_period,
        vol_window, vol_thresh,
        regime_thresh, adx_period, adx_thresh,
        n_neighbors=8, lookahead=4, max_bars_back=500):
    pred = np.full(len(df), np.nan)
    neighbors_info = {}

    closes = df['close'].values
    feature_mat = df[features].values
    sma = df['SMA'].values

    vol_filter = volatility_filter(df, vol_window, vol_thresh)
    regime_filt = regime_filter(df, regime_thresh)
    adx_filt = adx_filter(df, adx_period, adx_thresh)

    length = len(df)

    # Main loop - exclude last 'lookahead' bars
    for idx in range(max_bars_back, length - lookahead):
        if np.isnan(sma[idx]) or closes[idx] == 0:
            continue
        if not (vol_filter.iloc[idx] and regime_filt.iloc[idx] and adx_filt.iloc[idx]):
            continue
        anchor_start = max(0, idx - max_bars_back)
        anchor_indices = np.arange(anchor_start, idx)
        anchor_feats = feature_mat[anchor_indices, :]
        target = feature_mat[idx]
        dists = np.array([lorentzian_distance(target, anchor_feats[j]) for j in range(anchor_feats.shape[0])])
        if len(dists) < n_neighbors:
            continue
        knn_indices = dists.argsort()[:n_neighbors]
        valid = anchor_indices[knn_indices] + lookahead < length
        selected = anchor_indices[knn_indices][valid]
        if len(selected) == 0:
            continue
        y_train = (closes[selected + lookahead] > closes[selected]).astype(int) - \
                  (closes[selected + lookahead] < closes[selected]).astype(int)
        vote = y_train.sum()
        raw_signal = np.sign(vote) if vote != 0 else 0
        if raw_signal == 1 and closes[idx] > sma[idx]:
            pred[idx] = 1
        elif raw_signal == -1 and closes[idx] < sma[idx]:
            pred[idx] = -1
        else:
            pred[idx] = 0
        neighbors_info[idx] = y_train.tolist()

    # Explicit classification for last 'lookahead' bars
    for idx in range(length - lookahead, length):
        if np.isnan(sma[idx]) or closes[idx] == 0:
            continue
        if not (vol_filter.iloc[idx] and regime_filt.iloc[idx] and adx_filt.iloc[idx]):
            continue
        anchor_start = max(0, idx - max_bars_back)
        anchor_indices = np.arange(anchor_start, idx)
        anchor_feats = feature_mat[anchor_indices, :]
        target = feature_mat[idx]
        dists = np.array([lorentzian_distance(target, anchor_feats[j]) for j in range(anchor_feats.shape[0])])
        if len(dists) < n_neighbors:
            continue
        knn_indices = dists.argsort()[:n_neighbors]
        valid = anchor_indices[knn_indices] + lookahead < length
        selected = anchor_indices[knn_indices][valid]
        if len(selected) == 0:
            continue
        y_train = (closes[selected + lookahead] > closes[selected]).astype(int) - \
                  (closes[selected + lookahead] < closes[selected]).astype(int)
        vote = y_train.sum()
        raw_signal = np.sign(vote) if vote != 0 else 0
        if raw_signal == 1 and closes[idx] > sma[idx]:
            pred[idx] = 1
        elif raw_signal == -1 and closes[idx] < sma[idx]:
            pred[idx] = -1
        else:
            pred[idx] = 0
        neighbors_info[idx] = y_train.tolist()

    return pred, neighbors_info


# ==== Process symbols from filterdf ====
async def process_symbols_with_knn(filterdf):
    filterdf['pct_buy_last_8d'] = np.nan
    filterdf['pct_sell_last_8d'] = np.nan

    for i, row in filterdf.iterrows():
        symbol = row['symbol']  # Adjust if your symbol column is named differently
        print(f"Processing {symbol} ...")

        df = await fetch_ibkr_stock(symbol, EXCHANGE, CURRENCY, BAR_SIZE, DURATION, NUM_BARS)

        # Feature Engineering
        df['RSI'] = ta.momentum.RSIIndicator(df['close'], window=14).rsi()
        df['ADX'] = ta.trend.ADXIndicator(df['high'], df['low'], df['close'], window=14).adx()
        df['CCI'] = ta.trend.CCIIndicator(df['high'], df['low'], df['close'], window=20).cci()
        df['SMA'] = ta.trend.SMAIndicator(df['close'], window=SMA_PERIOD).sma_indicator()
        macd_ind = ta.trend.MACD(df['close'], window_slow=26, window_fast=12, window_sign=9)
        df['MACD'] = macd_ind.macd()
        df['MACD_signal'] = macd_ind.macd_signal()
        df['MACD_hist'] = macd_ind.macd_diff()
        df = df.dropna().reset_index()

        preds, _ = classify_lorentzian_knn_with_filters(
            df, FEATURES, SMA_PERIOD, VOL_WINDOW, VOL_THRESHOLD,
            REGIME_THRESHOLD, ADX_PERIOD, ADX_THRESHOLD,
            n_neighbors=K_NEIGHBORS, lookahead=LOOKAHEAD, max_bars_back=500
        )
        df['raw_pred'] = preds

        # Calculate % buy and sell signals in the last 8 days of predictions
        last_8 = df['raw_pred'].tail(8)
        pct_buy = (last_8 == 1).sum() / len(last_8) * 100 if len(last_8) > 0 else 0
        pct_sell = (last_8 == -1).sum() / len(last_8) * 100 if len(last_8) > 0 else 0

        filterdf.at[i, 'pct_buy_last_8d'] = pct_buy
        filterdf.at[i, 'pct_sell_last_8d'] = pct_sell

    return filterdf


# --- Example usage ---
# Make sure you have your filter dataframe loaded with a 'symbol' column
# Example:
# filterdf = pd.DataFrame({'symbol': ['GOOG', 'AAPL', 'MSFT']})

if __name__ == '__main__':
    import sys

    # Replace or load your filter dataframe here
    # For demo, you can define it manually or read from CSV

    df_results = asyncio.run(process_symbols_with_knn(filterdf))
    print(df_results[['symbol', 'pct_buy_last_8d', 'pct_sell_last_8d']])


Processing NVDA ...
Processing LCID ...
Processing TSLA ...
Processing F ...
Processing INTC ...
Processing PLTR ...
Processing SOFI ...
Processing AAL ...
Processing AAPL ...
Processing AMD ...
Processing APLD ...
Processing MARA ...
Processing NIO ...
Processing SMCI ...
Processing HOOD ...
Processing AMZN ...
Processing GOOGL ...
Processing VALE ...
Processing BBD ...
Processing QS ...
Processing BAC ...
Processing RIOT ...
Processing SNAP ...
Processing CLF ...
Processing T ...
Processing ITUB ...
Processing HBAN ...
Processing QUBT ...
Processing CLSK ...
Processing AMCR ...
Processing SBET ...
Processing CCL ...
Processing MU ...
Processing UBER ...
Processing MRVL ...
Processing AVGO ...
Processing CMCSA ...
Processing STLA ...
Processing CSCO ...
Processing MSFT ...
Processing KEY ...
Processing ADT ...
Processing VZ ...
Processing WFC ...
Processing LYFT ...
Processing RKT ...
Processing NKE ...
Processing FUBO ...
Processing EOSE ...
Processing NCLH ...
Processing COIN ...
Pr

In [10]:
filterdf

Unnamed: 0,language,region,quoteType,typeDisp,quoteSourceName,triggerable,customPriceAlertConfidence,currency,shortName,bid,ask,regularMarketChangePercent,exchange,fiftyTwoWeekHigh,fiftyTwoWeekLow,averageAnalystRating,dividendYield,hasPrePostMarketData,firstTradeDateMilliseconds,priceHint,postMarketChangePercent,postMarketTime,postMarketPrice,postMarketChange,regularMarketChange,regularMarketTime,regularMarketPrice,regularMarketDayHigh,regularMarketDayRange,regularMarketDayLow,regularMarketVolume,regularMarketPreviousClose,bidSize,askSize,market,messageBoardId,fullExchangeName,longName,financialCurrency,regularMarketOpen,averageDailyVolume3Month,averageDailyVolume10Day,corporateActions,fiftyTwoWeekLowChange,fiftyTwoWeekLowChangePercent,fiftyTwoWeekRange,fiftyTwoWeekHighChange,fiftyTwoWeekHighChangePercent,fiftyTwoWeekChangePercent,dividendDate,earningsTimestamp,earningsTimestampStart,earningsTimestampEnd,earningsCallTimestampStart,earningsCallTimestampEnd,isEarningsDateEstimate,trailingAnnualDividendRate,trailingPE,dividendRate,trailingAnnualDividendYield,marketState,epsTrailingTwelveMonths,epsForward,epsCurrentYear,priceEpsCurrentYear,sharesOutstanding,bookValue,fiftyDayAverage,fiftyDayAverageChange,fiftyDayAverageChangePercent,twoHundredDayAverage,twoHundredDayAverageChange,twoHundredDayAverageChangePercent,marketCap,forwardPE,priceToBook,sourceInterval,exchangeDataDelayedBy,exchangeTimezoneName,exchangeTimezoneShortName,gmtOffSetMilliseconds,esgPopulated,tradeable,cryptoTradeable,displayName,symbol,ipoExpectedDate,prev_day_price,prev_day_volume,price_volume,historical_volatility,avg_iv,iv/hv_ratio,mean_openInterest_x,mean_openInterest_y,mean_openInterest,hmm_mean_0,hmm_std_0,hmm_mean_1,hmm_std_1,hmm_mean_2,hmm_std_2,pct_last30_in_highest_stdev_state,highest_stdev_state,pct_regimes_last30_higher_than_avg_iv,ShannonEntropy,Regimeswitchprob,Regimeswitchcount,ADF_Pvalue,AvgReturn_7d,Shannon/IV,pct_buy_last_8d,pct_sell_last_8d
0,en-US,US,EQUITY,Equity,Nasdaq Real Time Price,True,HIGH,USD,NVIDIA Corporation,173.61,182.83,-2.33316,NMS,183.3,86.62,1.4 - Strong Buy,0.02,True,917015400000,2,-0.70228,1754093000.0,172.5,-1.22,-4.14999,1754078401,173.72,176.54,170.89 - 176.54,170.89,202636888,177.87,4,1,us_market,finmb_32307,NasdaqGS,NVIDIA Corporation,USD,174.09,192781287,161557370,[],87.1,1.005541,86.62 - 183.3,-9.580002,-0.052264,72.94176,1751501000.0,1756325000.0,1756325000.0,1756325000.0,1756328000.0,1756328000.0,False,0.04,56.220066,0.04,0.000225,CLOSED,3.09,4.12,4.30288,40.372963,24387600384,3.438,154.5116,19.208405,0.124317,134.7648,38.9552,0.289061,4236614041600,42.16505,50.529377,15,0,America/New_York,EDT,-14400000,False,False,False,NVIDIA,NVDA,,177.869995,221685400,39431181015.55176,0.253826,0.394598,1.5546,9096.0,9096.0,9096.0,0.006675,0.031073,-0.004445,0.032405,0.110727,0.120742,0.0,2,100.0,0.833454,0.1,6,0.218884,Ticker NVDA 0.000003 dtype: float64,2.112159,100.0,0.0
1,en-US,US,EQUITY,Equity,Nasdaq Real Time Price,True,HIGH,USD,"Lucid Group, Inc.",2.4,2.47,-1.62601,NMS,4.43,1.93,3.1 - Hold,,True,1600435800000,4,0.206606,1754093000.0,2.425,0.005,-0.04,1754078401,2.42,2.44,2.32 - 2.44,2.32,91282600,2.46,488,575,us_market,finmb_83747444,NasdaqGS,"Lucid Group, Inc.",USD,2.375,152561575,131673640,[],0.49,0.253886,1.93 - 4.43,-2.01,-0.453725,-19.333332,,1754424000.0,1754424000.0,1754424000.0,1754429000.0,1754429000.0,False,0.0,,,0.0,CLOSED,-1.19,-0.88,-0.8878,-2.725839,3072480000,1.044,2.3908,0.0292,0.012214,2.516975,-0.096975,-0.038528,7435401728,-2.75,2.318008,15,0,America/New_York,EDT,-14400000,False,False,False,Lucid,LCID,2021-07-26,2.46,65248900,160512296.489048,1.334923,1.16016,0.869084,28425.25,28425.25,28425.25,0.003372,0.051561,-0.011354,0.040461,0.30257,0.106913,3.333333,2,3.333333,1.378179,0.14,7,0.780178,Ticker LCID -0.000083 dtype: float64,1.187921,25.0,25.0
2,en-US,US,EQUITY,Equity,Nasdaq Real Time Price,True,HIGH,USD,"Tesla, Inc.",302.56,302.84,-1.82956,NMS,488.54,182.0,2.7 - Hold,,True,1277818200000,2,-0.522095,1754093000.0,301.05,-1.58002,-5.639984,1754078401,302.63,309.31,297.82 - 309.31,297.82,88358015,308.27,4,1,us_market,finmb_27444752,NasdaqGS,"Tesla, Inc.",USD,306.205,107359361,100896010,[],120.630005,0.662802,182.0 - 488.54,-185.91,-0.380542,52.16714,,1753301000.0,1761163000.0,1761163000.0,1753306000.0,1753306000.0,True,0.0,179.071,,0.0,CLOSED,1.69,3.24,1.67868,180.27856,3225449984,23.981,322.9978,-20.367798,-0.063059,321.5904,-18.960388,-0.058958,976117956608,93.40432,12.619574,15,0,America/New_York,EDT,-14400000,False,False,False,Tesla,TSLA,,308.269989,85270900,26286459406.185917,0.532152,0.508855,0.956221,1248.25,1248.25,1248.25,-0.002173,0.042375,0.000958,0.040056,-0.017269,0.132887,0.0,2,100.0,0.758217,0.07,5,0.755459,Ticker TSLA -0.000007 dtype: float64,1.490045,0.0,0.0
3,en-US,US,EQUITY,Equity,Delayed Quote,False,LOW,USD,Ford Motor Company,10.79,10.81,-2.25836,NYQ,11.97,8.44,3.0 - Hold,5.55,True,76253400000,2,-0.184838,1754093000.0,10.8,-0.02,-0.25,1754078402,10.82,10.92,10.6822 - 10.92,10.6822,68916067,11.07,280,488,us_market,finmb_106335,NYSE,Ford Motor Company,USD,10.92,89785446,70396340,"[{'header': 'Dividend', 'message': 'F announce...",2.38,0.281991,8.44 - 11.97,-1.150001,-0.096074,11.431515,1748822000.0,1753906000.0,1761768000.0,1761768000.0,1753909000.0,1753909000.0,True,0.6,13.871795,0.6,0.054201,CLOSED,0.78,1.75,1.14611,9.440629,3909009920,11.321,10.8888,-0.0688,-0.006318,10.3356,0.4844,0.046867,43062083584,6.182857,0.955746,15,0,America/New_York,EDT,-14400000,False,False,False,,F,,11.07,101090900,1119076232.149506,0.254434,0.348639,1.370252,20423.75,20423.75,20423.75,0.001827,0.02131,0.002969,0.023229,-0.009944,0.063649,0.0,2,50.0,0.711742,0.08,4,0.829486,Ticker F -0.000015 dtype: float64,2.041484,0.0,0.0
4,en-US,US,EQUITY,Equity,Nasdaq Real Time Price,True,HIGH,USD,Intel Corporation,19.28,19.34,-2.47475,NMS,27.55,17.67,3.0 - Hold,,True,322151400000,2,-0.104603,1754093000.0,19.2898,-0.020199,-0.49,1754078400,19.31,19.55,18.965 - 19.55,18.965,86145589,19.8,18,18,us_market,finmb_21127,NasdaqGS,Intel Corporation,USD,19.472,81354487,99272340,[],1.639999,0.092813,17.67 - 27.55,-8.24,-0.299093,-3.978121,1725149000.0,1753387000.0,1753387000.0,1753387000.0,1753391000.0,1753391000.0,False,0.125,,,0.006313,CLOSED,-4.77,0.97,0.12594,153.32698,4376999936,22.363,21.526,-2.216,-0.102945,21.733,-2.423,-0.111489,84519870464,19.907215,0.86348,15,0,America/New_York,EDT,-14400000,False,False,False,Intel,INTC,,19.799999,90665200,1795170890.827942,0.46638,0.452642,0.970544,20577.75,20577.75,20577.75,0.067438,0.07315,-0.02066,0.065088,-0.003633,0.029751,0.0,0,100.0,1.009857,0.16,9,0.257073,Ticker INTC -0.000071 dtype: float64,2.231026,0.0,62.5
5,en-US,US,EQUITY,Equity,Nasdaq Real Time Price,True,HIGH,USD,Palantir Technologies Inc.,154.12,154.42,-2.576572,NMS,160.89,21.23,3.0 - Hold,,True,1601472600000,2,0.576782,1754093000.0,155.16,0.889801,-4.080002,1754078401,154.27,158.19,151.0576 - 158.19,151.0576,59911569,158.35,6,6,us_market,finmb_43580005,NasdaqGS,Palantir Technologies Inc.,USD,155.05,79287858,49283940,[],133.04001,6.266604,21.23 - 160.89,-6.619995,-0.041146,540.3902,,1754338000.0,1754338000.0,1754338000.0,1754341000.0,1754341000.0,False,0.0,670.73914,,0.0,CLOSED,0.23,0.47,0.58178,265.16898,2262909952,2.299,139.8546,14.415405,0.103074,96.59565,57.674355,0.59707,364063293440,328.23404,67.10309,15,0,America/New_York,EDT,-14400000,False,False,False,Palantir,PLTR,2024-11-26,158.350006,45342600,7180000986.749268,0.313068,1.025088,3.274335,2143.25,2143.25,2143.25,0.015767,0.028933,0.001592,0.040863,-0.024689,0.081732,3.333333,2,3.333333,0.442637,0.21,10,0.027734,Ticker PLTR 0.000002 dtype: float64,0.431804,25.0,0.0
6,en-US,US,EQUITY,Equity,Nasdaq Real Time Price,True,HIGH,USD,"SoFi Technologies, Inc.",21.0,22.79,-5.97874,NMS,25.11,6.01,2.8 - Hold,,True,1609770600000,2,-0.375882,1754093000.0,21.1502,-0.0798,-1.35,1754078401,21.23,21.76,20.6 - 21.76,20.6,89014566,22.58,1,2,us_market,finmb_141582707,NasdaqGS,"SoFi Technologies, Inc.",USD,21.7,71582688,93615510,[],15.219999,2.532446,6.01 - 25.11,-3.880001,-0.15452,224.61774,,1753792000.0,1761655000.0,1761655000.0,1753790000.0,1753790000.0,True,0.0,42.46,,0.0,CLOSED,0.5,0.29,0.31217,68.00781,1196179968,6.162,17.4368,3.7932,0.21754,14.5902,6.639799,0.455086,25394900992,73.206894,3.44531,15,0,America/New_York,EDT,-14400000,False,False,False,SoFi,SOFI,2021-06-01,22.58,120095300,2711751864.837456,0.470831,0.636234,1.3513,2317.0,2317.0,2317.0,0.008951,0.111168,-0.066168,0.057826,0.006413,0.034212,0.0,0,0.0,0.891635,0.2,10,0.138222,Ticker SOFI -0.000003 dtype: float64,1.401426,75.0,0.0
7,en-US,US,EQUITY,Equity,Nasdaq Real Time Price,True,HIGH,USD,"American Airlines Group, Inc.",11.01,11.07,-4.00348,NMS,19.1,8.5,2.1 - Buy,,True,1127827800000,2,0.0,1754093000.0,11.03,0.0,-0.46,1754078401,11.03,11.21,10.9 - 11.21,10.9,71082347,11.49,2,31,us_market,finmb_168569,NasdaqGS,American Airlines Group Inc.,USD,11.21,61488524,73583360,[],2.53,0.297647,8.5 - 19.1,-8.070001,-0.422513,18.857754,1582070000.0,1753360000.0,1761223000.0,1761223000.0,1753360000.0,1753360000.0,True,0.0,13.130953,,0.0,CLOSED,0.84,2.02,0.43114,25.583336,659828992,-5.865,11.53,-0.5,-0.043365,13.292,-2.262,-0.170178,7277913600,5.460396,-1.880648,15,0,America/New_York,EDT,-14400000,False,False,False,American Airlines,AAL,,11.49,84751000,973788970.602036,0.643543,0.469244,0.729156,3532.5,3532.5,3532.5,0.128325,0.097144,-0.072552,0.058652,-0.001553,0.027718,3.333333,0,10.0,1.052653,0.05,3,0.656404,Ticker AAL -0.00002 dtype: float64,2.243297,0.0,37.5
8,en-US,US,EQUITY,Equity,Delayed Quote,True,HIGH,USD,Apple Inc.,192.07,213.25,-2.500362,NMS,260.1,169.21,1.9 - Buy,0.51,True,345479400000,2,-0.039531,1754093000.0,202.3,-0.080002,-5.190002,1754078402,202.38,213.58,201.5 - 213.58,201.5,97203859,207.57,1,1,us_market,finmb_24937,NasdaqGS,Apple Inc.,USD,210.89,53580620,55084450,"[{'header': 'Dividend', 'message': 'AAPL annou...",33.17,0.196029,169.21 - 260.1,-57.72,-0.221915,-3.2924,1747267000.0,1753992000.0,1761854000.0,1761854000.0,1753996000.0,1753996000.0,True,1.01,30.663637,1.04,0.004866,CLOSED,6.6,8.31,7.38723,27.395926,14840399872,4.431,205.277,-2.896988,-0.014113,221.5797,-19.199692,-0.086649,3003400323072,24.35379,45.67366,15,0,America/New_York,EDT,-14400000,False,False,False,Apple,AAPL,,207.570007,80698400,16750567479.052734,0.143408,0.321967,2.245113,2029.75,2029.75,2029.75,0.001622,0.021692,-0.00443,0.023309,0.017797,0.110934,0.0,2,100.0,0.482193,0.04,2,0.994828,Ticker AAPL -0.000021 dtype: float64,1.497647,0.0,37.5
9,en-US,US,EQUITY,Equity,Nasdaq Real Time Price,True,HIGH,USD,"Advanced Micro Devices, Inc.",171.58,171.83,-2.61471,NMS,182.5,76.48,1.7 - Buy,,True,322151400000,2,-0.588232,1754093000.0,170.69,-1.00999,-4.61,1754078400,171.7,174.395,166.82 - 174.395,166.82,74849357,176.31,6,7,us_market,finmb_168864,NasdaqGS,"Advanced Micro Devices, Inc.",USD,170.14,51417843,61966810,[],95.21999,1.245031,76.48 - 182.5,-10.800003,-0.059178,27.354992,,1754424000.0,1754424000.0,1754424000.0,1754428000.0,1754428000.0,False,0.0,125.32846,,0.0,CLOSED,1.37,5.1,3.97856,43.15632,1621400064,35.817,137.7562,33.9438,0.246405,123.5056,48.194397,0.39022,278394372096,33.666668,4.793813,15,0,America/New_York,EDT,-14400000,False,False,False,Advanced Micro Devices,AMD,,176.309998,71765300,12652939867.791748,0.38335,0.767916,2.003173,1359.5,1359.5,1359.5,-0.058313,0.056973,0.133844,0.137063,0.005515,0.028071,0.0,1,0.0,0.351099,0.19,10,0.326865,Ticker AMD 0.000029 dtype: float64,0.457211,75.0,0.0


In [11]:
import datetime
filterdf["AvgReturn_7d"] = 0 
today = datetime.datetime.today().strftime('%Y-%m-%d')
filename = f'/Users/nshaffer/Desktop/equity_vol_screen_{today}.csv'
filterdf.to_csv(filename, index=False)
print(f"Results saved to {filename}")


Results saved to /Users/nshaffer/Desktop/equity_vol_screen_2025-08-02.csv
