# In [7]:
import yfinance as yf 
from yfinance import EquityQuery
import numpy as np
import pandas as pd
import time

# Tunable parameters. None of these names is referenced in the visible part of
# this file, so the notes below are inferred from the names only — TODO confirm.
size = 5  # presumably a result/batch count
rsi_window = 14  # presumably the RSI look-back length, in periods
roc_window = 5  # presumably the rate-of-change look-back length, in periods
volatility_window = 30  # presumably the look-back length for a volatility estimate

# In [None]:
from yfinance import EquityQuery, screen
import pandas as pd
import datetime
import time

# --- Option-chain helper functions ---
def find_closest_value(data_list, target_value):
    """Return the element of *data_list* nearest to *target_value*.

    Returns ``None`` when *data_list* has no elements. On a tie, the element
    that appears first in iteration order wins (the behaviour of ``min``).
    Emptiness is tested with ``len`` rather than truthiness because the
    caller in this file passes a pandas Series, whose truth value is
    ambiguous.
    """
    if len(data_list) > 0:
        def distance(candidate):
            return abs(candidate - target_value)

        return min(data_list, key=distance)
    return None

def filter_special_expiry(expiry_list):
    """Pick a small, representative subset of expiries.

    The input is sorted and up to four entries are kept: the 2nd lowest
    (when there is more than one), the middle one, the 3rd from last
    (when there are at least three) and the last. Picks that coincide are
    reported once.

    Returns:
        A list of the kept expiries in ascending order, or ``[]`` for an
        empty input. The result is sorted so that the order is the same on
        every run; iterating a set of strings directly would make it depend
        on hash randomization.
    """
    sorted_expiries = sorted(expiry_list)
    length = len(sorted_expiries)
    if length == 0:
        return []

    values_to_keep = {
        sorted_expiries[length // 2],  # middle
        sorted_expiries[-1],  # last
    }
    if length > 1:
        values_to_keep.add(sorted_expiries[1])  # 2nd lowest
    if length >= 3:
        values_to_keep.add(sorted_expiries[-3])  # 3rd to last
    return sorted(values_to_keep)

# Columns reported by get_options. The first three are the original columns;
# the identifying columns follow so callers can group rows by symbol.
_OPTION_COLUMNS = ['strike', 'impliedVolatility', 'openInterest', 'symbol', 'expiry', 'type']


def _empty_options_frame():
    """Return an empty frame carrying the columns get_options always reports."""
    return pd.DataFrame(columns=_OPTION_COLUMNS)


def get_options(symbol, max_retries=3, retry_delay=60):
    """Fetch the nearest-to-the-money option rows for *symbol*.

    For each expiry selected by ``filter_special_expiry``, the call and put
    rows whose strike is closest to the latest close are kept.

    Args:
        symbol: Ticker symbol passed to ``yf.Ticker``.
        max_retries: Number of attempts before giving up.
        retry_delay: Seconds to sleep between failed attempts.

    Returns:
        A DataFrame with the columns in ``_OPTION_COLUMNS``. It is empty when
        the symbol lists no expiries, when no chain has rows, or when every
        attempt failed. Errors are printed, never raised.
    """
    for attempt in range(max_retries):
        try:
            stock = yf.Ticker(symbol)
            expiries = stock.options
            if not expiries:
                return _empty_options_frame()

            history = stock.history(period='1d')
            if history.empty:
                # Raise a readable error instead of the bare IndexError that
                # .iloc[-1] would produce; the retry handling below still applies.
                raise ValueError("no recent price history available")
            current_price = history['Close'].iloc[-1]

            options_data = []
            for expiry in filter_special_expiry(expiries):
                chain = stock.option_chain(expiry)
                for opt_type, df in (('call', chain.calls), ('put', chain.puts)):
                    if df.empty:
                        continue
                    closest_strike = find_closest_value(df['strike'], current_price)
                    filtered = df[df['strike'] == closest_strike].copy()
                    filtered['type'] = opt_type
                    filtered['expiry'] = expiry
                    filtered['symbol'] = symbol
                    options_data.append(filtered)

            if not options_data:
                return _empty_options_frame()
            result = pd.concat(options_data, ignore_index=True)
            # Keep the identifying columns: dropping 'symbol' here makes any
            # per-symbol aggregation of the result impossible.
            return result[_OPTION_COLUMNS]
        except Exception as e:
            # Deliberately broad: any failure for one symbol is reported and
            # retried rather than aborting the caller's whole batch.
            print(f"Error fetching options for {symbol}: {e}")
            if attempt < max_retries - 1:
                time.sleep(retry_delay)
                continue
            print(f"Max retries reached for {symbol}.")
            return _empty_options_frame()
    # Only reached when max_retries <= 0, i.e. no attempt was made.
    return _empty_options_frame()

# --- Screener query ---
# Operand list for the sector filter: the field name "sector" comes first,
# followed by every sector value the screen admits.
_SECTOR_OPERANDS = [
    "sector",
    "Technology",
    "Financial Services",
    "Consumer Cyclical",
    "Communication Services",
    "Basic Materials",
    "Industrials",
]

# All conditions below must hold at once (top-level "and").
screen_query = EquityQuery(
    "and",
    [
        # Region must be "us".
        EquityQuery("is-in", ["region", "us"]),
        # Sector must be one of the values listed above.
        EquityQuery("is-in", _SECTOR_OPERANDS),
        # eodprice >= 0.50
        EquityQuery("GTE", ["eodprice", 0.50]),
        # avgdailyvol3m >= 100000
        EquityQuery("GTE", ["avgdailyvol3m", 100000]),
        # short_percentage_of_float.value <= 100
        EquityQuery("LTE", ["short_percentage_of_float.value", 100]),
    ],
)

# --- Batch processing function ---
def _collect_batch_options(symbols):
    """Fetch option rows for each symbol and keep those with open interest above 100."""
    frames = []
    for symbol in symbols:
        rows = get_options(symbol)
        if rows.empty:
            # Skipping empty frames keeps them from influencing the dtypes
            # of the concatenated result.
            continue
        # Tag rows with their symbol here so the per-symbol grouping does not
        # depend on get_options including that column.
        frames.append(rows.assign(symbol=symbol))
    if not frames:
        return pd.DataFrame(columns=['strike', 'impliedVolatility', 'openInterest', 'symbol'])
    combined = pd.concat(frames, ignore_index=True)
    return combined[combined["openInterest"] > 100]


def _average_iv_by_symbol(option_rows):
    """Return a ['symbol', 'avg_iv'] frame with the mean implied volatility per upper-cased symbol."""
    required = {'symbol', 'impliedVolatility'}
    if option_rows.empty or not required.issubset(option_rows.columns):
        return pd.DataFrame(columns=['symbol', 'avg_iv'])
    keys = option_rows['symbol'].astype(str).str.upper()
    implied_vol = pd.to_numeric(option_rows['impliedVolatility'], errors='coerce')
    return implied_vol.groupby(keys).mean().rename_axis('symbol').reset_index(name='avg_iv')


def _add_volatility_columns(batch_df):
    """Add 'historical_volatility' and 'iv/hv_ratio' to a single batch frame."""
    if 'hvol' in batch_df.columns:
        batch_df = batch_df.rename(
            columns={'hvol': 'historical_volatility', 'avg_iv': 'average_implied_volatility'}
        )
        implied = pd.to_numeric(batch_df["average_implied_volatility"], errors='coerce')
        historical = pd.to_numeric(batch_df["historical_volatility"], errors='coerce')
        batch_df["iv/hv_ratio"] = implied / historical
    else:
        # Handle the case where 'hvol' is missing
        print("Warning: 'hvol' column not found. Skipping historical volatility calculations.")
        batch_df["historical_volatility"] = None
        batch_df["iv/hv_ratio"] = None
    return batch_df


def _append_rows(accumulated, new_rows):
    """Append new_rows to accumulated, without concatenating an empty frame."""
    if accumulated.empty:
        return new_rows.reset_index(drop=True)
    if new_rows.empty:
        return accumulated
    return pd.concat([accumulated, new_rows], ignore_index=True)


def process_batch(offset, filterdf, options, batch_size=100):
    """Screen one page of stocks, attach average implied volatility, and accumulate.

    Args:
        offset: Index of the first screener result to request.
        filterdf: Stock rows accumulated by earlier calls (may be empty).
        options: Option rows accumulated by earlier calls (may be empty).
        batch_size: Number of screener results to request. Defaults to 100.
            NOTE(review): the screener presumably caps the page size —
            confirm against the yfinance documentation before raising it.

    Returns:
        ``(filterdf, options, offset)``. When the screener returns no quotes
        the three inputs come back unchanged (a message is printed);
        otherwise the frames include this batch and ``offset`` has advanced
        by ``batch_size``.
    """
    screener = screen(
        screen_query,
        sortField="avgdailyvol3m",
        sortAsc=False,
        size=batch_size,
        offset=offset
    )
    stocks = screener.get('quotes', [])
    if not stocks:
        print("No more stocks to load.")
        return filterdf, options, offset

    current_filterdf = pd.DataFrame(stocks)

    current_options = _collect_batch_options(current_filterdf['symbol'].tolist())
    options = _append_rows(options, current_options)

    # Only this batch's option rows are averaged: the result is merged into
    # this batch's stocks only.
    avg_iv = _average_iv_by_symbol(current_options)
    current_filterdf['symbol'] = current_filterdf['symbol'].astype(str).str.upper()
    current_filterdf = current_filterdf.merge(avg_iv, on='symbol', how='left')

    # Rename and derive columns on the batch BEFORE appending it. Renaming on
    # the accumulated frame would, from the second batch on, create duplicate
    # 'historical_volatility' / 'average_implied_volatility' columns.
    current_filterdf = _add_volatility_columns(current_filterdf)
    filterdf = _append_rows(filterdf, current_filterdf)

    offset += batch_size
    return filterdf, options, offset


# --- Main loop ---
# Local import keeps this script section self-contained (standard library only).
from pathlib import Path

offset = 0
filterdf = pd.DataFrame()
options = pd.DataFrame()

# Process the first batch
filterdf, options, offset = process_batch(offset, filterdf, options)
today = datetime.datetime.today().strftime('%Y-%m-%d')

# Write to the current user's Desktop when it exists, otherwise to the working
# directory, instead of a path hard-coded to one account's home folder.
output_dir = Path.home() / 'Desktop'
if not output_dir.is_dir():
    output_dir = Path.cwd()
filename = str(output_dir / f'equity_vol_screen_{today}.csv')

filterdf.to_csv(filename, index=False)
print(f"Initial batch saved to {filename}")
print(filterdf)

# Loop to process more batches
while True:
    user_input = input("Process next 100 stocks? (y/n): ")
    if user_input.strip().lower() != 'y':
        break
    previous_offset = offset
    filterdf, options, offset = process_batch(offset, filterdf, options)
    if offset == previous_offset:
        # process_batch returns the offset unchanged when the screener has
        # no more quotes; nothing new to save, so stop prompting.
        break
    filterdf.to_csv(filename, index=False)
    print(f"Updated results saved to {filename}")
    print(filterdf)
