<a href="https://colab.research.google.com/github/markhayes0811/Stock/blob/main/TopVolatileStocks.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install lxml tqdm

import os
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime, timedelta, timezone

import numpy as np
import pandas as pd
import requests
from IPython.display import display
from tqdm.notebook import tqdm

# Polygon API key.
# The POLYGON_API_KEY environment variable takes precedence so the secret does
# not have to live in the notebook; the literal is only a backward-compatible
# fallback used when the variable is unset or empty.
# NOTE(review): the fallback key is committed in plain text and should be
# treated as compromised -- rotate it and drop the literal.
API_KEY = os.environ.get("POLYGON_API_KEY") or "H3nRWzRqnMqojU9y1gkbo1UqTbl2peqf"
BASE_URL = "https://api.polygon.io"

# Settings
LIMIT = 50  # Maximum number of rows kept in the final volatility ranking
DAYS = 10  # Use 10 trading days for better volatility estimate

# Load S&P 500 from Wikipedia (as a fallback universe)
def load_sp500():
    """Return the S&P 500 ticker symbols scraped from Wikipedia.

    Parses the HTML tables on the "List of S&P 500 companies" page and
    takes the ``Symbol`` column of the first table.

    Returns:
        list: The values of the ``Symbol`` column, in page order.
    """
    wiki_page = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
    constituents = pd.read_html(wiki_page)[0]
    symbols = constituents['Symbol']
    return symbols.tolist()

# Load today's top gainers and losers from Polygon, keeping only purely alphabetic symbols
def load_active_tickers():
    """Collect the symbols from Polygon's gainers and losers snapshots.

    Queries the ``gainers`` and ``losers`` snapshot endpoints and keeps only
    symbols consisting purely of letters (``str.isalpha``), which drops any
    symbol containing dots, dashes or digits.  A failure on one endpoint is
    printed and skipped so the other endpoint can still contribute.

    Returns:
        list[str]: De-duplicated symbols in sorted order (so repeated runs
        see the same ordering); empty if both requests fail.
    """
    tickers = set()
    for group in ("gainers", "losers"):
        url = f"{BASE_URL}/v2/snapshot/locale/us/markets/stocks/{group}"
        params = {"apiKey": API_KEY}
        try:
            # Without a timeout a stalled connection would block forever.
            res = requests.get(url, params=params, timeout=10)
            res.raise_for_status()
            # Guard against a missing or null "tickers" list and null symbols
            # so one odd record does not discard the whole group.
            data = res.json().get("tickers") or []
            for t in data:
                symbol = t.get("ticker") or ""
                if symbol.isalpha():
                    tickers.add(symbol)
        except Exception as e:
            # Deliberately broad: this is a best-effort source and any
            # failure should be reported, not abort the run.
            print(f"Error fetching {group}: {e}")
    print(f"Loaded {len(tickers)} filtered active tickers from Polygon.")
    return sorted(tickers)

# Fetch daily data from Polygon for a single ticker
def fetch_daily_data(ticker):
    """Compute the recent daily-return volatility of a single ticker.

    Requests adjusted daily bars from Polygon for the last ``DAYS * 2``
    calendar days, keeps the closes of the most recent ``DAYS`` bars and
    takes the standard deviation (``np.std``, population form) of their
    simple day-over-day returns.

    Args:
        ticker: Symbol to query.

    Returns:
        tuple | None: ``(ticker, volatility)`` on success.  ``None`` when
        the response carries no ``results`` key, fewer than two closes are
        available, the volatility is not finite, or the request/parsing
        raises.  Only the last case appends the ticker to the module-level
        ``failed_tickers`` list.
    """
    # Timezone-aware replacement for the deprecated datetime.utcnow().
    now = datetime.now(timezone.utc)
    # Twice as many calendar days as trading days wanted, to leave room for
    # weekends and holidays.
    from_time = (now - timedelta(days=DAYS * 2)).strftime("%Y-%m-%d")
    to_time = now.strftime("%Y-%m-%d")

    url = f"{BASE_URL}/v2/aggs/ticker/{ticker}/range/1/day/{from_time}/{to_time}"
    params = {
        "adjusted": "true",
        "sort": "asc",
        "limit": 500,
        "apiKey": API_KEY
    }
    try:
        # Without a timeout one stalled connection would pin a worker thread.
        res = requests.get(url, params=params, timeout=10)
        res.raise_for_status()
        data = res.json()
        if 'results' not in data:
            print(f"No results for {ticker}: {data}")
            return None
        # Bars are requested in ascending order, so the tail is the newest.
        closes = np.asarray([bar['c'] for bar in data['results']][-DAYS:], dtype=float)
        if len(closes) < 2:
            print(f"Not enough data for {ticker}.")
            return None
        returns = np.diff(closes) / closes[:-1]
        volatility = np.std(returns)
        if not np.isfinite(volatility):
            # A zero close makes a return inf/NaN, which would otherwise
            # sort unpredictably in the final ranking.
            print(f"Non-finite volatility for {ticker}; skipping.")
            return None
        return (ticker, volatility)
    except Exception as e:
        # NOTE(review): HTTP error messages from requests usually include the
        # full request URL, which here carries the apiKey query parameter --
        # confirm and consider redacting before sharing notebook output.
        print(f"Error fetching {ticker}: {e}")
        # failed_tickers is normally created by get_top_volatile_stocks();
        # create it on demand so a direct call does not raise NameError here.
        globals().setdefault("failed_tickers", []).append(ticker)
        return None

# Main function to run everything
def _download_in_colab(path):
    """Offer *path* as a browser download in Google Colab; do nothing elsewhere."""
    try:
        from google.colab import files
    except ImportError:
        return
    files.download(path)


def get_top_volatile_stocks():
    """Rank tickers by recent volatility and export the top ``LIMIT``.

    Builds the universe from ``load_active_tickers()`` plus ``load_sp500()``,
    computes each ticker's volatility with ``fetch_daily_data`` on a pool of
    10 threads, and keeps the ``LIMIT`` highest values.  The ranking is
    displayed and written to ``top_volatile_stocks.csv``; tickers whose fetch
    raised are written to ``failed_tickers.csv``.  Both files are offered as
    downloads when running in Google Colab.

    Returns:
        pandas.DataFrame: Columns ``Ticker`` and ``Volatility``, sorted by
        descending volatility; an empty frame if no ticker produced data.
    """
    active_tickers = load_active_tickers()
    fallback_tickers = load_sp500()
    # Sorted so the processing order does not depend on set hashing.
    tickers = sorted(set(active_tickers + fallback_tickers))

    print(f"Using {len(tickers)} total tickers (active + S&P 500).")

    # fetch_daily_data appends to this module-level list from worker threads;
    # reset it so each run starts clean.
    global failed_tickers
    failed_tickers = []
    results = []
    with ThreadPoolExecutor(max_workers=10) as executor:
        for result in tqdm(executor.map(fetch_daily_data, tickers), total=len(tickers)):
            if result:
                results.append(result)

    print(f"Fetched data for {len(results)} tickers with valid daily price info.")

    if not results:
        print("No valid data collected. Exiting.")
        return pd.DataFrame()

    sorted_results = sorted(results, key=lambda x: x[1], reverse=True)
    top_volatile = sorted_results[:LIMIT]
    df = pd.DataFrame(top_volatile, columns=["Ticker", "Volatility"])
    display(df)

    df.to_csv("top_volatile_stocks.csv", index=False)
    _download_in_colab("top_volatile_stocks.csv")

    if failed_tickers:
        pd.DataFrame(failed_tickers, columns=["Failed Ticker"]).to_csv("failed_tickers.csv", index=False)
        # Goes through the helper so that a missing google.colab module is
        # handled here too, instead of hitting an undefined `files` name.
        _download_in_colab("failed_tickers.csv")

    return df

# Run the full pipeline when this code is executed directly (as a script or a
# notebook cell, where __name__ is "__main__"), but not when it is imported.
if __name__ == "__main__":
    get_top_volatile_stocks()


Loaded 42 filtered active tickers from Polygon.
Using 543 total tickers (active + S&P 500).


  0%|          | 0/543 [00:00<?, ?it/s]

No results for AIM: {'ticker': 'AIM', 'queryCount': 0, 'resultsCount': 0, 'adjusted': True, 'status': 'DELAYED', 'request_id': '131e25d9d9e4d315b77a3d141afb95cc'}
Fetched data for 542 tickers with valid daily price info.


Unnamed: 0,Ticker,Volatility
0,KWMWW,1.142214
1,RBNE,1.033929
2,RGC,0.891476
3,STSSW,0.635927
4,XTIA,0.281244
5,LDTCW,0.243538
6,INEO,0.211185
7,MTEKW,0.206949
8,SAFX,0.183933
9,SDSTW,0.176127


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>