In [3]:
import io
import json
import os
import time

import pandas as pd
import requests
import yfinance as yf

In [4]:
# Symbol-directory text files published by NASDAQ Trader; both live under the
# same directory, so the URLs are derived from one base.
_SYMDIR_BASE_URL = "https://www.nasdaqtrader.com/dynamic/symdir"
NASDAQLISTED_URL = f"{_SYMDIR_BASE_URL}/nasdaqlisted.txt"
OTHERLISTED_URL = f"{_SYMDIR_BASE_URL}/otherlisted.txt"


In [None]:


class TickerInfo:
    """Collect ticker symbols from symbol-directory files and look up market caps.

    On construction the pipe-delimited symbol lists at ``urls`` are downloaded
    and merged into ``self.symbols``. ``filter_with_alpaca`` narrows that list
    by price and dollar volume, and ``get_market_caps`` fetches ``marketCap``
    for each remaining symbol through yfinance.

    Attributes:
        urls: The symbol-directory URLs passed to the constructor.
        params: Tuning parameters: ``DEFAULT_PARAMS`` overlaid with any
            caller-supplied overrides.
        symbols: Current working list of symbols. ``filter_with_alpaca``
            replaces it with the filtered list.
    """

    # Defaults applied underneath caller-supplied ``params`` so that a partial
    # override (e.g. only ``SLEEP_S``) does not lose the remaining settings.
    DEFAULT_PARAMS = {
        "BATCH_SIZE": 200,
        "ALPACA_SNAPSHOT_BATCH": 200,
        "PRICE_MIN": 1.0,
        "PRICE_MAX": 20.0,
        "DVOL_MIN": 300_000.0,
        "DVOL_MAX": 20_000_000.0,
        "SLEEP_S": 1,
        "ALPACA_DATA_BASE_URL": "https://data.alpaca.markets",
    }

    def __init__(self, urls: list[str], params=None):
        """Store the configuration and download the initial symbol list.

        Args:
            urls: URLs of pipe-delimited symbol-directory files.
            params: Optional overrides for ``DEFAULT_PARAMS``; missing keys
                fall back to the defaults.

        Raises:
            Whatever ``get_symbols`` raises (network / HTTP / parsing errors),
            because the download happens during construction.
        """
        self.urls = urls
        self.params = {**self.DEFAULT_PARAMS, **(params or {})}
        self.symbols = self.get_symbols()

    def get_symbols(self) -> list[str]:
        """Download every URL and return the cleaned, de-duplicated symbols.

        The ``ACT Symbol`` column is used when present, otherwise ``Symbol``.
        Symbols are stripped and upper-cased; missing values and symbols
        containing ``$`` are dropped. First-seen order is preserved.

        Returns:
            The list of unique symbols.
        """
        raw = []
        for url in self.urls:
            df = self._download_ticker_list(url)
            column = "ACT Symbol" if "ACT Symbol" in df.columns else "Symbol"
            # dropna() removes genuinely missing cells; real tickers such as
            # "NA" / "NAN" survive because they are kept as strings on parse.
            raw.extend(df[column].dropna().tolist())

        normalized = [str(s).strip().upper() for s in raw]
        cleaned = [s for s in normalized if s and "$" not in s]

        # dict.fromkeys de-duplicates while keeping insertion order.
        unique = list(dict.fromkeys(cleaned))
        if len(unique) != len(cleaned):
            print("Warning: Duplicate symbols found!")
        return unique

    def _download_ticker_list(self, url: str) -> pd.DataFrame:
        """Fetch one pipe-delimited symbol file and parse it into a DataFrame.

        Blank lines and the ``File Creation Time`` line are removed before
        parsing.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        lines = [ln for ln in r.text.splitlines() if ln.strip() and not ln.startswith("File Creation Time")]
        data = io.StringIO("\n".join(lines))
        # Only empty cells count as missing: pandas' default NA tokens include
        # "NA", which is also a valid ticker symbol.
        df = pd.read_csv(data, sep="|", keep_default_na=False, na_values=[""])
        return df

    def filter_with_alpaca(self, auth: dict) -> list[str]:
        """Keep only symbols whose daily bar passes the price / dollar-volume bands.

        Symbols are sent to the ``/v2/stocks/snapshots`` endpoint in batches of
        ``ALPACA_SNAPSHOT_BATCH``. For each returned snapshot the ``dailyBar``
        fields ``c`` (used as price) and ``v`` (used as volume) are read; a
        symbol is kept when ``PRICE_MIN <= c <= PRICE_MAX`` and
        ``DVOL_MIN <= c * v <= DVOL_MAX``. Symbols without both fields are
        skipped.

        Side effect: ``self.symbols`` is replaced by the kept symbols.

        Args:
            auth: Must contain ``ALPACA_KEY`` and ``ALPACA_SECRET``; may contain
                ``ALPACA_DATA_BASE_URL`` to override the configured base URL.

        Returns:
            The list of symbols that passed the filter.

        Raises:
            KeyError: If a required credential is missing from ``auth``.
            requests.HTTPError: If a snapshot request returns an error status.
        """
        alpaca_base = auth.get("ALPACA_DATA_BASE_URL", self.params["ALPACA_DATA_BASE_URL"]).rstrip("/")
        headers = {
            "APCA-API-KEY-ID": auth["ALPACA_KEY"],
            "APCA-API-SECRET-KEY": auth["ALPACA_SECRET"],
        }

        price_min = float(self.params["PRICE_MIN"])
        price_max = float(self.params["PRICE_MAX"])
        dvol_min = float(self.params["DVOL_MIN"])
        dvol_max = float(self.params["DVOL_MAX"])
        snap_batch = int(self.params["ALPACA_SNAPSHOT_BATCH"])
        sleep_s = float(self.params["SLEEP_S"])

        keep = []
        for i in range(0, len(self.symbols), snap_batch):
            batch = self.symbols[i:i + snap_batch]
            res = requests.get(
                f"{alpaca_base}/v2/stocks/snapshots",
                params={"symbols": ",".join(batch)},
                headers=headers,
                timeout=30,
            )
            res.raise_for_status()
            data = res.json()
            # Accept both a {"snapshots": {...}} envelope and a bare mapping.
            snaps = data.get("snapshots", data)

            for sym, snap in snaps.items():
                daily = (snap or {}).get("dailyBar") or {}
                px = daily.get("c")
                vol = daily.get("v")
                if px is None or vol is None:
                    continue
                price = float(px)
                dollar_volume = price * float(vol)
                if price_min <= price <= price_max and dvol_min <= dollar_volume <= dvol_max:
                    keep.append(sym)

            # Pause between batches to stay gentle on the API.
            time.sleep(sleep_s)

        self.symbols = keep
        print(f"Alpaca filter kept {len(keep)} tickers")
        return keep

    def get_market_caps(self, auth: dict) -> dict[str, int]:
        """Return ``{symbol: marketCap}`` for the current universe via yfinance.

        When ``auth`` carries both ``ALPACA_KEY`` and ``ALPACA_SECRET`` the
        universe is first narrowed with ``filter_with_alpaca``; otherwise every
        symbol in ``self.symbols`` is queried. Dots in a symbol are replaced
        with dashes for the yfinance lookup, while the returned dict keeps the
        original symbol as key. Symbols whose lookup fails or has no
        ``marketCap`` are omitted (failures are printed, not raised).

        Args:
            auth: Credential mapping; Alpaca keys are optional.

        Returns:
            Mapping of symbol to the ``marketCap`` value reported by yfinance.
        """
        if auth.get("ALPACA_KEY") and auth.get("ALPACA_SECRET"):
            symbols = self.filter_with_alpaca(auth)
        else:
            # Without Alpaca credentials there is nothing to pre-filter with;
            # fall back to the full list instead of failing on an unbound name.
            print("Alpaca credentials not provided; skipping Alpaca filter")
            symbols = list(self.symbols)

        market_caps = {}
        sleep_s = float(self.params["SLEEP_S"])

        print(f"Fetching market caps one-by-one from yfinance for {len(symbols)} tickers")
        for sym in symbols:
            try:
                info = yf.Ticker(sym.replace(".", "-")).get_info()
                mc = info.get("marketCap")
                if mc is not None:
                    market_caps[sym] = mc
            except Exception as e:
                # Best effort: one failing ticker must not abort the whole run.
                print(f"yfinance error {sym}: {e}")
            time.sleep(sleep_s)

        print(f"yfinance returned market cap for {len(market_caps)} tickers")
        return market_caps


# Build the symbol universe; constructing TickerInfo downloads both
# symbol-directory files right away.
tickers = TickerInfo(urls=[NASDAQLISTED_URL, OTHERLISTED_URL])


In [None]:
# Credentials come from the environment so they never live in the notebook.
# NOTE: the API keys that used to be hardcoded in this cell were exposed and
# should be revoked/rotated with their providers.
# os.environ[...] fails fast with a KeyError when a variable is not set.
# Only the Alpaca keys are passed: get_market_caps does not read any others.
caps = tickers.get_market_caps(
    {
        "ALPACA_KEY": os.environ["ALPACA_KEY"],
        "ALPACA_SECRET": os.environ["ALPACA_SECRET"],
    }
)

Alpaca filter kept 788 tickers
Fetching market caps one-by-one from yfinance for 788 tickers


In [10]:
# Persist the symbol -> market-cap mapping so it can be reloaded without
# repeating the slow per-ticker lookups.
with open("capdata.json", "w", encoding="utf-8") as f:
    json.dump(caps, f, indent=4)

In [14]:
# Display the symbols for which a market cap was collected.
caps.keys()

dict_keys(['AAL', 'AAPL', 'ADBE', 'AMD', 'AMZN', 'BIDU', 'BILI', 'COIN', 'COST', 'CPRX', 'CSCO', 'DOCU', 'GOOGL', 'HOOD', 'INTC', 'LCID', 'META', 'MRNA', 'MSFT', 'NFLX', 'NVDA', 'PEP', 'PLTR', 'PYPL', 'RIOT', 'RIVN', 'ROKU', 'SBUX', 'SHOP', 'SIRI', 'SOFI', 'TLRY', 'TSLA', 'UAL', 'WMT', 'ZM', 'ABBV', 'BA', 'BABA', 'BAC', 'C', 'CARR', 'CCL', 'CVX', 'DAL', 'DIS', 'ET', 'ETSY', 'F', 'FDX', 'FUBO', 'GE', 'GM', 'GS', 'HCA', 'JNJ', 'JPM', 'KO', 'LMT', 'MGM', 'NIO', 'NKE', 'NOK', 'PFE', 'PINS', 'RBLX', 'RKT', 'SNAP', 'SONY', 'SPY', 'SPYG', 'T', 'TGT', 'TSM', 'UBER', 'UNH', 'V', 'VWO', 'VZ', 'WFC', 'XOM'])

In [None]:
# Query the FMP batch market-capitalization endpoint for a few symbols.
# The API key is read from the environment instead of being hardcoded; the key
# that used to be embedded here was exposed and should be rotated.
key = os.environ["FMP_API_KEY"]
res = requests.get(
    "https://financialmodelingprep.com/stable/market-capitalization-batch",
    # Passing query values via `params` lets requests do the URL encoding.
    params={"symbols": "AAPL,MSFT,GOOG", "apikey": key},
    timeout=30,
)
# Surface HTTP errors here rather than when the body is decoded later.
res.raise_for_status()

In [34]:
# Comma-joined preview of the first ten symbols currently held by `tickers`.
preview_symbols = tickers.symbols[:10]
",".join(preview_symbols)

'AACB,AACBR,AACBU,AACG,AADR,AAEQ,AAL,AALG,AAME,AAOI'

In [29]:
# Decode the JSON body of the response stored in `res`.
# NOTE(review): the recorded output lists only AAPL and MSFT although the
# request also asked for GOOG — confirm why that symbol is missing.
res.json()

[{'symbol': 'AAPL', 'date': '2026-01-09', 'marketCap': 3832542658416.9995},
 {'symbol': 'MSFT', 'date': '2026-01-09', 'marketCap': 3562492932151.0005}]

In [None]:

# Single-symbol check: read AAPL's market cap straight from yfinance.
# Indexing with ["marketCap"] raises KeyError if the field is absent.
ticker = yf.Ticker("AAPL")
aapl_info = ticker.info
market_cap = aapl_info["marketCap"]