In [None]:
"""
================================================================================
SCRIPT: 03v1_ingest_eodoptionschain_intrinio.py
PHASE:  01 - Data Ingestion
PIPELINE: UOA (Unusual Options Activity) Research Pipeline
================================================================================

PURPOSE
-------
Siphons End-of-Day (EOD) historical options chain data from Intrinio's API.
This script is one of three data ingestion components in Phase 01:
    (A) Options trades data          - from Massive S3 (trade-by-trade)
    (B) Underlying asset OHLCV       - from Massive S3 (daily bars)
    (C) EOD options chain data       - from Intrinio   (THIS SCRIPT)

The EOD options chain provides Greeks, implied volatility, open interest, and
pricing snapshots needed for contextualizing anomalous trades detected later
in the pipeline.

DATA SOURCE
-----------
Provider:   Intrinio (https://intrinio.com)
Endpoint:   get_options_prices_eod_by_ticker
API Docs:   https://docs.intrinio.com/documentation/python

CONFIGURATION PARAMETERS
------------------------
INTRINIO_API_KEY            : str   - API key for authentication
USE_TXTFILE_INSTEAD_OF_LIST : bool  - True = load tickers from file, False = use list
TICKERS_LIST                : list  - Hardcoded ticker symbols (if above is False)
TICKERS_FILE                : str   - Path to .txt file with one ticker per line
START_DATE                  : str   - Start of date range (YYYY-MM-DD)
END_DATE                    : str   - End of date range (YYYY-MM-DD)
OUTPUT_DIR                  : str   - Directory for output CSV files
TICKER_LIMIT                : int   - Cap on tickers to process (None = all)
PAGE_SIZE                   : int   - Records per API page (default: 100)
MAX_CALLS_PER_SECOND        : int   - Rate limit ceiling (default: 20)

INPUT
-----
- Ticker list: Either from TICKERS_LIST or external .txt file
- Date range:  Filtered to NYSE trading days via pandas_market_calendars

OUTPUT
------
Location:   {OUTPUT_DIR}/
Filename:   {TICKER}_{YYYY-MM-DD}.csv (e.g., CIFR_2025-12-01.csv)
Scope:      One file per ticker per trading day

OUTPUT COLUMNS (34 total)
-------------------------
Pricing:
    date              : Trading date (YYYY-MM-DD)
    close             : Closing price of the option
    close_bid         : Closing bid price
    close_ask         : Closing ask price
    open              : Opening price
    open_bid          : Opening bid price
    open_ask          : Opening ask price
    high              : Intraday high
    low               : Intraday low
    mark              : Mid price ((bid + ask) / 2)
    ask_high          : Highest ask of the day
    ask_low           : Lowest ask of the day
    bid_high          : Highest bid of the day
    bid_low           : Lowest bid of the day

Volume & Interest:
    volume            : Total contracts traded that day
    open_interest     : Outstanding open contracts

Greeks & Volatility:
    implied_volatility: IV from Black-Scholes model
    delta             : Price sensitivity to $1 underlying move
    gamma             : Rate of change of delta
    theta             : Time decay (daily)
    vega              : Sensitivity to 1% IV change

Timestamps & Sizes:
    close_time        : Timestamp of last trade before close
    close_size        : Size of last trade before close
    close_bid_time    : Timestamp of last bid before close
    close_bid_size    : Size of last bid before close
    close_ask_time    : Timestamp of last ask before close
    close_ask_size    : Size of last ask before close

Contract Identifiers:
    code              : Intrinio option code (e.g., MARA251205C00003000)
    ticker            : Underlying asset symbol
    expiration        : Option expiration date (YYYY-MM-DD)
    strike            : Strike price
    type              : 'call' or 'put'
    exercise_style    : 'A' (American) or 'E' (European)

Added by Script:
    quote_date        : Trading date for this record (redundant with date)

ARCHITECTURE
------------
Concurrency:  asyncio + ThreadPoolExecutor (CPU cores × 5 workers)
Rate Limit:   Semaphore-based, respects MAX_CALLS_PER_SECOND
Pagination:   Loops through next_page tokens until exhausted
Calendar:     NYSE trading days only (via pandas_market_calendars)

DEPENDENCIES
------------
pip install intrinio-sdk pandas pandas-market-calendars nest-asyncio

USAGE
-----
# Jupyter Notebook:
await main()

# Standard Python (wrap in asyncio.run if needed):
asyncio.run(main())

================================================================================
"""

import os
import asyncio
import pandas as pd
import pandas_market_calendars as mcal
from functools import partial
from concurrent.futures import ThreadPoolExecutor
import logging
from datetime import datetime
from pathlib import Path
import intrinio_sdk
from intrinio_sdk.rest import ApiException
import nest_asyncio

# Allow nested event loops in Jupyter
nest_asyncio.apply()

#############################################
# SETUP LOGGING
#############################################
# Root logging configuration: INFO and above, each line rendered as
# "YYYY-MM-DD HH:MM:SS [LEVEL] message".
_LOGGING_OPTIONS = {
    'level': logging.INFO,
    'format': '%(asctime)s [%(levelname)s] %(message)s',
    'datefmt': '%Y-%m-%d %H:%M:%S',
}
logging.basicConfig(**_LOGGING_OPTIONS)

# Module-level logger used by the functions below.
logger = logging.getLogger(__name__)

#############################################
# USER CONFIG - MODIFY THESE
#############################################

# API Key
# SECURITY: supply the key through the INTRINIO_API_KEY environment variable,
# which takes precedence when set to a non-empty value. The literal below is
# only a backward-compatible fallback; a key committed to source should be
# treated as exposed - rotate it and delete the literal.
INTRINIO_API_KEY = (
    os.environ.get("INTRINIO_API_KEY")
    or "OmMyNDhmMzlmZTU1OGE0ZmEwNTg4M2NlYzI4NmZlZTAx"
)

# Ticker source toggle
USE_TXTFILE_INSTEAD_OF_LIST = False  # True = use text file, False = use list below

# Option 1: Tickers list (used when USE_TXTFILE_INSTEAD_OF_LIST = False)
TICKERS_LIST = [
    'CIFR', 'MARA'
    # Add more tickers here
]

# Option 2: Text file path (used when USE_TXTFILE_INSTEAD_OF_LIST = True)
TICKERS_FILE = 'tickers.txt'

# Date range (YYYY-MM-DD, both ends passed to the NYSE calendar schedule)
START_DATE = "2025-12-01"
END_DATE = "2025-12-05"

# Output directory
OUTPUT_DIR = 'output/'

# Testing limit (set to None to process all tickers)
TICKER_LIMIT = 2  # Limit to 2 for testing, set to None for all

#############################################
# API CONFIG
#############################################
PAGE_SIZE = 100              # records requested per API page
MAX_CALLS_PER_SECOND = 20    # ceiling used by the rate limiter

# Build the Intrinio client: one ApiClient carrying the key, wrapped by the
# Options API facade that the fetch functions call.
intrinio_api_client = intrinio_sdk.ApiClient()
intrinio_api_client.configuration.api_key['api_key'] = INTRINIO_API_KEY
intrinio_options_api = intrinio_sdk.OptionsApi(intrinio_api_client)

# Make sure the output directory (and any missing parents) exists up front;
# an already-existing directory is not an error.
_output_root = Path(OUTPUT_DIR)
_output_root.mkdir(parents=True, exist_ok=True)

#############################################
# LOAD TICKERS
#############################################
def load_tickers():
    """Return the list of ticker symbols to process.

    The source is chosen by USE_TXTFILE_INSTEAD_OF_LIST: either TICKERS_FILE
    (one symbol per line, blank lines ignored) or a copy of TICKERS_LIST.
    Duplicate symbols are dropped (first occurrence wins, order preserved)
    because every symbol becomes its own concurrent task writing
    ``{symbol}_{date}.csv``; a repeated symbol would fetch the same data twice
    and race on the same output file. TICKER_LIMIT, when not None, is applied
    after de-duplication.

    Returns:
        list[str]: ticker symbols, in their original order.

    Raises:
        OSError: if TICKERS_FILE is selected but cannot be opened.
    """
    if USE_TXTFILE_INSTEAD_OF_LIST:
        logger.info(f"Loading tickers from file: {TICKERS_FILE}")
        # utf-8-sig reads plain UTF-8 and also strips a leading BOM, which
        # would otherwise end up glued to the first symbol.
        with open(TICKERS_FILE, 'r', encoding='utf-8-sig') as f:
            stripped = (line.strip() for line in f)
            tickers = [symbol for symbol in stripped if symbol]
    else:
        logger.info("Loading tickers from list in script")
        tickers = TICKERS_LIST.copy()

    # dict.fromkeys keeps insertion order, so this is an order-preserving dedupe.
    tickers = list(dict.fromkeys(tickers))

    if TICKER_LIMIT is not None:
        tickers = tickers[:TICKER_LIMIT]

    return tickers

#############################################
# SAVE CSV LOCALLY
#############################################
async def save_csv(symbol, quote_date, df):
    """Write one ticker/day DataFrame to ``{OUTPUT_DIR}/{symbol}_{quote_date}.csv``.

    Args:
        symbol: Ticker symbol used as the filename prefix.
        quote_date: Date string used as the filename suffix.
        df: DataFrame to write; the index is not written.

    The path is joined with pathlib so OUTPUT_DIR works with or without a
    trailing slash. Plain string concatenation would turn ``'output'`` into
    ``'outputCIFR_...csv'``, a file outside the intended directory.

    Note: ``DataFrame.to_csv`` is called synchronously, so the event loop is
    busy for the duration of the write.
    """
    file_path = Path(OUTPUT_DIR) / f"{symbol}_{quote_date}.csv"
    df.to_csv(file_path, index=False)
    logger.info(f"✓ Saved {symbol} {quote_date} ({len(df)} rows)")

#############################################
# GLOBAL EXECUTOR FOR SDK CALLS
#############################################
# Thread pool that runs the blocking Intrinio SDK calls off the event loop.
# os.cpu_count() may return None when the count cannot be determined, so fall
# back to 1 core instead of failing with a TypeError at import time.
executor = ThreadPoolExecutor(max_workers=(os.cpu_count() or 1) * 5)

#############################################
# RATE LIMITER
#############################################
# Each permit stands for one API call admitted during the last second.
semaphore = asyncio.Semaphore(MAX_CALLS_PER_SECOND)

async def rate_limited():
    """Wait until one more API call may start without exceeding the limit.

    A permit is taken before returning and handed back exactly one second
    later, so no more than MAX_CALLS_PER_SECOND calls can be admitted in any
    one-second window. Callers are not delayed while permits are available.

    Holding each permit for only ``1 / MAX_CALLS_PER_SECOND`` seconds (the
    previous approach) let MAX_CALLS_PER_SECOND callers through in every such
    interval, i.e. up to MAX_CALLS_PER_SECOND ** 2 calls per second.
    """
    await semaphore.acquire()
    # Release from a timer rather than awaiting here, so the caller proceeds
    # immediately while the permit stays reserved for the full second.
    asyncio.get_running_loop().call_later(1.0, semaphore.release)

#############################################
# WRAPPER FOR INTRINIO SDK CALL
#############################################
async def fetch_page(symbol, quote_date, next_page):
    """Fetch one page of EOD option prices for a ticker on a given date.

    Waits on the rate limiter first, then runs the blocking SDK method in the
    shared thread pool so the event loop stays free.

    Args:
        symbol: Ticker symbol passed positionally to the SDK method.
        quote_date: Date forwarded as the ``date`` argument.
        next_page: Pagination token forwarded as ``next_page``.

    Returns:
        Whatever ``get_options_prices_eod_by_ticker`` returns for this page.
    """
    await rate_limited()

    request_kwargs = {
        "date": quote_date,
        "page_size": PAGE_SIZE,
        "include_related_symbols": False,
        "next_page": next_page,
    }
    # run_in_executor only forwards positional arguments, so the keyword
    # arguments are bound up front with partial.
    sdk_call = partial(
        intrinio_options_api.get_options_prices_eod_by_ticker,
        symbol,
        **request_kwargs,
    )
    event_loop = asyncio.get_event_loop()
    return await event_loop.run_in_executor(executor, sdk_call)

#############################################
# FETCH DATA FOR A SINGLE DAY
#############################################
async def fetch_ticker_day(symbol, quote_date):
    """Download every page of one ticker's EOD chain for one date and save it.

    Pages are requested until the response is empty, has no prices, or carries
    no ``next_page`` token. The collected records are flattened into a
    DataFrame, the ``option.`` / ``price.`` text is dropped from the flattened
    column names, ``ticker`` and ``quote_date`` columns are set, and the frame
    is handed to ``save_csv``.

    An ``ApiException`` stops pagination. Rows collected before the failure
    are still saved (best effort), but a warning is logged so a truncated
    file is not mistaken for a complete chain.

    Args:
        symbol: Ticker symbol to fetch.
        quote_date: Date string to fetch.

    Returns:
        None. Nothing is written when no rows were collected.
    """
    all_prices = []
    next_page = ""
    interrupted = False  # set when an API error cut pagination short

    while True:
        try:
            response = await fetch_page(symbol, quote_date, next_page)
        except ApiException as e:
            logger.error(f"API error for {symbol} {quote_date}: {e}")
            interrupted = True
            break
        if not response or not response.prices:
            break
        all_prices.extend([p.to_dict() for p in response.prices])
        next_page = response.next_page
        if not next_page:
            break

    if not all_prices:
        logger.warning(f"No data found for {symbol} {quote_date}")
        return

    if interrupted:
        # The rows are kept, but the operator must know the file is partial.
        logger.warning(
            f"Partial data for {symbol} {quote_date}: pagination stopped by an "
            f"API error after {len(all_prices)} rows; the saved file is incomplete"
        )

    df = pd.json_normalize(all_prices)
    df.columns = [c.replace("option.", "").replace("price.", "") for c in df.columns]
    df["ticker"] = symbol
    df["quote_date"] = quote_date
    await save_csv(symbol, quote_date, df)

#############################################
# PROGRESS TRACKER CLASS
#############################################
class ProgressTracker:
    """Counts finished tickers and logs a progress line with a rough ETA."""

    def __init__(self, total_tickers, total_days):
        """Record the workload size and start the clock.

        Args:
            total_tickers: Number of tickers expected to complete.
            total_days: Number of trading days per ticker (stored only).
        """
        self.total_days = total_days
        self.total_tickers = total_tickers
        self.completed_tickers = 0
        self.start_time = datetime.now()

    def ticker_completed(self):
        """Register one finished ticker and log progress plus ETA in minutes.

        The ETA extrapolates the average elapsed time per finished ticker
        over the tickers still outstanding.
        """
        self.completed_tickers += 1
        done = self.completed_tickers
        total = self.total_tickers

        seconds_so_far = (datetime.now() - self.start_time).total_seconds()
        if done > 0:
            seconds_per_ticker = seconds_so_far / done
        else:
            seconds_per_ticker = 0
        eta_mins = int(seconds_per_ticker * (total - done) / 60)

        logger.info(
            f"Progress: {done}/{total} tickers complete "
            f"({done / total * 100:.1f}%) - ETA: {eta_mins} mins"
        )

#############################################
# PROCESS ALL DAYS FOR A TICKER
#############################################
async def process_ticker(symbol, trading_days, progress_tracker):
    """Fetch every trading day for one ticker, then report it as finished.

    Days are fetched one after another, in the order given.

    Args:
        symbol: Ticker symbol to process.
        trading_days: Date strings to fetch for this ticker.
        progress_tracker: Object whose ``ticker_completed()`` is called once
            all days have been processed.
    """
    day_count = len(trading_days)
    logger.info(f"Starting {symbol} ({day_count} days)")

    for quote_date in trading_days:
        await fetch_ticker_day(symbol, quote_date)

    progress_tracker.ticker_completed()

#############################################
# MAIN DRIVER
#############################################
async def main():
    """Run the full download: load tickers, resolve trading days, fetch all.

    Trading days are the NYSE sessions between START_DATE and END_DATE. One
    ``process_ticker`` coroutine is created per ticker and all of them are
    awaited together; a summary with the elapsed minutes is logged at the end.
    """
    divider = "="*60
    logger.info(divider)
    logger.info("Starting Options Data Download")
    logger.info(divider)

    tickers = load_tickers()
    ticker_count = len(tickers)
    logger.info(f"Loaded {ticker_count} tickers: {', '.join(tickers)}")

    # Only NYSE sessions inside the configured range are requested.
    schedule = mcal.get_calendar('NYSE').schedule(start_date=START_DATE, end_date=END_DATE)
    trading_days = schedule.index.strftime("%Y-%m-%d").tolist()
    day_count = len(trading_days)

    logger.info(f"Total: {ticker_count} tickers × {day_count} days = {ticker_count * day_count} tasks")
    logger.info(f"Date range: {START_DATE} to {END_DATE}")
    logger.info(divider)

    progress_tracker = ProgressTracker(ticker_count, day_count)

    # One coroutine per ticker, awaited together.
    await asyncio.gather(
        *(process_ticker(symbol, trading_days, progress_tracker) for symbol in tickers)
    )

    elapsed_mins = (datetime.now() - progress_tracker.start_time).total_seconds() / 60
    logger.info(divider)
    logger.info(f"✓ ALL COMPLETE - Processed {ticker_count} tickers in {elapsed_mins:.1f} minutes")
    logger.info(divider)

# Run the pipeline (works both as a script and in a Jupyter cell).
# A bare top-level `await main()` is only valid inside IPython/Jupyter and is
# a SyntaxError in a regular Python file. Because nest_asyncio.apply() is
# called near the top of this file, asyncio.run() can also be used while an
# event loop is already running, so this single form covers both cases.
if __name__ == "__main__":
    asyncio.run(main())