In [1]:
# turn on autoreload
%load_ext autoreload
%autoreload 2

In [2]:
import os, logging, sys
from dotenv import load_dotenv, find_dotenv
from datetime import date
load_dotenv(find_dotenv(), override=True)

ROOT = os.path.abspath("..")          # if your notebook/ lives under project-root/notebook/
SRC  = os.path.join(ROOT, "src")
sys.path.append(SRC)


# quiet httpx unless you want network noise
#logging.getLogger("httpx").setLevel(logging.WARNING)

#from agents import Runner

from agent_strategy import strategy_agent
from agent_pricer import pricer_agent
from agent_coach import coach_agent
from tools_parser import parse_and_store_mvp, parse_and_store_mvp_impl, normalize_and_store_legs_impl
from bus import BUS                      # your file-backed KV/log bus

In [3]:
from typing import Optional
import json
import time
def bus_list(topic: str):
    """Print a one-line summary of ``topic`` followed by each of its keys.

    The keys come from ``BUS.list_keys(topic)``; nothing is returned.
    """
    topic_keys = BUS.list_keys(topic)
    summary = f"[{topic}] {len(topic_keys)} key(s)"
    print(summary)
    for entry_key in topic_keys:
        print("  -", entry_key)

def bus_get(topic: str, key: str, path: Optional[str]=None):
    """Fetch ``topic:key`` from the bus, pretty-print it, and return what was printed from.

    ``path`` is an optional dotted selector (e.g. ``'payload.result'``) that is
    walked through nested dicts. If any segment is missing, a warning is printed
    and the full entry is shown/returned instead. The printed JSON is cut off at
    2000 characters; the returned object is never truncated.

    Returns ``None`` (after printing a notice) when the bus has no such entry.
    """
    entry = BUS.get(topic=topic, key=key)
    if entry is None:
        print(f"⛔ not found: {topic}:{key}")
        return None

    selected = entry
    segments = path.split(".") if path else []
    for segment in segments:
        reachable = isinstance(selected, dict) and segment in selected
        if not reachable:
            # Fall back to the whole entry rather than a partial walk.
            print(f"⚠️ path '{path}' not found; showing full object")
            selected = entry
            break
        selected = selected[segment]

    rendered = json.dumps(selected, indent=2, default=str)
    print(rendered[:2000])
    return selected

def bus_put(topic: str, key: str, payload: dict, producer="notebook"):
    """Write ``payload`` to the bus under ``topic:key`` and print a confirmation."""
    record = dict(topic=topic, key=key, payload=payload, producer=producer)
    BUS.put(**record)
    print(f"✅ wrote {topic}:{key}")

def bus_del(topic: str, key: str):
    """Soft-delete ``topic:key`` by overwriting it with a tombstone payload.

    The entry is not removed: a ``{"status": "deleted", "ts": <now>}`` payload is
    written in its place with producer ``"notebook"``.
    """
    tombstone = {"status":"deleted","ts":time.time()}
    BUS.put(topic=topic, key=key, payload=tombstone, producer="notebook")
    print(f"🗑️  marked deleted {topic}:{key}")

def bus_clear_topic(topic: str):
    """Soft-delete (via ``bus_del``) every key currently listed under ``topic``."""
    listed_keys = BUS.list_keys(topic)
    for entry_key in listed_keys:
        bus_del(topic, entry_key)

In [4]:
# Smoke test of the parser pipeline: free text -> stored spec -> normalized legs on the bus.
ref_date=date.today()
txt="parse strategy: translate this request: i want to trade 500 AAPL jan26 250 300 collars"
# Keep only what follows the first ':' (the "parse strategy" command prefix is dropped).
payload = txt.split(":", 1)[1].strip()
out = parse_and_store_mvp_impl(text=payload)  # direct Python call
# Normalize the stored legs relative to today's date; the result carries the bus key.
out2=normalize_and_store_legs_impl(key=out["key"], ref_year=ref_date.year, ref_month=ref_date.month, ref_day=ref_date.day)
a=bus_get("legs",out2["key"])
# NOTE(review): in the recorded output payload["legs"] is a dict with keys
# "legs"/"issues"/"notes", so `legs` here is that wrapper, not the list of legs
# (that would be a["payload"]["legs"]["legs"]). Also, bus_get returns None when
# the key is missing, which would make the next line raise.
legs=a["payload"]["legs"]

{
  "status": "done",
  "producer": "parser_py",
  "payload": {
    "ok": true,
    "quantity": 500,
    "symbol": "AAPL",
    "maturities": [
      "Jan26"
    ],
    "strikes": [
      250.0,
      300.0
    ],
    "ratio": "1x1",
    "structure": "custom",
    "legs": {
      "legs": [
        {
          "cp": "P",
          "strike": 250.0,
          "side": "AUTO",
          "ratio": 1,
          "qty": 500,
          "expiry": {
            "year": 2026,
            "month": 1,
            "iso": "2026-01-16",
            "label": "Jan26"
          }
        },
        {
          "cp": "C",
          "strike": 300.0,
          "side": "AUTO",
          "ratio": 1,
          "qty": 500,
          "expiry": {
            "year": 2026,
            "month": 1,
            "iso": "2026-01-16",
            "label": "Jan26"
          }
        }
      ],
      "issues": [],
      "notes": ""
    }
  }
}


In [22]:
out2

{'ok': True, 'key': 'AAPL-Jan26-custom', 'issues': []}

In [5]:
from datetime import date
from os import forkpty
from numpy.random import f
import pandas as pd


def numify(df, col, dtype="float64"):
    """Coerce ``df[col]`` to a numeric dtype in place, creating it if absent.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to modify in place.
    col : str
        Column to coerce. Unparseable values become missing.
    dtype : str
        Target dtype, e.g. ``"float64"`` or the nullable ``"Int64"``.

    Returns
    -------
    None
        The frame is mutated; nothing is returned.

    Notes
    -----
    When ``col`` does not exist, an all-missing column of ``dtype`` is added.
    The filler is built from float NaN and then cast, because a scalar
    ``pd.NA`` cannot be stored in a plain NumPy float column.
    """
    if col in df.columns:
        df[col] = pd.to_numeric(df[col], errors="coerce").astype(dtype)
        return
    # NaN -> dtype works for both NumPy floats (stays NaN) and nullable ints (becomes <NA>).
    filler = pd.Series(float("nan"), index=df.index, dtype="float64")
    df[col] = filler.astype(dtype)


def collect_market_and_resolve(
    symbol: str,
    trade_date: str,                    # 'YYYY-MM-DD'
    requested_legs: list[dict],         # [{"cp":"P","strike":250,"expiry":{"year":2026,"month":1,"iso":None}}, ...]
    getMarketData,                      # ivolatility method: setMethod('/equities/eod/stock-opts-by-param')
    dte_from: int = 0,
    dte_to: int = 760,
    mny_from: int = -90,
    mny_to: int = 90,
) -> dict:
    """
    Fetch the call and put chains for ``symbol`` and resolve each requested leg
    to one concrete listed option row.

    Parameters
    ----------
    symbol : underlying ticker (upper-cased before use).
    trade_date : trade date passed through to ``getMarketData`` ('YYYY-MM-DD').
    requested_legs : list of dicts with ``cp``, ``strike`` (or ``k``) and an
        ``expiry`` dict holding either ``iso`` or ``year``/``month``.
    getMarketData : callable taking the keyword arguments ``symbol``,
        ``tradeDate``, ``dteFrom``, ``dteTo``, ``moneynessFrom``,
        ``moneynessTo`` and ``cp``; it is called once for calls, once for puts.
    dte_from, dte_to, mny_from, mny_to : chain filters forwarded unchanged.

    Returns:
      {
        "ok": bool,                      # True only if every leg produced a row
        "dataset": [ {optionId, symbol, cp, expiration, strike, bid, ask, iv, ...} ],
        "issues": [ ... ],
        "chain_ref": df  # cleaned DataFrame for charts
      }

    Issue codes: NO_CHAIN, BAD_CHAIN (chain lacks expiration/cp columns),
    BAD_LEG (leg has no usable cp/strike), NO_EXP, EXPIRY_ADJUSTED (no expiry
    listed in the requested month, nearest listed one used), NO_STRIKE,
    STRIKE_ADJUSTED, ROW_MISSING.
    """
    import numpy as np                   # function-local, as in the original strike helper
    from datetime import date as _date

    def _failure(code, msg, chain):
        return {"ok": False, "dataset": [], "issues": [{"code": code, "msg": msg}], "chain_ref": chain}

    issues = []
    sym = symbol.upper()

    # 1) Bulk fetch calls & puts
    query = dict(symbol=sym, tradeDate=trade_date, dteFrom=dte_from, dteTo=dte_to,
                 moneynessFrom=mny_from, moneynessTo=mny_to)
    calls = getMarketData(cp='C', **query)
    puts  = getMarketData(cp='P', **query)
    # Drop empty/None results up front so concat never sees an empty frame.
    frames = [f for f in (calls, puts) if f is not None and not f.empty]
    df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    if df.empty:
        return _failure("NO_CHAIN", "no data returned", df)

    # 2) Clean/normalize columns we'll use
    colmap = {
        "expiration_date":"expiration", "call_put":"cp", "price_strike":"strike",
        "Bid":"bid", "Ask":"ask", "iv":"iv", "option_id":"optionId", "option_symbol":"osym",
        "openinterest":"openInterest", "volume":"volume"
    }
    df = df.rename(columns={k:v for k,v in colmap.items() if k in df.columns}).copy()
    absent = [c for c in ("expiration", "cp") if c not in df.columns]
    if absent:
        return _failure("BAD_CHAIN", f"chain is missing column(s): {absent}", df)

    df["cp"] = df["cp"].str.upper().str[0]
    # `df.get(col, 0)` returns a plain int when the column is absent, which has no
    # .fillna; branch explicitly instead.
    if "is_settlement" in df.columns:
        df["is_settlement"] = df["is_settlement"].fillna(0)
    else:
        df["is_settlement"] = 0

    for field in ["bid", "ask", "iv", "strike"]:
        numify(df, field)

    for field in ["openInterest", "volume"]:
        numify(df, field, dtype="Int64")

    # 3) Index for fast selection: per (exp, cp) keep rows sorted by strike.
    #    Rows whose strike failed numeric coercion are excluded: a NaN strike
    #    would otherwise win every nearest-strike search.
    by_key = {}
    for (exp, cp), g in df.groupby(["expiration","cp"]):
        g = g.dropna(subset=["strike"]).sort_values("strike")
        if not g.empty:
            by_key[(exp, cp)] = g
    listed = sorted({e for (e, _cp) in by_key})

    # 4) Selection helpers
    def third_friday(year: int, month: int):
        first = _date(year, month, 1)
        return _date(year, month, 1 + (4 - first.weekday()) % 7 + 14)

    def pick_expiry(leg):
        """Return (expiry, snapped?) for a leg; expiry is None when nothing is listed."""
        exp = leg.get("expiry") or {}
        iso = exp.get("iso")
        if iso:
            return iso, False
        if not listed:
            return None, False
        y, m = exp.get("year"), exp.get("month")
        try:
            y, m = int(y), int(m)
            target = pd.Timestamp(third_friday(y, m))
        except (TypeError, ValueError):
            # No usable year/month: nothing to be "near" to, take the earliest listed expiry.
            return listed[0], True
        in_month = [e for e in listed
                    if pd.Timestamp(e).year == y and pd.Timestamp(e).month == m]
        if in_month:
            # prefer the earliest in-month expiry (often weeklies + monthly)
            return in_month[0], False
        # Nothing listed in that month: closest listed expiry to the month's 3rd Friday.
        nearest = min(listed, key=lambda e: abs((pd.Timestamp(e) - target).days))
        return nearest, True

    def pick_strike(exp, cp: str, target_k: float):
        """Return (chosen, below, above, exact?) using the distinct listed strikes."""
        g = by_key.get((exp, cp))
        if g is None or g.empty:
            return None, None, None, False
        # Distinct strikes so below/above are real neighbours even when a strike
        # appears on several rows.
        strikes = np.unique(g["strike"].to_numpy(dtype="float64"))
        idx = int(np.argmin(np.abs(strikes - target_k)))
        chosen = strikes[idx]
        below = strikes[max(idx-1, 0)]
        above = strikes[min(idx+1, len(strikes)-1)]
        exact = float(chosen) == float(target_k)
        return float(chosen), float(below), float(above), exact

    def best_row(exp, cp: str, strike: float):
        g = by_key.get((exp, cp))
        if g is None:
            return None
        sel = g[g["strike"] == strike]
        if sel.empty:
            return None
        # prefer non-settlement, then highest OI, then volume
        sel = sel.sort_values(by=["is_settlement","openInterest","volume"], ascending=[True, False, False])
        return sel.iloc[0].to_dict()

    def usable(value) -> bool:
        """True for a present, non-missing, truthy identifier (NaN is truthy, so test it first)."""
        if value is None:
            return False
        try:
            if pd.isna(value):
                return False
        except (TypeError, ValueError):
            pass
        return bool(value)

    # 5) Resolve each requested leg
    rows = []
    for i, leg in enumerate(requested_legs):
        raw_cp = leg.get("cp")
        raw_k = leg["strike"] if "strike" in leg else leg.get("k")
        try:
            if raw_cp is None:
                raise ValueError("missing cp")
            cp = str(raw_cp).upper()[0]
            k = float(raw_k)
            if k != k:                    # NaN strike
                raise ValueError("strike is NaN")
        except (TypeError, ValueError, IndexError):
            issues.append({"code":"BAD_LEG","msg":f"leg {i}: needs 'cp' and a numeric 'strike' (got cp={raw_cp!r}, strike={raw_k!r})"})
            continue

        exp, month_snap = pick_expiry(leg)
        if not exp:
            issues.append({"code":"NO_EXP","msg":f"leg {i}: no expiry available"}); continue
        if month_snap:
            wanted = leg.get("expiry") or {}
            issues.append({"code":"EXPIRY_ADJUSTED",
                           "msg":f"leg {i}: no listed expiry for year={wanted.get('year')} month={wanted.get('month')}; using {exp}"})

        chosen, below, above, exact = pick_strike(exp, cp, k)
        if chosen is None:
            issues.append({"code":"NO_STRIKE","msg":f"leg {i}: no strikes for {cp} @ {exp}"}); continue
        if not exact:
            issues.append({"code":"STRIKE_ADJUSTED","msg":f"leg {i}: {k} -> {chosen} for {cp} @ {exp}", "details":{"below":below,"above":above}})

        r = best_row(exp, cp, chosen)
        if r is None:
            issues.append({"code":"ROW_MISSING","msg":f"leg {i}: missing row {cp} {chosen} @ {exp}"}); continue

        option_id = next((v for v in (r.get("optionId"), r.get("osym")) if usable(v)), None)
        if option_id is None:
            option_id = f"{sym}:{exp}:{cp}:{int(round(chosen))}"

        rows.append({
            "optionId": option_id,
            "symbol": sym, "cp": cp, "expiration": exp, "strike": float(chosen),
            "bid": r.get("bid"), "ask": r.get("ask"), "iv": r.get("iv"),
            "openInterest": r.get("openInterest"), "volume": r.get("volume"),
            "delta":r.get("delta"),
            "gamma":r.get("gamma"),
            "vega":r.get("vega"),
            "underlying_price":r.get("underlying_price")
        })

    return {"ok": len(rows)==len(requested_legs), "dataset": rows, "issues": issues, "chain_ref": df}

In [6]:
import dotenv
import os
import ivolatility as ivol
import pandas as pd

# Load .env into the process environment and read the iVolatility API key from it.
dotenv.load_dotenv()
api_key = os.getenv("IVOL_API_KEY")
if not api_key:
    raise RuntimeError("❌ IVOL_API_KEY not found in .env file or environment.")

ivol.setLoginParams(apiKey=api_key)
# The bare `api_key` expression that used to sit here was a no-op mid-cell and
# would have echoed the secret into the saved notebook had it been the last line.

# Callable bound to the EOD option-chain endpoint; invoked later with keyword filters.
getMarketData = ivol.setMethod('/equities/eod/stock-opts-by-param')

In [7]:
# Raw AAPL chain for one hard-coded trade date: calls first, then puts, then stacked.
marketData = getMarketData(symbol='AAPL',tradeDate='2025-09-25',dteFrom=0,dteTo=760,moneynessFrom=-100,moneynessTo=100,cp='C')
marketData_puts=getMarketData(symbol='AAPL',tradeDate='2025-09-25',dteFrom=0,dteTo=760,moneynessFrom=-100,moneynessTo=100,cp='P')
# concat is called without ignore_index, so each frame keeps its own row labels
# (the recorded output shows labels 0..1094 for 2190 rows, i.e. repeated labels).
marketData=pd.concat([marketData,marketData_puts])


In [5]:
marketData["openinterest"]

0       17
1       20
2       12
3       10
4        5
        ..
1090     0
1091     0
1092     0
1093     0
1094     0
Name: openinterest, Length: 2190, dtype: int64

In [8]:

# Two legs identified by year/month only (no "iso"), so the resolver must pick the expiry.
requested_legs = [
  {"cp":"P","strike":250.0,"expiry":{"year":2026,"month":1}},  # Jan26 P250
  {"cp":"C","strike":300.0,"expiry":{"year":2026,"month":1}},  # Jan26 C300
]

out = collect_market_and_resolve("AAPL", "2025-10-07", requested_legs, getMarketData)
# This tuple is not the cell's last expression, so the notebook does not display it.
out["ok"], out["issues"], out["dataset"][:2]
chain_df = out["chain_ref"]   # for smiles/term-structure charts

In [9]:
# Importing collect_market_and_resolve here rebinds the name, shadowing the
# function defined earlier in this notebook for every later cell.
from market_data_loader import init_ivol_options_client, collect_market_and_resolve

# getMarketData is likewise replaced by the module's client factory result.
getMarketData = init_ivol_options_client()
# Relies on requested_legs from the earlier cell still being in the kernel.
out = collect_market_and_resolve("AAPL", "2025-10-07", requested_legs, getMarketData)

dict_keys(['ok', 'dataset', 'issues', 'chain_ref'])

In [17]:
chain_df[chain_df["expiration"]=="2026-01-16"].columns

Index(['c_date', 'osym', 'dte', 'stocks_id', 'expiration', 'cp', 'strike',
       'price_open', 'price_high', 'price_low', 'price', 'volume',
       'openInterest', 'iv', 'delta', 'preiv', 'gamma', 'theta', 'vega', 'rho',
       'ask', 'bid', 'underlying_price', 'calc_OTM', 'optionId',
       'is_settlement'],
      dtype='object')

In [9]:
out

{'ok': True,
 'dataset': [{'optionId': 128416790,
   'symbol': 'AAPL',
   'cp': 'P',
   'expiration': '2026-01-16',
   'strike': 250.0,
   'bid': 9.0,
   'ask': 9.15,
   'iv': 0.244109,
   'openInterest': 16611,
   'volume': 318,
   'delta': -0.375373,
   'gamma': 0.011914,
   'vega': 0.513147,
   'underlying_price': 256.48},
  {'optionId': 128416807,
   'symbol': 'AAPL',
   'cp': 'C',
   'expiration': '2026-01-16',
   'strike': 300.0,
   'bid': 1.75,
   'ask': 1.79,
   'iv': 0.228176,
   'openInterest': 47201,
   'volume': 1465,
   'delta': 0.122472,
   'gamma': 0.006597,
   'vega': 0.273568,
   'underlying_price': 256.48}],
 'issues': [],
 'chain_ref':           c_date                   osym  dte  stocks_id  expiration cp  \
 0     2025-10-07  AAPL  251010C00110000    3        799  2025-10-10  C   
 1     2025-10-07  AAPL  251010C00120000    3        799  2025-10-10  C   
 2     2025-10-07  AAPL  251010C00125000    3        799  2025-10-10  C   
 3     2025-10-07  AAPL  251010C001300

In [None]:
# The v2 import rebinds both names again; this collect_market_and_resolve is awaited below.
from market_data_loader_v2 import init_ivol_options_client, collect_market_and_resolve

getMarketData = init_ivol_options_client()  # needs IVOL_API_KEY in env

# One leg addressed by year/month (iso explicitly None), one by exact ISO expiry.
legs = [
    {"cp": "C", "strike": 500, "expiry": {"year": 2026, "month": 6, "iso": None}},
    {"cp": "P", "strike": 440, "expiry": {"iso": "2026-01-16"}},
]

# NOTE(review): the recorded output below this cell shows symbol 'SPY' and a chain
# dated 2024-10-11, which does not match the AAPL / 2025-10-11 arguments here —
# presumably the output is stale from an earlier run; re-run to confirm.
res = await collect_market_and_resolve(
    symbol="AAPL",
    trade_date="2025-10-11",
    requested_legs=legs,
    getMarketData=getMarketData,
    dte_from=0, dte_to=760, mny_from=-90, mny_to=90,
)

if not res["ok"]:
    print("Issues:", res["issues"])
print("Rows:", res["dataset"][:2])
chain_df = res["chain_ref"]

Rows: [{'optionId': 132361949, 'symbol': 'SPY', 'cp': 'C', 'expiration': '2026-06-18', 'strike': 500.0, 'bid': 123.93, 'ask': 126.15, 'mark': 125.04, 'iv': 0.222538, 'openInterest': 46, 'volume': 1, 'delta': 0.787874, 'gamma': 0.00166, 'vega': 2.09094, 'underlying_price': 579.55, 'snapped_expiry': False, 'snapped_strike': False}, {'optionId': 128415714, 'symbol': 'SPY', 'cp': 'P', 'expiration': '2026-01-16', 'strike': 440.0, 'bid': 8.81, 'ask': 8.96, 'mark': 8.885000000000002, 'iv': 0.246596, 'openInterest': 2333, 'volume': 6, 'delta': -0.103674, 'gamma': 0.001146, 'vega': 1.219168, 'underlying_price': 579.55, 'snapped_expiry': False, 'snapped_strike': False}]


In [43]:
chain_df

Unnamed: 0,c_date,osym,dte,stocks_id,expiration,cp,strike,price_open,price_high,price_low,...,theta,vega,rho,ask,bid,underlying_price,calc_OTM,optionId,is_settlement,mark
0,2024-10-11,SPY 241011C00300000,0,627,2024-10-11,C,300.0,276.08,276.08,276.08,...,0.000000,0.000000,0.00000,281.40,277.84,579.55,-48.24,134792703,0,279.620
1,2024-10-11,SPY 241011C00305000,0,627,2024-10-11,C,305.0,,,,...,0.000000,0.000000,0.00000,276.40,272.84,579.55,-47.37,134792705,0,274.620
2,2024-10-11,SPY 241011C00310000,0,627,2024-10-11,C,310.0,,,,...,0.000000,0.000000,0.00000,271.40,267.84,579.55,-46.51,134792707,0,269.620
3,2024-10-11,SPY 241011C00315000,0,627,2024-10-11,C,315.0,,,,...,0.000000,0.000000,0.00000,266.40,262.84,579.55,-45.65,134792709,0,264.620
4,2024-10-11,SPY 241011C00320000,0,627,2024-10-11,C,320.0,,,,...,0.000000,0.000000,0.00000,261.31,257.84,579.55,-44.78,134792711,0,259.575
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9251,2024-10-11,SPY 260618P00825000,615,627,2026-06-18,P,825.0,,,,...,-0.003254,0.566045,-0.04999,248.00,243.00,579.55,-42.35,133373834,0,245.500
9252,2024-10-11,SPY 260618P00830000,615,627,2026-06-18,P,830.0,,,,...,-0.003287,0.562164,-0.04999,253.00,248.00,579.55,-43.21,133373836,0,250.500
9253,2024-10-11,SPY 260618P00835000,615,627,2026-06-18,P,835.0,,,,...,-0.003321,0.560238,-0.04999,258.00,253.00,579.55,-44.08,133460624,0,255.500
9254,2024-10-11,SPY 260618P00840000,615,627,2026-06-18,P,840.0,,,,...,-0.003354,0.559264,-0.04999,263.00,258.00,579.55,-44.94,133460626,0,260.500


In [12]:
bus_list("legs")
#bus_get("spec", 'AAPL-Jan26-custom') 


[legs] 1 key(s)
  - AAPL-Jan26-custom


In [9]:
bus_get("spec", 'AAPL-Jan26-custom')

{
  "status": "done",
  "producer": "parser_py",
  "payload": {
    "ok": true,
    "quantity": 500,
    "symbol": "AAPL",
    "maturities": [
      "Jan26"
    ],
    "strikes": [
      250.0,
      300.0
    ],
    "ratio": "1x1",
    "structure": "custom",
    "legs": {
      "legs": [
        {
          "cp": "P",
          "strike": 250.0,
          "side": "AUTO",
          "ratio": 1,
          "qty": 500,
          "expiry": {
            "year": 2026,
            "month": 1,
            "iso": "2026-01-16",
            "label": "Jan26"
          }
        },
        {
          "cp": "C",
          "strike": 300.0,
          "side": "AUTO",
          "ratio": 1,
          "qty": 500,
          "expiry": {
            "year": 2026,
            "month": 1,
            "iso": "2026-01-16",
            "label": "Jan26"
          }
        }
      ],
      "issues": [],
      "notes": ""
    }
  }
}


{'status': 'done',
 'producer': 'parser_py',
 'payload': {'ok': True,
  'quantity': 500,
  'symbol': 'AAPL',
  'maturities': ['Jan26'],
  'strikes': [250.0, 300.0],
  'ratio': '1x1',
  'structure': 'custom',
  'legs': {'legs': [{'cp': 'P',
     'strike': 250.0,
     'side': 'AUTO',
     'ratio': 1,
     'qty': 500,
     'expiry': {'year': 2026,
      'month': 1,
      'iso': '2026-01-16',
      'label': 'Jan26'}},
    {'cp': 'C',
     'strike': 300.0,
     'side': 'AUTO',
     'ratio': 1,
     'qty': 500,
     'expiry': {'year': 2026,
      'month': 1,
      'iso': '2026-01-16',
      'label': 'Jan26'}}],
   'issues': [],
   'notes': ''}}}

In [44]:
from market_data_loader_v2 import fetch_polygon_dividends
from dividends import forecast_dividends_from_history, to_quantlib_dividend_schedule
# Dividend history for AAPL; later cells reuse `df`.
df= await fetch_polygon_dividends("AAPL")
# Not the cell's last expression, so this bare `df` displays nothing.
df
# NOTE(review): the recorded output for this cell is an UnboundLocalError on
# 'template_mmdd'; no such name is assigned in this cell, so it presumably comes
# from one of the imported functions — confirm which before relying on `fc`.
fc=forecast_dividends_from_history(df)
ql_div, sched_tuples=to_quantlib_dividend_schedule(fc, date_col='ex_dividend_date', amount_col='cash_amount')
fc

UnboundLocalError: cannot access local variable 'template_mmdd' where it is not associated with a value

In [15]:
df

Unnamed: 0,id,cash_amount,currency,declaration_date,dividend_type,ex_dividend_date,frequency,pay_date,record_date,ticker
0,E2ab18998fec423fdb40a4f3bdb6df573c30c51c00039e...,2.65,USD,NaT,CD,2012-08-09,0,2012-08-16,2012-08-13,AAPL
1,E11cfbce7b91c73bba1c5601ebf5e9eee8029555e840c8...,2.65,USD,2012-10-25,CD,2012-11-07,0,2012-11-15,2012-11-12,AAPL
2,E02164e6e37d6cd92baf3031d438e9a43b9108f829b392...,2.65,USD,2013-01-23,CD,2013-02-07,0,2013-02-14,2013-02-11,AAPL
3,E70c3a508297eeed801e122bf8e8d22ff071586fedc534...,3.05,USD,2013-04-23,CD,2013-05-09,4,2013-05-16,2013-05-13,AAPL
4,Ea728d471c41372685beac963f316e597f573a5e645251...,3.05,USD,2013-07-23,CD,2013-08-08,4,2013-08-15,2013-08-12,AAPL
5,E8d0e55b85e7913bfb9ba5b9be3ca250f02b20520d458d...,3.05,USD,2013-10-28,CD,2013-11-06,4,2013-11-14,2013-11-11,AAPL
6,E829ac37db0f5e0afb05a3a0cdadd1da9c83a63471b4dd...,3.05,USD,2014-01-27,CD,2014-02-06,4,2014-02-13,2014-02-10,AAPL
7,E8c00b94f3daa63f295cd3a805106a98569e9f99df11ae...,3.29,USD,2014-04-23,CD,2014-05-08,4,2014-05-15,2014-05-12,AAPL
8,E0d1cf507e1730632e9067d3e087be226465fc08200c59...,0.47,USD,2014-07-22,CD,2014-08-07,4,2014-08-14,2014-08-11,AAPL
9,E8c84538254e81904c13ed8989b06de1760add6f31e763...,0.47,USD,2014-10-20,CD,2014-11-06,4,2014-11-13,2014-11-10,AAPL


In [18]:
import pandas as pd

ALLOWED_FREQ_STEPS = {1:12, 2:6, 3:4, 4:3, 6:2, 12:1}

def forecast_dividends_from_history_rolling(
    df_history: pd.DataFrame,
    *,
    years_ahead: int = 3,
    increase_dollars: float = 0.05,
    ref_year: int | None = None,
    date_field: str = "ex_dividend_date",
    use_business_day_roll: bool = True,
    frequency_override: int | None = None,      # 1,2,3,4,6,12
    first_dates_override: list[str] | None = None,  # ["MM-DD", ...] or full dates
) -> pd.DataFrame:
    """
    Build a dividend forecast starting *after* the last known ex-dividend date.
    Includes remaining dates in the current (partial) year, then `years_ahead`
    full years. Applies annual increases on the mapped anchor date.

    Returns a DataFrame with: [date_field, cash_amount, increase_applied, source, (optional) ticker]
    """

    if df_history is None or df_history.empty:
        raise ValueError("df_history is empty")

    df = df_history.copy()
    # Normalize dates & amounts
    for col in ["ex_dividend_date", "pay_date", "declaration_date", "record_date"]:
        if col in df.columns:
            df[col] = pd.to_datetime(df[col], errors="coerce")
    if "cash_amount" not in df.columns:
        raise ValueError("df_history must contain 'cash_amount'")
    df["cash_amount"] = pd.to_numeric(df["cash_amount"], errors="coerce").astype(float)

    # Sort and identify last known
    df = df.dropna(subset=[date_field]).sort_values(date_field).reset_index(drop=True)
    last_known_date = pd.Timestamp(df[date_field].iloc[-1])
    last_known_year = int(last_known_date.year)

    # --- helpers ---
    def _roll_to_business_day(ts: pd.Timestamp) -> pd.Timestamp:
        if not use_business_day_roll:
            return ts
        while ts.weekday() >= 5:  # Sat/Sun -> next business day
            ts = ts + pd.Timedelta(days=1)
        return ts

    def _parse_mmdd_list(dates_like, base_year):
        out = []
        for x in dates_like:
            ts = x if isinstance(x, pd.Timestamp) else pd.to_datetime(str(x), errors="raise")
            # normalize to MM-DD (ignore year from input)
            out.append(ts.strftime("%m-%d"))
        return sorted(out)

    def _generate_template_from_first(mmdd_first, freq, base_year):
        step = ALLOWED_FREQ_STEPS.get(freq)
        if step is None:
            raise ValueError(f"Unsupported frequency {freq}. Use one of {sorted(ALLOWED_FREQ_STEPS)}.")
        start = pd.Timestamp(f"{base_year}-{mmdd_first}")
        res = []
        for k in range(freq):
            dt = start + pd.offsets.DateOffset(months=step * k)
            res.append(dt.strftime("%m-%d"))
        return sorted(res)

    def _complete_template(mmdd_list, freq, base_year):
        """If provided < freq dates, fill the rest by equal-month stepping."""
        step = ALLOWED_FREQ_STEPS.get(freq)
        if step is None:
            raise ValueError(f"Unsupported frequency {freq}. Use one of {sorted(ALLOWED_FREQ_STEPS)}.")
        seeds = sorted(mmdd_list)
        if len(seeds) >= freq:
            return seeds[:freq]
        if len(seeds) == 1:
            return _generate_template_from_first(seeds[0], freq, base_year)
        cur = pd.Timestamp(f"{base_year}-{seeds[-1]}")
        out = seeds[:]
        while len(out) < freq:
            cur = cur + pd.offsets.DateOffset(months=step)
            out.append(cur.strftime("%m-%d"))
        return sorted(out)

    def _template_date(year: int, mmdd: str) -> pd.Timestamp:
        m, d = map(int, mmdd.split("-"))
        return _roll_to_business_day(pd.Timestamp(year=year, month=m, day=d))

    # --- choose frequency ---
    year_counts = df[date_field].dt.year.value_counts()
    freq_default = int(year_counts.max()) if not year_counts.empty else 4
    freq = int(frequency_override) if frequency_override else freq_default
    if freq not in ALLOWED_FREQ_STEPS:
        raise ValueError(f"frequency {freq} not in {sorted(ALLOWED_FREQ_STEPS)}")

    # --- build template (MM-DD set) ---
    if first_dates_override:
        seeds = _parse_mmdd_list(first_dates_override, base_year=last_known_year)
        template_mmdd = _complete_template(seeds, freq, base_year=last_known_year)
    else:
        # pick the most recent year with >= freq dividends, else the year with the max count
        if ref_year is None:
            candidates = year_counts[year_counts >= freq].sort_index()
            if not candidates.empty:
                ref_year = int(candidates.index.max())
            else:
                ref_year = int(year_counts.sort_index().index.max())
        df_ref = df[df[date_field].dt.year == ref_year]
        seeds = df_ref[date_field].dt.strftime("%m-%d").tolist()
        template_mmdd = _complete_template(seeds, freq, base_year=last_known_year)

    # --- map last_known_date to nearest template slot ---
    cands = [_template_date(last_known_year, mm) for mm in template_mmdd]
    diffs = [abs((c - last_known_date).days) for c in cands]
    i_last = diffs.index(min(diffs))  # index of nearest template slot in current year

    # next slot index & starting year
    idx = (i_last + 1) % freq
    year = last_known_year if idx > 0 else last_known_year + 1

    # --- increase anchor (use nearest template date in ref_year) ---
    df_all = df.copy()
    diffs_amt = df_all["cash_amount"].diff()
    anchor_year = ref_year if ref_year is not None else last_known_year
    inc_rows = df_all[(df_all[date_field].dt.year == anchor_year) & (diffs_amt > 0)]
    mapped_anchor = None
    anchor_date_in_anchor_year = None
    if not inc_rows.empty:
        inc_dt = pd.Timestamp(inc_rows.iloc[0][date_field])
        inc_mmdd = inc_dt.strftime("%m-%d")
        # map to nearest template date (NOT first >=) to avoid month-drift mistakes
        ref_cands = [_template_date(anchor_year, mm) for mm in template_mmdd]
        ref_diffs = [abs((c - inc_dt).days) for c in ref_cands]
        j = ref_diffs.index(min(ref_diffs))
        mapped_anchor = template_mmdd[j]
        anchor_date_in_anchor_year = ref_cands[j]

    current_amount = float(df_all["cash_amount"].iloc[-1])

    # --- generate future stream ---
    rows = []
    end_year = last_known_year + years_ahead
    have_ticker = "ticker" in df.columns
    ticker_val = df["ticker"].iloc[-1] if have_ticker else None

    while year <= end_year:
        mmdd = template_mmdd[idx]
        dt = _template_date(year, mmdd)

        # Strictly after last_known_date
        if dt <= last_known_date:
            idx = (idx + 1) % freq
            if idx == 0:
                year += 1
            continue

        inc_applied = False
        if mapped_anchor:
            # apply increase on mapped anchor once each future year
            if mmdd == mapped_anchor:
                if (year > anchor_year) or (
                    year == anchor_year and anchor_date_in_anchor_year is not None and anchor_date_in_anchor_year > last_known_date
                ):
                    current_amount += increase_dollars
                    inc_applied = True
        else:
            # No historical anchor → first template date of the first FULL forecast year
            if (year > last_known_year) and (mmdd == template_mmdd[0]):
                current_amount += increase_dollars
                inc_applied = True

        row = {
            date_field: dt,
            "cash_amount": round(float(current_amount), 4),
            "increase_applied": inc_applied,
            "source": "forecast",
        }
        if have_ticker:
            row["ticker"] = ticker_val
        rows.append(row)

        idx = (idx + 1) % freq
        if idx == 0:
            year += 1

    out = pd.DataFrame(rows).sort_values(date_field).reset_index(drop=True)
    return out

In [21]:
# Forecast with all defaults; relies on `df` (the dividend history) from an earlier cell.
fc=forecast_dividends_from_history_rolling(df)
fc

Unnamed: 0,ex_dividend_date,cash_amount,increase_applied,source,ticker
0,2025-11-10,0.26,False,forecast,AAPL
1,2026-02-09,0.26,False,forecast,AAPL
2,2026-05-11,0.31,True,forecast,AAPL
3,2026-08-12,0.31,False,forecast,AAPL
4,2026-11-09,0.31,False,forecast,AAPL
5,2027-02-09,0.31,False,forecast,AAPL
6,2027-05-10,0.36,True,forecast,AAPL
7,2027-08-12,0.36,False,forecast,AAPL
8,2027-11-08,0.36,False,forecast,AAPL
9,2028-02-09,0.36,False,forecast,AAPL


In [23]:
import calendar as _cal
from typing import Iterable

def _same_weekday_anniversary(ts: pd.Timestamp, target_year: int, mode: str = "nearest") -> pd.Timestamp:
    """Project `ts` to `target_year`, adjusting so the weekday matches `ts.weekday()`.

    The day of month is first clamped to the target month's length (so Feb 29
    maps to Feb 28 in a non-leap year), then shifted by at most 6 days.

    mode: 'nearest' | 'following' | 'preceding' (tie -> following).
    Any other value behaves like 'nearest'.
    """
    ts = pd.Timestamp(ts)
    m, d = ts.month, ts.day
    last_dom = _cal.monthrange(target_year, m)[1]
    d = min(d, last_dom)
    base = pd.Timestamp(year=target_year, month=m, day=d)
    want = ts.weekday()  # 0=Mon..6=Sun
    have = base.weekday()
    fwd_shift = (want - have) % 7          # 0..6 days forward
    back_shift = -((have - want) % 7)      # 0..-6 days backward
    cand_fwd = base + pd.Timedelta(days=fwd_shift)
    cand_back = base + pd.Timedelta(days=back_shift)
    if mode == "following":
        return cand_fwd
    if mode == "preceding":
        return cand_back
    # nearest (tie -> following)
    return cand_fwd if abs(fwd_shift) <= abs(back_shift) else cand_back


def _project_year_dates_same_weekday(prev_year_dates: Iterable[pd.Timestamp], target_year: int, mode: str = "nearest") -> list[pd.Timestamp]:
    """Project every date in `prev_year_dates` (sorted first) into `target_year`, keeping its weekday."""
    dates = sorted(pd.to_datetime(list(prev_year_dates)).tolist())
    return [_same_weekday_anniversary(pd.Timestamp(dt), target_year, mode=mode) for dt in dates]


def forecast_dividends_same_weekday(
    df_history: pd.DataFrame,
    *,
    years_ahead: int = 3,
    increase_dollars: float = 0.05,
    date_field: str = "ex_dividend_date",
    keep_same_weekday: bool = True,   # kept for clarity; this implementation always preserves weekday
    weekday_mode: str = "nearest",    # "nearest" | "following" | "preceding"
    roll_weekends: bool = False,      # if True, weekend-roll to Monday (may change weekday)
) -> pd.DataFrame:
    """
    Forecast dividends by projecting each year's *actual* ex-div dates forward one year,
    preserving the weekday of each event.

    Strategy:
      - Fill the remainder of the current year by projecting the most recent *completed* year.
      - For each future year Y, project the dates from Y-1 to Y keeping the same weekday.
      - Apply the annual increase on the schedule date nearest to the anniversary
        of the latest historical increase. This includes the remainder of the
        current year when the latest increase happened in an earlier year and
        this year's anchor date is still ahead of the last known date.

    Expects df_history columns: [date_field, cash_amount, (optional) ticker].

    Returns a DataFrame with [date_field, cash_amount, increase_applied, source,
    (optional) ticker]; it is empty (same columns) when nothing falls in range.

    Raises ValueError for empty history, a missing 'cash_amount' column, or
    history with no row that has both a valid date and a numeric amount.
    `keep_same_weekday` is accepted but not read.
    """

    if df_history is None or df_history.empty:
        raise ValueError("df_history is empty")

    df = df_history.copy()
    df[date_field] = pd.to_datetime(df[date_field], errors="coerce")
    if "cash_amount" not in df.columns:
        raise ValueError("df_history must contain 'cash_amount'")
    df["cash_amount"] = pd.to_numeric(df["cash_amount"], errors="coerce").astype(float)
    df = df.dropna(subset=[date_field, "cash_amount"]).sort_values(date_field).reset_index(drop=True)
    if df.empty:
        raise ValueError(f"df_history has no rows with both a valid '{date_field}' and a numeric 'cash_amount'")

    last_known_date = pd.Timestamp(df[date_field].iloc[-1])
    last_year = int(last_known_date.year)
    have_ticker = "ticker" in df.columns
    ticker_val = df["ticker"].iloc[-1] if have_ticker else None

    # Optional weekend roll helper
    def _weekend_roll(ts: pd.Timestamp) -> pd.Timestamp:
        if not roll_weekends:
            return ts
        while ts.weekday() >= 5:
            ts = ts + pd.Timedelta(days=1)
        return ts

    def _make_row(dt: pd.Timestamp, amount: float, inc_applied: bool) -> dict:
        row = {date_field: dt, "cash_amount": round(amount, 4), "increase_applied": inc_applied, "source": "forecast"}
        if have_ticker:
            row["ticker"] = ticker_val
        return row

    # Determine the most recent fully-completed reference year (< last_year)
    counts = df[date_field].dt.year.value_counts().sort_index()
    completed_years = [y for y in counts.index if y < last_year]
    if completed_years:
        max_count = counts.loc[completed_years].max()
        ref_full_year = int(max([y for y in completed_years if counts.loc[y] == max_count]))
    else:
        ref_full_year = int(counts.index.min())  # fallback

    def _year_dates(year: int) -> list[pd.Timestamp]:
        return sorted(pd.to_datetime(df.loc[df[date_field].dt.year == year, date_field]).dropna().tolist())

    # Detect latest historical increase anchor (df is already sorted by date)
    diffs_amt = df["cash_amount"].diff()
    inc_idx = diffs_amt[diffs_amt > 0].index
    inc_dt = None
    if len(inc_idx) > 0:
        inc_dt = pd.Timestamp(df.loc[inc_idx[-1], date_field])

    current_amount = float(df["cash_amount"].iloc[-1])
    rows: list[dict] = []

    # --- Remainder of current year: project ref_full_year -> last_year ---
    seeds_prev = _year_dates(ref_full_year)
    if not seeds_prev:
        raise ValueError(f"No historical dates in reference year {ref_full_year}")

    curr_year_full = _project_year_dates_same_weekday(seeds_prev, last_year, mode=weekday_mode)
    curr_year_dates = [_weekend_roll(pd.Timestamp(d)) for d in sorted(curr_year_full)]

    # If the latest increase predates this year, this year's increase has not
    # been observed yet: map its anniversary onto the projected schedule so it
    # is applied when that slot is still in the future.
    curr_anchor = None
    if inc_dt is not None and inc_dt.year < last_year and curr_year_dates:
        nominal_curr = _same_weekday_anniversary(inc_dt, last_year, mode=weekday_mode)
        curr_anchor = min(curr_year_dates, key=lambda d: abs((d - nominal_curr).days))

    anchor_used = False
    for dt in curr_year_dates:
        if dt <= last_known_date:
            continue
        inc_applied = False
        if curr_anchor is not None and dt == curr_anchor and not anchor_used:
            current_amount += float(increase_dollars)
            inc_applied = True
            anchor_used = True      # at most one increase per year, even with duplicate dates
        rows.append(_make_row(dt, current_amount, inc_applied))

    # --- Future full years: project Y-1 -> Y each time ---
    prev_year_dates = curr_year_full
    for Y in range(last_year + 1, last_year + 1 + years_ahead):
        year_dates = _project_year_dates_same_weekday(prev_year_dates, Y, mode=weekday_mode)
        year_dates = [_weekend_roll(pd.Timestamp(d)) for d in sorted(year_dates)]

        # Map increase anchor to this year's schedule
        anchor_target = None
        if inc_dt is not None:
            nominal_anchor = _same_weekday_anniversary(inc_dt, Y, mode=weekday_mode)
            anchor_target = min(year_dates, key=lambda d: abs((d - nominal_anchor).days))

        anchor_used = False
        for dt in year_dates:
            inc_applied = False
            if anchor_target is not None and dt == anchor_target and not anchor_used:
                current_amount += float(increase_dollars)
                inc_applied = True
                anchor_used = True
            rows.append(_make_row(dt, current_amount, inc_applied))

        prev_year_dates = year_dates

    if not rows:
        # Nothing in range (e.g. years_ahead=0 and the year is complete): return
        # an empty frame with the documented columns rather than failing in
        # sort_values on a missing column.
        columns = [date_field, "cash_amount", "increase_applied", "source"]
        if have_ticker:
            columns.append("ticker")
        return pd.DataFrame(columns=columns)

    out = pd.DataFrame(rows).sort_values(date_field).reset_index(drop=True)
    return out


Unnamed: 0,id,cash_amount,currency,declaration_date,dividend_type,ex_dividend_date,frequency,pay_date,record_date,ticker
0,E2ab18998fec423fdb40a4f3bdb6df573c30c51c00039e...,2.65,USD,NaT,CD,2012-08-09,0,2012-08-16,2012-08-13,AAPL
1,E11cfbce7b91c73bba1c5601ebf5e9eee8029555e840c8...,2.65,USD,2012-10-25,CD,2012-11-07,0,2012-11-15,2012-11-12,AAPL
2,E02164e6e37d6cd92baf3031d438e9a43b9108f829b392...,2.65,USD,2013-01-23,CD,2013-02-07,0,2013-02-14,2013-02-11,AAPL
3,E70c3a508297eeed801e122bf8e8d22ff071586fedc534...,3.05,USD,2013-04-23,CD,2013-05-09,4,2013-05-16,2013-05-13,AAPL
4,Ea728d471c41372685beac963f316e597f573a5e645251...,3.05,USD,2013-07-23,CD,2013-08-08,4,2013-08-15,2013-08-12,AAPL
5,E8d0e55b85e7913bfb9ba5b9be3ca250f02b20520d458d...,3.05,USD,2013-10-28,CD,2013-11-06,4,2013-11-14,2013-11-11,AAPL
6,E829ac37db0f5e0afb05a3a0cdadd1da9c83a63471b4dd...,3.05,USD,2014-01-27,CD,2014-02-06,4,2014-02-13,2014-02-10,AAPL
7,E8c00b94f3daa63f295cd3a805106a98569e9f99df11ae...,3.29,USD,2014-04-23,CD,2014-05-08,4,2014-05-15,2014-05-12,AAPL
8,E0d1cf507e1730632e9067d3e087be226465fc08200c59...,0.47,USD,2014-07-22,CD,2014-08-07,4,2014-08-14,2014-08-11,AAPL
9,E8c84538254e81904c13ed8989b06de1760add6f31e763...,0.47,USD,2014-10-20,CD,2014-11-06,4,2014-11-13,2014-11-10,AAPL


In [25]:
# Same history `df`, forecast with the weekday-preserving variant defined in the notebook.
fc=forecast_dividends_same_weekday(df)
fc

Unnamed: 0,ex_dividend_date,cash_amount,increase_applied,source,ticker
0,2025-11-07,0.26,False,forecast,AAPL
1,2026-02-06,0.26,False,forecast,AAPL
2,2026-05-08,0.31,True,forecast,AAPL
3,2026-08-10,0.31,False,forecast,AAPL
4,2026-11-06,0.31,False,forecast,AAPL
5,2027-02-05,0.31,False,forecast,AAPL
6,2027-05-07,0.36,True,forecast,AAPL
7,2027-08-09,0.36,False,forecast,AAPL
8,2027-11-05,0.36,False,forecast,AAPL
9,2028-02-04,0.36,False,forecast,AAPL


In [27]:
from dividends import forecast_dividends_same_weekday

# Re-run the forecast using the implementation imported from the `dividends` module.
fc = forecast_dividends_same_weekday(df)
fc

Unnamed: 0,ex_dividend_date,cash_amount,increase_applied,source,ticker
0,2025-11-07,0.26,False,forecast,AAPL
1,2026-02-06,0.26,False,forecast,AAPL
2,2026-05-08,0.31,True,forecast,AAPL
3,2026-08-10,0.31,False,forecast,AAPL
4,2026-11-06,0.31,False,forecast,AAPL
5,2027-02-05,0.31,False,forecast,AAPL
6,2027-05-07,0.36,True,forecast,AAPL
7,2027-08-09,0.36,False,forecast,AAPL
8,2027-11-05,0.36,False,forecast,AAPL
9,2028-02-04,0.36,False,forecast,AAPL


In [37]:
# Load the yield-curve snapshot and pull out the tenor and rate columns.
yc = pd.read_csv("../yc_snap.csv")
yrs, rates = yc["yrs"], yc["rate"]
yrs

0     1
1     2
2     3
3     5
4     7
5    10
6    15
7    30
Name: yrs, dtype: int64

In [40]:
import QuantLib as ql
import math

def zero_curve_from_csv(valuation_date: ql.Date, rows, day_count=ql.Actual365Fixed(), cal=ql.UnitedStates(ql.UnitedStates.NYSE)):
    """
    Build a QuantLib zero curve anchored at ``valuation_date``.

    Parameters
    ----------
    valuation_date : ql.Date
        Anchor (first node) of the curve.
    rows : iterable of (T_years, r_zero)
        Tenor in years and zero rate for each pillar. Any iterable is accepted
        (list, generator, ``zip`` object); it is consumed exactly once. Pillars
        may be supplied in any order.
    day_count, cal
        Passed straight through to ``ql.ZeroCurve``.

    Returns
    -------
    ql.YieldTermStructureHandle
        Handle wrapping the ``ql.ZeroCurve``.

    Raises
    ------
    ValueError
        If ``rows`` is empty, contains a negative tenor, contains two tenors
        that round to the same day offset, or consists of a single T=0 row.
    """
    # Materialise once as (day_offset, rate) and sort: the curve needs strictly
    # increasing dates, and `rows` may be a one-shot iterator.
    nodes = sorted((int(round(float(T) * 365)), float(r)) for T, r in rows)
    if not nodes:
        raise ValueError("zero_curve_from_csv: 'rows' is empty")
    if nodes[0][0] < 0:
        raise ValueError("zero_curve_from_csv: negative tenor in 'rows'")

    offsets = [days for days, _ in nodes]
    if any(later <= earlier for earlier, later in zip(offsets, offsets[1:])):
        raise ValueError("zero_curve_from_csv: duplicate tenors in 'rows'")

    if offsets[0] > 0:
        # No pillar on the valuation date itself: pad the front with the first
        # pillar's rate so the curve starts at valuation_date (flat short end).
        nodes.insert(0, (0, nodes[0][1]))
    if len(nodes) < 2:
        raise ValueError("zero_curve_from_csv: need at least one pillar beyond the valuation date")

    dates = [valuation_date + ql.Period(days, ql.Days) for days, _ in nodes]
    rates = [rate for _, rate in nodes]
    return ql.YieldTermStructureHandle(ql.ZeroCurve(dates, rates, day_count, cal))

class QLAmericanPricer:
    """
    American vanilla option pricer built on QuantLib's finite-difference
    Black-Scholes engine, with discrete cash dividends.

    The spot and volatility are held in ``ql.SimpleQuote`` objects so a single
    stochastic process can be reused across many ``price`` calls.
    """

    def __init__(self, valuation_date: ql.Date, r_curve: ql.YieldTermStructureHandle,
                 init_vol: float = 0.20, cal=ql.UnitedStates(ql.UnitedStates.NYSE), dc=ql.Actual365Fixed(),
                 t_grid: int = 400, x_grid: int = 200):
        """
        Parameters
        ----------
        valuation_date : ql.Date
            Pricing date; also installed as QuantLib's global evaluation date.
        r_curve : ql.YieldTermStructureHandle
            Risk-free discount curve.
        init_vol : float
            Initial flat Black volatility (floored at 1e-8).
        cal, dc
            Calendar and day counter used for the flat vol and dividend-yield curves.
        t_grid, x_grid : int
            Time and space grid sizes handed to the finite-difference engine.
        """
        ql.Settings.instance().evaluationDate = valuation_date
        self.val_date = valuation_date
        self.cal, self.dc = cal, dc
        self.t_grid, self.x_grid = t_grid, x_grid

        # live quotes you can bump
        self.spot_q = ql.SimpleQuote(0.0)
        self.vol_q  = ql.SimpleQuote(max(1e-8, init_vol))

        # handles
        self.u   = ql.QuoteHandle(self.spot_q)
        self.vts = ql.BlackVolTermStructureHandle(ql.BlackConstantVol(valuation_date, cal, ql.QuoteHandle(self.vol_q), dc))
        self.rts = r_curve
        self.dts = ql.YieldTermStructureHandle(ql.FlatForward(valuation_date, 0.0, dc, ql.Continuous))  # cont. q=0; use cash divs explicitly

        # stochastic process reused across prices
        self.process = ql.BlackScholesMertonProcess(self.u, self.dts, self.rts, self.vts)

    def _div_schedule(self, dividends, maturity=None):
        """
        Convert ``(date, amount)`` pairs into a list of ``ql.FixedDividend``.

        Dates may be ``ql.Date`` or any object exposing ``.day/.month/.year``.
        Dividends on or before the valuation date are dropped. When
        ``maturity`` is given, dividends dated after it are dropped as well.
        """
        sched = []
        if not dividends:
            return sched
        for d, amt in dividends:
            dq = d if isinstance(d, ql.Date) else ql.Date(d.day, d.month, d.year)
            if dq <= self.val_date:
                continue
            if maturity is not None and dq > maturity:
                continue
            sched.append(ql.FixedDividend(float(amt), dq))
        return sched

    def _npv_at_vol(self, payoff, exercise, engine, vol):
        """Set the vol quote to ``vol`` and return the NPV of a fresh option on ``engine``."""
        self.vol_q.setValue(vol)
        bumped = ql.VanillaOption(payoff, exercise)
        bumped.setPricingEngine(engine)
        return bumped.NPV()

    def price(self, S, K, maturity: ql.Date, cp: str,
              vol: float | None = None, dividends: list[tuple] | None = None,
              want_greeks: bool = True, vega_eps: float = 1e-4):
        """
        Price an American option and optionally its Greeks.

        Parameters
        ----------
        S, K : float-like
            Spot and strike.
        maturity : ql.Date
            Last exercise date.
        cp : str
            Starts with 'C'/'c' for a call; anything else is priced as a put.
        vol : float, optional
            If given, replaces the pricer's volatility quote (floored at 1e-8)
            and stays in effect for subsequent calls.
        dividends : list of (date, amount), optional
            Discrete cash dividends; see ``_div_schedule``.
        want_greeks : bool
            When False, only ``"theo"`` is returned.
        vega_eps : float
            Absolute vol bump used for the finite-difference vega.

        Returns
        -------
        dict
            ``"theo"`` always; with ``want_greeks`` also ``"delta"``,
            ``"gamma"``, ``"theta"`` (``None`` when the engine does not provide
            them) and ``"vega"`` expressed per 1 vol point (0.01).
        """
        spot = float(S)

        # The curves and the exercise window are anchored at val_date, so make
        # sure the global evaluation date still agrees (another pricer or cell
        # may have changed it since __init__).
        ql.Settings.instance().evaluationDate = self.val_date

        # set live quotes
        self.spot_q.setValue(spot)
        if vol is not None:
            self.vol_q.setValue(max(1e-8, float(vol)))

        payoff   = ql.PlainVanillaPayoff(ql.Option.Call if cp.upper().startswith('C') else ql.Option.Put, float(K))
        exercise = ql.AmericanExercise(self.val_date, maturity)
        option   = ql.VanillaOption(payoff, exercise)

        # Only dividends paid during the option's life matter. Without the
        # maturity filter the escrow fallback below would subtract the PV of
        # dividends that go ex after expiry.
        divs = self._div_schedule(dividends or [], maturity)

        try:
            # try FDM engine with explicit dividend schedule
            try:
                engine = ql.FdBlackScholesVanillaEngine(self.process, divs, self.t_grid, self.x_grid)
            except (TypeError, NotImplementedError):
                # Overload not available in this QuantLib build (the exception
                # type raised on dispatch failure can differ between builds).
                # fallback: escrowed dividend approx (subtract PV of cash divs from spot)
                pv = sum(d.amount() * self.rts.discount(d.date()) for d in divs)
                self.spot_q.setValue(spot - pv)
                engine = ql.FdBlackScholesVanillaEngine(self.process, self.t_grid, self.x_grid)

            option.setPricingEngine(engine)

            out = {"theo": option.NPV()}
            if not want_greeks:
                return out

            # Greeks: ask engine; if missing, do light numeric where needed
            for g in ("delta", "gamma", "theta"):
                try:
                    out[g] = getattr(option, g)()
                except RuntimeError:
                    out[g] = None

            # vega: some PDE engines don't provide it → numeric
            try:
                # Engine vega is per unit of vol; scale so both paths report
                # vega per 1 vol point.
                out["vega"] = option.vega() * 0.01
            except RuntimeError:
                v0 = self.vol_q.value()
                v_up = v0 + vega_eps
                v_dn = max(1e-8, v0 - vega_eps)  # never bump into negative vol
                try:
                    upv = self._npv_at_vol(payoff, exercise, engine, v_up)
                    dnv = self._npv_at_vol(payoff, exercise, engine, v_dn)
                finally:
                    self.vol_q.setValue(v0)
                # Divide by the bump actually applied (the down-bump may have been clamped).
                out["vega"] = ((upv - dnv) / (v_up - v_dn)) * 0.01  # per 1 vol point

            return out
        finally:
            # Restore spot (the escrow path shifts it) even if pricing raised,
            # so the shared process is never left in a bumped state.
            self.spot_q.setValue(spot)

In [None]:

# Turn the snapshot columns into (tenor_years, rate) pairs and build the curve.
yrs, rates = yc["yrs"].tolist(), yc["rate"].tolist()
yield_rows = list(zip(yrs, rates))

# Valuation date; `dt_today` holds the same calendar date.
val = ql.Date(12, 10, 2025)
dt_today = ql.Date(12, 10, 2025)

ql_yield_curve = zero_curve_from_csv(val, yield_rows)



In [None]:
# Build the pricer on the curve above, then price a 200-strike put at spot 190.
pricer = QLAmericanPricer(valuation_date=val, r_curve=ql_yield_curve, init_vol=0.25)
pricer.price(190, 200, mat, 'P', vol=0.25, dividends=ql_divs)