In [2]:

import re


def parse_strategy_python(text: str) -> dict:
    """
    Very rough parser. Returns { ok, spec?, issues }.
    Examples supported: 'trade 500 AAPL jan26 250/300 collar'
    """
    t = text.lower()
    m = re.search(r'(\d+)\s*([a-z]+)\s+(\w+)\s+(\d{3})/(\d{3})\s+(collar|vertical|calendar|diagonal)', t)
    if not m:
        return {"ok": False, "issues": [{"code":"PARSE_FAIL","msg":"Pattern not recognized"}]}
    size = int(m.group(1)); month_str = m.group(2); sym = m.group(3).upper()
    k1 = float(m.group(4)); k2 = float(m.group(5)); structure = m.group(6)
    mon_map = {'jan':1,'feb':2,'mar':3,'apr':4,'may':5,'jun':6,'jul':7,'aug':8,'sep':9,'oct':10,'nov':11,'dec':12}
    month = mon_map.get(month_str[:3], None)
    year = 2026  # naive default; swap with your parse_expiry_mmmyy if present
    if not month:
        return {"ok": False, "issues": [{"code":"BAD_MONTH","msg":f"Month '{month_str}' not recognized"}]}
    spec = {"symbol": sym, "year": year, "month": month, "size": size,
            "structure": structure, "legs": [{"cp":"P","k":k1},{"cp":"C","k":k2}]}
    return {"ok": True, "spec": spec, "issues": []}

In [10]:
strat="trade 500 AAPL jan26 250/300 collar"

result = parse_strategy_python(strat)
print(result)


{'ok': False, 'issues': [{'code': 'BAD_MONTH', 'msg': "Month 'aapl' not recognized"}]}


In [2]:
# simple_strategy_parser.py
import re
from dataclasses import dataclass
from typing import Optional, List

_MON = {"jan":1,"feb":2,"mar":3,"apr":4,"may":5,"jun":6,
        "jul":7,"aug":8,"sep":9,"oct":10,"nov":11,"dec":12}

def parse_mmmyy(token: str) -> Optional[tuple[int,int,str]]:
    """
    Accepts: Jan26, Jan 2026, 2026-01, 01/2026, oct25, OCT-2027
    Returns (year, month, normalized_expiry_str) or None.
    """
    t = token.strip().lower()
    # Jan26 / Jan 2026 / Oct-2025
    m = re.match(r"([a-z]{3})[ \-_/]*([0-9]{2,4})$", t)
    if m:
        mon = _MON.get(m.group(1))
        yy = m.group(2)
        if mon:
            if len(yy) == 2:
                y = 2000 + int(yy)
            else:
                y = int(yy)
            return (y, mon, m.group(1).title() + (yy if len(yy)==2 else str(y)))
    # 2026-01 / 01/2026
    m = re.match(r"([0-9]{4})[ \-_/]*([0-9]{1,2})$", t) or re.match(r"([0-9]{1,2})[ \-_/]*([0-9]{4})$", t)
    if m:
        a, b = int(m.group(1)), int(m.group(2))
        if a > 1900: y, mon = a, b
        else:        y, mon = b, a
        if 1 <= mon <= 12:
            # create a Jan26-like label for convenience
            mon3 = [k for k,v in _MON.items() if v==mon][0].title()
            yy2  = str(y)[-2:]
            return (y, mon, f"{mon3}{yy2}")
    return None

def parse_strategy_text(text: str) -> dict:
    """
    Returns the strict JSON-ish dict:
    { ok, spec{ symbol,size,structure,legs[{cp, strike, side, expiry_str, expiry_year, expiry_month, exp_hint}] }, issues[], ask{} }
    """
    t = text.strip()
    low = t.lower()

    # symbol (simple: first ALLCAPS token or explicit)
    sym = None
    for tok in re.findall(r"[A-Za-z]{1,5}", t):
        if tok.isupper() and 1 <= len(tok) <= 5:
            sym = tok; break
    # size (optional)
    size = 1
    m = re.search(r"\b(sell|buy)\s+(-?\d+)", low)
    if m:
        size = int(m.group(2))
        if m.group(1) == "sell" and size > 0:
            size = -size
    else:
        m2 = re.search(r"\b(\d{1,6})\b", low)
        if m2: size = int(m2.group(1))

    # structure (default to collar/vertical/calendar if stated)
    structure = None
    for s in ["collar","vertical","calendar","diagonal","strangle","straddle","covered_call"]:
        if s in low:
            structure = s; break
    if not structure:
        structure = "custom"

    # strikes (handles 250/300 or '250 300')
    strikes: List[float] = []
    m = re.search(r"(\d{2,5})\s*[\/ ]\s*(\d{2,5})", low)
    if m:
        strikes = [float(m.group(1)), float(m.group(2))]
    else:
        nums = [float(x) for x in re.findall(r"\b\d{2,5}\b", low)]
        # try to pick two that look like strikes (ignore the first number if it was size)
        if len(nums) >= 2:
            # crude: take the last two  (size is usually first)
            strikes = nums[-2:]

    # cp (try to infer from structure or explicit 'C/P')
    cp_tokens = re.findall(r"\b([cp])\b", low)
    # default for collar: P then C; vertical: assume CP from token or leave None
    cps = []
    if structure == "collar":
        cps = ["P","C"]
    elif cp_tokens:
        # If 'C' appears twice with two strikes, assume call spread; similar for puts
        if len(cp_tokens) == 1 and len(strikes) == 2:
            cps = [cp_tokens[0].upper(), cp_tokens[0].upper()]
        else:
            cps = [x.upper() for x in cp_tokens[:len(strikes)]]
    elif structure in ("strangle","straddle"):
        cps = ["P","C"]
    # expiry (look for a token that looks like a month-year)
    expiry = None
    for tok in re.findall(r"[A-Za-z]{3}[ \-_/]*\d{2,4}|\d{4}[ \-_/]*\d{1,2}|\d{1,2}[ \-_/]*\d{4}", t):
        expiry = parse_mmmyy(tok)
        if expiry: break

    # build legs (assume 2 legs if we found 2 strikes; else 1)
    legs = []
    if strikes:
        if not cps:
            # fallbacks: if collar or vertical and no CP tokens, infer from structure
            if structure in ("collar","vertical","diagonal","calendar"):
                cps = ["P","C"] if structure=="collar" else (["C","C"] if " c " in f" {low} " else ["P","P"])
            else:
                cps = ["P","C"][:len(strikes)]
        for i,k in enumerate(strikes[:2]):
            cp = cps[i] if i < len(cps) else ("C" if i==1 else "P")
            # default sides: collar = buy put / sell call; vertical sell size negative implies short first leg, etc.
            side = "AUTO"
            if structure == "collar":
                side = "BUY" if cp=="P" else "SELL"
            # expiry fields
            if expiry:
                y, m, es = expiry
                legs.append({"cp": cp, "strike": float(k), "side": side,
                             "expiry_str": es, "expiry_year": y, "expiry_month": m, "exp_hint": None})
            else:
                legs.append({"cp": cp, "strike": float(k), "side": side,
                             "expiry_str": None, "expiry_year": None, "expiry_month": None, "exp_hint": None})
    else:
        # no strikes parsed → single placeholder leg
        legs = [{"cp":"C","strike":None,"side":"AUTO","expiry_str":None,"expiry_year":None,"expiry_month":None,"exp_hint":None}]

    issues = []
    if not sym:   issues.append({"code":"MISSING_FIELD","msg":"Symbol not found","field":"symbol"})
    if not strikes: issues.append({"code":"MISSING_FIELD","msg":"Strike(s) not found","field":"legs[].strike"})
    # If calendar and only one expiry present, mark hints
    if structure in ("calendar","diagonal") and expiry and len(legs) == 2:
        legs[0]["exp_hint"] = "front"; legs[1]["exp_hint"] = "back"

    ok = (len(issues)==0)
    spec = {"symbol": sym or "", "size": int(size), "structure": structure, "legs": legs, "notes": ""}

    ask = {"question": None, "options": []}
    if not ok:
        # one concise question prioritizing expiry or strikes
        if any(i["field"].startswith("legs") for i in issues):
            ask["question"] = "Which month/year and strikes do you want (e.g., Jan26 250/300)?"
        elif not sym:
            ask["question"] = "What ticker?"
    return {"ok": ok, "spec": spec, "issues": issues, "ask": ask}

In [3]:
parse_strategy_text("i want 1000 jan26 jan27  MSFT 250/350 cs")

{'ok': True,
 'spec': {'symbol': 'MSFT',
  'size': 1000,
  'structure': 'custom',
  'legs': [{'cp': 'P',
    'strike': 250.0,
    'side': 'AUTO',
    'expiry_str': 'Jan26',
    'expiry_year': 2026,
    'expiry_month': 1,
    'exp_hint': None},
   {'cp': 'C',
    'strike': 350.0,
    'side': 'AUTO',
    'expiry_str': 'Jan26',
    'expiry_year': 2026,
    'expiry_month': 1,
    'exp_hint': None}],
  'notes': ''},
 'issues': [],
 'ask': {'question': None, 'options': []}}

In [4]:
import re
from datetime import date, timedelta
from typing import List, Dict, Set,Any, Optional,Tuple

_MON = {"jan":1,"feb":2,"mar":3,"apr":4,"may":5,"jun":6,
        "jul":7,"aug":8,"sep":9,"oct":10,"nov":11,"dec":12}
# short+long month names
_MON_RE = r"(?:jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)(?:uary|ch|il|e|y|ust|tember|ober|ember)?"

def yy_to_yyyy(yy: int, pivot: int = 69) -> int:
    return 2000 + yy if yy <= pivot else 1900 + yy

def _add(out: List[Tuple[str,int,int]], seen: Set[Tuple[int,int]], y: int, m: int):
    if not (1 <= m <= 12 and 1900 <= y <= 2100): return
    if (y, m) in seen: return
    lab = [k for k,v in _MON.items() if v==m][0].title() + str(y)[-2:]
    seen.add((y,m)); out.append((lab, y, m))

def extract_maturities(text: str, *, ref_date: date, pivot: int = 69) -> List[Tuple[str,int,int]]:
    t = text.lower()
    out: List[Tuple[str,int,int]] = []
    seen: Set[Tuple[int,int]] = set()

    # 1) MM/DD/YYYY or MM/DD/YY
    for m,d,y in re.findall(r"\b(\d{1,2})[/-](\d{1,2})[/-](\d{2,4})\b", t):
        mm, dd, yy = int(m), int(d), int(y)
        yyyy = yy_to_yyyy(yy, pivot) if len(y)==2 else yy
        if 1<=mm<=12 and 1<=dd<=31: _add(out, seen, yyyy, mm)

    # 2) YYYY-MM or YYYY/MM
    for y,m in re.findall(r"\b(19\d{2}|20\d{2})[/-](\d{1,2})\b", t):
        _add(out, seen, int(y), int(m))

    # 3) Month + Year words: Jan26 / Jan 2026 / January 2026
    for mon,yr in re.findall(rf"\b({_MON_RE})\s*([0-9]{{2}}|[0-9]{{4}})\b", t):
        mm = _MON.get(mon[:3]);  y = int(yr)
        _add(out, seen, yy_to_yyyy(y, pivot) if len(yr)==2 else y, mm)

    # 4) Compact MMMYY: jan26
    for mon,yr in re.findall(rf"\b({_MON_RE})(\d{{2}})\b", t):
        _add(out, seen, yy_to_yyyy(int(yr), pivot), _MON[mon[:3]])

    # 5) Bare month (NOT followed by a 2- or 4-digit year). Strikes after are fine.
    for mon in re.findall(rf"\b({_MON_RE})\b(?!\s*(?:\d{{2}}(?!\d)|\d{{4}}))", t):
        mm = _MON[mon[:3]]
        yyyy = ref_date.year if mm >= ref_date.month else ref_date.year + 1
        _add(out, seen, yyyy, mm)

    return out


def parse_quantity(text: str) -> int:
    """
    Find quantity as an integer. Supports comma-grouped values (e.g., 1,000).
    Prefers 'buy|sell <N>' if present; otherwise first standalone integer.
    Run this on the ORIGINAL text (do NOT strip commas first).
    """
    # Prefer explicit verb + number
    m = re.search(r'\b(?:buy|sell)\s+(-?\d{1,3}(?:,\d{3})*|\d+)\b', text, flags=re.I)
    if not m:
        # First standalone integer, comma-aware
        m = re.search(r'\b-?\d{1,3}(?:,\d{3})*\b', text)
    if not m:
        return 1
    return int(m.group(1).replace(',', ''))

def parse_quantity(text: str) -> int:
    m = re.search(r"\b-?\d{1,3}(?:,\d{3})*\b", text)
    return int(m.group(0).replace(",", "")) if m else 1

def extract_strikes(text: str, qty: int | None = None) -> list[float]:
    # Prefer slash pairs (handles decimals)
    m = re.search(r'(?<![A-Za-z])(\d+(?:\.\d+)?)\s*/\s*(\d+(?:\.\d+)?)(?!\S*[A-Za-z])', text)
    if m:
        return [float(m.group(1)), float(m.group(2))]
    # Otherwise standalone numbers (no commas), not glued to letters
    t = text.replace(",", "")
    nums = [float(x) for x in re.findall(r'(?<![A-Za-z])\d{2,5}(?:\.\d+)?\b', t)]
    # Drop the quantity if it was the first big number
    if qty and nums and int(nums[0]) == qty:
        nums = nums[1:]
    return nums

def parse_strategy_mvp(text: str,ref_date:date) -> dict:
    
    t = text.strip()
    qty = int(re.search(r'(-?\d[\d,]*)', t).group(1).replace(',', '')) if re.search(r'(-?\d[\d,]*)', t) else 1
    #qty=parse_quantity(t)
    sym = re.search(r'\b[A-Z]{1,10}\b', t).group(0) if re.search(r'\b[A-Z]{1,10}\b', t) else ""
    mats = re.findall(r'([A-Za-z]{3,9}\s*\d{2,4}|\d{1,2}/\d{1,2}/\d{2,4})', t)
    labels_year_month = extract_maturities(text,ref_date=ref_date)  # [(label, year, month), ...]
    maturities = [lab for (lab, _, _) in labels_year_month]  # keep simple labels for MVP

    strikes = [float(x) for x in  re.findall(r'(?<![A-Za-z])\d{2,5}(?:\.\d+)?\b',t)]
    strikes=extract_strikes(t,qty)
    ratio = (re.search(r'\b\d+(x\d+)+\b', t) or re.search(r'\b\d+x\d+\b', t))
    ratio = ratio.group(0) if ratio else "1x1"
    struct = None
    for s in ["collar","rr","cs","ps","straddle","strangle","calendar","diagonal"]:
        if re.search(rf'\b{s}\b', t, re.I): struct = s.lower(); break
    cp_match = re.search(r'\b[CPcp]{2,}\b', t)
    structure = {"cp_string": cp_match.group(0).upper()} if cp_match and not struct else (struct or "custom")
    ok = bool(qty and sym and mats and strikes and structure)
    return {"ok": ok, "quantity": qty, "symbol": sym, "maturities": maturities,
            "strikes": strikes, "ratio": ratio, "structure": structure}



def set_monthly_expiries_to_third_friday(
    legs: List[Dict[str, Any]],
    *,
    preserve_exact_dates: bool = True,
    holiday_adjust: bool = False,
    holiday_is_friday: Optional[callable] = None,
) -> None:
    """
    Mutates each leg in-place:
      - if leg['expiry']['iso'] is missing (or preserve_exact_dates=False),
        fill it with the 3rd Friday ISO for (year, month).
      - optional 'holiday_adjust': if the 3rd Friday is a holiday/closed, move to Thursday.
        Provide 'holiday_is_friday(d: date) -> bool' to flag those dates.

    legs[i]['expiry'] must have {'year': int, 'month': int, 'iso': str|None}.
    """
    for leg in legs:
        exp = leg.get("expiry") or {}
        y, m, iso = exp.get("year"), exp.get("month"), exp.get("iso")
        if not y or not m:
            continue  # nothing to do

        if iso and preserve_exact_dates:
            continue  # user already gave an exact date; leave it

        d = third_friday(y, m)

        # Optional: holiday adjustment (simple rule)
        if holiday_adjust and callable(holiday_is_friday) and holiday_is_friday(d):
            d = d - timedelta(days=1)  # move to Thursday

        exp["iso"] = d.isoformat()
        leg["expiry"] = exp

In [5]:
from datetime import date
as_of = date(2025, 9, 25)  # example: Sep 25, 2025

strategy_strings=[
    "i want 1000 jan26 jan27  MSFT 250.00/350.00 cs",
    "sell 5,000 feb 250 350 AAPL cs",
    "100 jun26 250/350 AAPL 1x2 cs",
    "1000 jan 100 200 300 AAPL 1x2x3 ccc"
]

stategy_parsing_results=[]
for strategy_string in strategy_strings:
    result=parse_strategy_mvp(strategy_string,as_of)
    print(strategy_string)
    print(result)

    stategy_parsing_results.append(result)

stategy_parsing_results


i want 1000 jan26 jan27  MSFT 250.00/350.00 cs
{'ok': True, 'quantity': 1000, 'symbol': 'MSFT', 'maturities': ['Jan26', 'Jan27'], 'strikes': [250.0, 350.0], 'ratio': '1x1', 'structure': 'cs'}
sell 5,000 feb 250 350 AAPL cs
{'ok': True, 'quantity': 5000, 'symbol': 'AAPL', 'maturities': ['Feb26'], 'strikes': [250.0, 350.0], 'ratio': '1x1', 'structure': 'cs'}
100 jun26 250/350 AAPL 1x2 cs
{'ok': True, 'quantity': 100, 'symbol': 'AAPL', 'maturities': ['Jun26'], 'strikes': [250.0, 350.0], 'ratio': '1x2', 'structure': 'cs'}
1000 jan 100 200 300 AAPL 1x2x3 ccc
{'ok': True, 'quantity': 1000, 'symbol': 'AAPL', 'maturities': ['Jan26'], 'strikes': [100.0, 200.0, 300.0], 'ratio': '1x2x3', 'structure': {'cp_string': 'CCC'}}


[{'ok': True,
  'quantity': 1000,
  'symbol': 'MSFT',
  'maturities': ['Jan26', 'Jan27'],
  'strikes': [250.0, 350.0],
  'ratio': '1x1',
  'structure': 'cs'},
 {'ok': True,
  'quantity': 5000,
  'symbol': 'AAPL',
  'maturities': ['Feb26'],
  'strikes': [250.0, 350.0],
  'ratio': '1x1',
  'structure': 'cs'},
 {'ok': True,
  'quantity': 100,
  'symbol': 'AAPL',
  'maturities': ['Jun26'],
  'strikes': [250.0, 350.0],
  'ratio': '1x2',
  'structure': 'cs'},
 {'ok': True,
  'quantity': 1000,
  'symbol': 'AAPL',
  'maturities': ['Jan26'],
  'strikes': [100.0, 200.0, 300.0],
  'ratio': '1x2x3',
  'structure': {'cp_string': 'CCC'}}]

In [10]:
expr = "i want 1000 jan26 jan27  MSFT 250.00/350.00 cs"
print(re.findall(r'(?<![A-Za-z])\d{2,5}(?:\.\d+)?\b', expr))

['1000', '250.00', '350.00']


In [3]:
from datetime import date
import sys, os
sys.path.append(os.path.abspath("../src"))

import strategy_normalize

# or import specific functions
from strategy_normalize import build_legs, third_friday

ref = date(2025, 9, 25)  # your “as-of” date

# Example 1: collar
mvp = {"quantity": 500, "symbol":"AAPL", "maturities":["Jan26"], "strikes":[250,300], "ratio":"1x1", "structure":"collar"}
print(build_legs(ref_date=ref, **mvp))

# Example 2: cs with slash strikes and month-only maturity
mvp = {"quantity": 100, "symbol":"AAPL", "maturities":["feb"], "strikes":[250,350], "ratio":None, "structure":"cs"}
# ref Sep 2025 → 'feb' becomes Feb 2026 (year rolled)
print(build_legs(ref_date=ref, **mvp))

# Example 3: cp_string with ratio propagation, multi-strikes
mvp = {"quantity": 1000, "symbol":"TSLA", "maturities":["Jan26"], "strikes":[200,210,230], "ratio":"1x2", "structure":{"cp_string":"PPC"}}
mvp_legs=build_legs(ref_date=ref, **mvp)
set_monthly_expiries_to_third_friday(mvp_legs["legs"])
print(mvp_legs)

# Example 4: calendar: two maturities, one strike
mvp = {"quantity": 25, "symbol":"NVDA", "maturities":["Jan26","Mar26"], "strikes":[950], "ratio":"1x1", "structure":"calendar"}
# cp sequence fallback ["P","C"]; sides default AUTO; maturities propagate correctly
print(build_legs(ref_date=ref, **mvp))

{'legs': [{'cp': 'P', 'strike': 250.0, 'side': 'BUY', 'ratio': 1, 'qty': 500, 'expiry': {'year': 2026, 'month': 1, 'iso': None, 'label': 'Jan26'}}, {'cp': 'C', 'strike': 300.0, 'side': 'SELL', 'ratio': 1, 'qty': -500, 'expiry': {'year': 2026, 'month': 1, 'iso': None, 'label': 'Jan26'}}], 'issues': [], 'notes': ''}
{'legs': [{'cp': 'C', 'strike': 250.0, 'side': 'BUY', 'ratio': 1, 'qty': 100, 'expiry': {'year': 2026, 'month': 2, 'iso': None, 'label': 'Feb26'}}, {'cp': 'C', 'strike': 350.0, 'side': 'SELL', 'ratio': 1, 'qty': -100, 'expiry': {'year': 2026, 'month': 2, 'iso': None, 'label': 'Feb26'}}], 'issues': [], 'notes': ''}


NameError: name 'set_monthly_expiries_to_third_friday' is not defined

In [14]:
a=build_legs(ref_date=ref, **mvp)
a["legs"][0]["expiry"]

{'year': 2026, 'month': 1, 'iso': None, 'label': 'Jan26'}