In [None]:
# If needed (skip if already installed):
# !pip install openai python-dotenv tenacity python-dateutil dateparser

import os, json, time, hashlib
from datetime import date
from tenacity import retry, stop_after_attempt, wait_exponential
from dotenv import load_dotenv
# Read variables from a .env file (OPENAI_API_KEY is expected to be among them).
load_dotenv()

# Stop right away when the key is absent or empty.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
assert OPENAI_API_KEY, "Set OPENAI_API_KEY in .env or env vars"

In [None]:
from openai import OpenAI
# API client authenticated with OPENAI_API_KEY.
client = OpenAI(api_key=OPENAI_API_KEY)

# Model name and completion token cap; both overridable through the environment.
MODEL = os.environ.get("OPENAI_MODEL", "gpt-4o-mini")
JSON_MAX_TOKENS = int(os.environ.get("JSON_MAX_TOKENS", "2000"))

def _prompt_hash(s: str) -> str:
    return hashlib.sha256(s.encode()).hexdigest()[:10]

def safe_json_parse(raw: str):
    """Best-effort extraction of the event list from a model response.

    Accepts either a JSON object holding an ``"events"`` list or a bare JSON
    array, optionally wrapped in a markdown code fence.

    Args:
        raw: Response text. Non-string values (e.g. ``None``) yield ``[]``.

    Returns:
        The ``"events"`` list, or the top-level list itself. ``[]`` is returned
        when the text is not valid JSON or has any other shape.
    """
    if not isinstance(raw, str):
        return []

    s = raw.strip()
    if s.startswith("```"):
        # Peel the fence's backticks off both ends in one go.
        s = s.strip("`")
        first_line, newline, rest = s.partition("\n")
        # The opening fence line may carry a language tag ("json"). Drop that
        # line only when it is not already the start of the payload, so a fence
        # written as ```{ ... keeps its opening brace/bracket.
        if newline and not first_line.lstrip().startswith(("{", "[")):
            s = rest

    try:
        obj = json.loads(s)
    except (ValueError, RecursionError):
        # json.JSONDecodeError is a ValueError; malformed output maps to "no events".
        return []

    if isinstance(obj, dict):
        events = obj.get("events")
        return events if isinstance(events, list) else []
    if isinstance(obj, list):
        return obj
    return []

@retry(stop=stop_after_attempt(3), wait=wait_exponential(min=1, max=8))
def call_openai_json(prompt: str):
    """Send ``prompt`` to the chat completions API in JSON-object mode.

    The function is wrapped by tenacity's ``retry``: up to 3 attempts, with
    exponential backoff (``min=1``, ``max=8``) between them.

    Args:
        prompt: Content of the user message; also hashed into the diagnostics.

    Returns:
        A ``(raw_text, diag)`` tuple. ``raw_text`` is the first choice's message
        content, or ``"{}"`` when that content is empty or ``None``. ``diag`` is
        a dict with:
          - ``model``: model name reported by the response
          - ``latency_ms``: elapsed time of this attempt's API call
          - ``prompt_hash``: short hash of ``prompt``
          - ``tokens_used``: ``usage.total_tokens``, or ``None`` without usage data
          - ``finish_reason``: the first choice's finish reason, or ``None``
    """
    # Monotonic clock: wall-clock time.time() can jump (NTP/DST) and skew latency.
    started = time.perf_counter()
    resp = client.chat.completions.create(
        model=MODEL,
        temperature=0,
        response_format={"type": "json_object"},  # ask the API for a JSON object
        max_tokens=JSON_MAX_TOKENS,
        messages=[
            {"role": "system", "content": "You are a parser that outputs ONLY JSON and follows the schema exactly."},
            {"role": "user",   "content": prompt},
        ],
    )
    latency_ms = int((time.perf_counter() - started) * 1000)

    choice = resp.choices[0]
    raw_text = choice.message.content or "{}"
    usage = getattr(resp, "usage", None)
    diag = {
        "model": resp.model,
        "latency_ms": latency_ms,
        "prompt_hash": _prompt_hash(prompt),
        "tokens_used": usage.total_tokens if usage else None,
        # "length" here means the output hit max_tokens, so the JSON is probably
        # cut off; surfacing it explains an otherwise silent empty parse.
        "finish_reason": getattr(choice, "finish_reason", None),
    }
    return raw_text, diag

In [None]:
def build_prompt(user_text: str, default_tz: str, today_iso: str) -> str:
    """Build the event-extraction prompt sent to the model.

    Args:
        user_text: Free-form text to parse; appended verbatim at the end.
        default_tz: Accepted for interface compatibility; it is not
            interpolated into the prompt text.
        today_iso: Date string the prompt tells the model to treat as "today".

    Returns:
        The prompt text with leading/trailing whitespace stripped.

    NOTE(review): the few-shot outputs use fixed 2025-09 dates regardless of
    ``today_iso`` - confirm this does not bias relative-date resolution.
    """
    # This is an f-string, so every literal JSON brace must be doubled
    # ({{ and }}); a lone "}" is a SyntaxError.
    return f"""
Return ONLY a JSON object with one key "events": an array of event objects.

Event schema (each object):
- title: string
- event_date: string (YYYY-MM-DD)
- event_time: string ("" or HH:MM 24h; "" = all-day)
- end_date: string (YYYY-MM-DD)
- timezone: string ("" or valid IANA; leave "" unless explicitly provided)
- location: string
- invitees: array of strings (emails)
- notifications: array of objects {{ "method": "email|popup", "minutes": integer }}
- recurrence: string (RRULE or "")
- confidence: number 0..1 (self-estimate)

Rules:
- Treat {today_iso} as "today" for relative phrases like "tomorrow" or "next Friday".
- Normalize times: 12am→00:00, noon/12pm→12:00, always 24-hour HH:MM.
- Do NOT invent timezones; leave "" unless clearly given.
- If multiple events are present, include them all.
- No prose, no markdown fences.

Few-shot examples (abbreviated):
Input:
"Coffee with Maya Tue 9:30am at DC Library."
Output:
{{"events":[{{"title":"Coffee with Maya","event_date":"2025-09-02","event_time":"09:30","end_date":"2025-09-02","timezone":"","location":"DC Library","invitees":[],"notifications":[],"recurrence":"","confidence":0.85}}]}}

Input:
"Study group Monday 7-8pm, Room 202."
Output:
{{"events":[{{"title":"Study group","event_date":"2025-09-01","event_time":"19:00","end_date":"2025-09-01","timezone":"","location":"Room 202","invitees":[],"notifications":[],"recurrence":"", "confidence":0.8}}]}}

Now parse this input into the same JSON object shape:
{user_text}
""".strip()

In [None]:
def try_parse(user_text: str, default_tz: str = "", today: "date | None" = None):
    """Run one extraction round-trip: build the prompt, call the model, parse.

    Args:
        user_text: Free-form text describing one or more events.
        default_tz: Passed through to ``build_prompt``.
        today: Reference date for relative phrases. ``None`` (the default)
            means "the current date at call time".

    Returns:
        A ``(records, diag, raw)`` tuple: the parsed event list, the
        diagnostics dict from ``call_openai_json``, and the raw response text.
    """
    # Resolve the default per call. A `date.today()` default in the signature
    # is evaluated once at definition time and goes stale in a long-lived kernel.
    if today is None:
        today = date.today()
    prompt = build_prompt(user_text, default_tz, today.isoformat())
    raw, diag = call_openai_json(prompt)
    records = safe_json_parse(raw)
    return records, diag, raw

In [None]:
# Example 1: one sentence carrying an invitee and a reminder.
txt1 = "Dinner with John tomorrow 7pm at Boston Pizza; alex@example.com; remind me 10 minutes before."
recs1, diag1, raw1 = try_parse(txt1)
print(diag1)
# Show at most the first 1000 characters of the pretty-printed result.
print(json.dumps(recs1, indent=2)[:1000])

# Example 2: course schedule block (paste your long example)
txt2 = """Course Schedule
Important: ALL TIMES EASTERN
Week 1 ... (paste the full block here)"""
recs2, diag2, raw2 = try_parse(txt2)
print(diag2)
print(f"Events parsed: {len(recs2)}")
# Only the first three events are printed.
print(json.dumps(recs2[:3], indent=2))

In [None]:
# Persist the raw response and parsed events of the second example for inspection.
os.makedirs("tmp_llm", exist_ok=True)
# Explicit UTF-8: the raw model text may contain non-ASCII characters that the
# platform's default encoding (e.g. cp1252 on Windows) cannot write.
with open("tmp_llm/last_raw.json", "w", encoding="utf-8") as f:
    f.write(raw2)
with open("tmp_llm/last_parsed.json", "w", encoding="utf-8") as f:
    json.dump(recs2, f, indent=2)