In [None]:
import time, random, json, openai
from openai import RateLimitError, APIError, Timeout
import pandas as pd
import os
from dotenv import load_dotenv
# Load variables from a local .env file (if any) into the process environment.
load_dotenv()

# Fail fast on a missing key instead of discovering it at the first API call.
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; export it or add it to a .env file next to this notebook"
    )
# Hand the key to the openai module explicitly rather than relying on it being
# picked up implicitly from the environment.
openai.api_key = api_key

# Labelled evaluation set; the evaluation loop reads these three columns per row.
df = pd.read_csv("ground_truth.csv")
_missing_cols = {"id", "snippet", "label"} - set(df.columns)
if _missing_cols:
    raise ValueError(f"ground_truth.csv is missing required columns: {sorted(_missing_cols)}")

# System prompt sent as the "system" message by every prompt variant in make_prompt.
# It defines the analyst persona, the six candidate source categories (A-F), the
# fallback category when unsure, and the JSON keys the reply must contain.
# NOTE(review): the prompt does not say whether `source_label` should hold the
# letter ("A") or the category name ("Term Sheet"); the few-shot examples use the
# name. Confirm which form the labels in ground_truth.csv use.
SYS_MSG = (
    """
You are a senior real-estate underwriting analyst. Your task is to determine where a piece of text came from. Possible sources:
(A) Term Sheet, (B) Appraisal, (C) Offering Memo, (D) Public internet/press release, (E) Synthesized/combined from >1 source, (F) Unknown.
When unsure default to F. Return a JSON object with keys: source_label, confidence_0_1, and explanation (≤40 words).
"""
)

# Two worked examples (a term-sheet excerpt and an appraisal excerpt), each followed
# by the JSON prediction expected for it. make_prompt places this text in front of
# the snippet for the "fewshot" variant.
# The trailing backslashes inside the literal are line continuations, so each
# example text is a single line in the resulting string.
FEW_SHOT = """
Text: "The loan proceeds will be used to refinance the existing debt on the subject property. \
The note will be non-recourse with standard carve-outs, interest rate fixed at 6.25 % for a 5-year term, \
and maximum LTC of 70 %."

Prediction: {"source_label":"Term Sheet",
             "confidence_0_1":0.94,
             "explanation":"Financing terms, LTC, non-recourse language typical of a term sheet."}

Text: "The highest and best use analysis indicates the current multifamily configuration yields the \
greatest economic return.  Using the Income Capitalization Approach, the as-is market value is \
concluded at $23,450,000 as of 1 May 2024."

Prediction: {"source_label":"Appraisal",
             "confidence_0_1":0.92,
             "explanation":"'Highest and best use' and valuation conclusion language unique to appraisals."}
"""

def make_prompt(variant: str, snippet: str) -> list[dict]:
    """Build the chat messages for one snippet under the given prompt variant.

    Args:
        variant: One of "zeroshot", "fewshot" or "cot".
        snippet: The text whose source document type should be classified.

    Returns:
        A two-element message list: the shared system prompt (SYS_MSG) followed
        by the variant-specific user message containing the snippet wrapped in
        triple double-quotes.

    Raises:
        ValueError: If ``variant`` is not one of the supported names.
    """
    quoted = f'"""{snippet}"""'

    if variant == "zeroshot":
        user = f"Classify the following text and output JSON only:\n{quoted}"
    elif variant == "fewshot":
        # Worked examples first, then the snippet in the same Text/Prediction layout.
        user = f"{FEW_SHOT}\nText: {quoted}\nPrediction:"
    elif variant == "cot":
        # Asks for free-text reasoning first, so the reply is NOT pure JSON:
        # callers must extract the JSON object from the end of the reply.
        user = (
            "First, reason step-by-step which textual features reveal the document type "
            "(e.g., financing terms, valuation jargon, marketing phrases). "
            "Then, on a **new line**, output **only** the JSON object with keys "
            "`source_label`, `confidence_0_1`, and `explanation` (≤40 words).\n"
            f"Text: {quoted}"
        )
    else:
        raise ValueError(
            f"unknown variant: {variant!r} (expected 'zeroshot', 'fewshot' or 'cot')"
        )

    return [
        {"role": "system", "content": SYS_MSG},
        {"role": "user", "content": user},
    ]

In [None]:
def call_chat_completion(messages: list[dict],
                         model: str = "gpt-4o",
                         max_retries: int = 6,
                         base_delay: float = 1.0):
    """Request a chat completion, retrying transient failures with back-off.

    Uses the openai>=1.0 module-level client (``openai.chat.completions.create``);
    the legacy ``openai.ChatCompletion.create`` entry point was removed in 1.0,
    which is the only major version the notebook's ``from openai import ...``
    line resolves against.

    Args:
        messages: Chat messages in the ``[{"role": ..., "content": ...}, ...]`` format.
        model: Model name passed through to the API.
        max_retries: Total number of attempts (must be >= 1).
        base_delay: Base delay in seconds; attempt ``k`` (0-based) sleeps
            ``base_delay * 2**k`` plus up to one second of random jitter.

    Returns:
        The completion object returned by the API.

    Raises:
        ValueError: If ``max_retries`` is smaller than 1.
        openai.OpenAIError: The last transient error once attempts are exhausted,
            or any non-transient error (bad request, authentication, ...) immediately.
    """
    if max_retries < 1:
        # Without this guard the loop body never runs and None is returned silently.
        raise ValueError("max_retries must be >= 1")

    # Only errors that can succeed on a later attempt are retried. The top-level
    # `Timeout` name is deliberately not listed: in openai>=1.0 it is the httpx
    # timeout *configuration* class, and a non-exception class in an except
    # clause raises TypeError. Timeouts surface as APITimeoutError.
    retryable = (
        RateLimitError,
        openai.APITimeoutError,
        openai.APIConnectionError,
        openai.InternalServerError,
    )

    for attempt in range(max_retries):
        try:
            return openai.chat.completions.create(
                model=model,
                messages=messages,
                temperature=0,
            )
        except retryable as e:
            if attempt == max_retries - 1:
                raise
            sleep_sec = base_delay * (2 ** attempt) + random.uniform(0, 1)
            print(f"{type(e).__name__}: retrying in {sleep_sec:.1f}s…")
            time.sleep(sleep_sec)


In [None]:
# Prompt variants evaluated for every snippet, in this order.
VARIANTS = ("zeroshot", "fewshot", "cot")


def _extract_json_object(text):
    """Return the prediction object embedded in a model reply, or None.

    The reply is not always pure JSON: the "cot" prompt asks for reasoning
    before the JSON object, and models sometimes wrap JSON in code fences.
    Candidate objects are tried from the last "{" backwards, so the object at
    the end of the reply wins. A dict containing "source_label" is preferred;
    otherwise the last parsable dict is returned.
    """
    decoder = json.JSONDecoder()
    fallback = None
    start = text.rfind("{")
    while start != -1:
        try:
            candidate, _end = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            candidate = None
        if isinstance(candidate, dict):
            if "source_label" in candidate:
                return candidate
            if fallback is None:
                fallback = candidate
        start = text.rfind("{", 0, start)
    return fallback


records = []
n_unparsed = 0

for _, row in df.iterrows():
    snippet, true_label = row["snippet"], row["label"]
    for variant in VARIANTS:
        prompt = make_prompt(variant, snippet)
        rsp = call_chat_completion(prompt)
        # `content` can be None (e.g. an empty reply); treat that as empty text.
        raw = (rsp.choices[0].message.content or "").strip()

        out = _extract_json_object(raw)
        if out is None:
            # Keep the row (with empty prediction fields) instead of aborting the
            # whole run and losing every record collected so far.
            n_unparsed += 1
            print(f"⚠ id={row['id']} variant={variant}: no JSON object found in reply")
            out = {}

        records.append({
            "id": row["id"],
            "prompt_variant": variant,
            "ground_truth": true_label,
            "predicted": out.get("source_label"),
            "confidence": out.get("confidence_0_1"),
            "explanation": out.get("explanation"),
            # Full reply text, kept so unparsed or surprising outputs can be inspected.
            "raw_response": raw,
        })

results = pd.DataFrame(records)
results.to_csv("run_log.csv", index=False)
print("✓ Completed — results saved to run_log.csv")
if n_unparsed:
    print(f"⚠ {n_unparsed} of {len(records)} replies contained no parsable JSON object")


In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# Compare labels as plain strings. A missing prediction (None/NaN) gets its own
# bucket so that sklearn does not fail on mixed str/None label types.
y_true = results["ground_truth"].astype(str)
y_pred = results["predicted"].fillna("<no prediction>").astype(str)

# Pooled over all prompt variants.
print(classification_report(y_true, y_pred, zero_division=0))

# Per-variant breakdown, since comparing the prompt variants is the point of the run.
for variant, group in results.groupby("prompt_variant", sort=False):
    print(f"\n=== {variant} ===")
    print(classification_report(y_true.loc[group.index], y_pred.loc[group.index], zero_division=0))

# The matrix covers the union of true and predicted labels. Passing `labels`
# explicitly keeps the row/column names aligned with the matrix even when the
# model predicts a label that never occurs in the ground truth.
labels = sorted(set(y_true) | set(y_pred))
cm = pd.DataFrame(
    confusion_matrix(y_true, y_pred, labels=labels),
    index=labels,    # rows: ground truth
    columns=labels,  # columns: predicted
)
cm.to_csv("confusion_matrix.csv")
cm
