In [None]:
from repsheet_backend.genai import generate_text, estimate_cost_usd_input_only
from repsheet_backend.summarize_bills import get_latest_bill_text_path, get_every_bill_voted_on_by_a_member, BillId, BillSummary
from repsheet_backend.common import db_connect, BILLS_TABLE
from typing import Optional
import asyncio
import re
import json
from pydantic import BaseModel

### Spec for the summary the AI is supposed to be generating

In [2]:
# Load the bill-summarisation prompt template once; the {{BILL_XML}}
# placeholder in it is filled in per bill by get_bill_summarisation_prompt.
# The encoding is given explicitly so the text is decoded the same way on
# every platform instead of depending on the locale default.
# NOTE(review): assumes the template file is saved as UTF-8 — confirm.
with open("prompts/summarize-bill/001.txt", "r", encoding="utf-8") as f:
    prompt_template = f.read()

# Matches one <XRefExternal ...>text</XRefExternal> element and captures the
# text between the tags. DOTALL lets the captured text span line breaks, so an
# element whose content is wrapped over several lines is still matched.
xref_external_regex = re.compile(
    r"<XRefExternal[^>]*>(.*?)</XRefExternal>", re.DOTALL
)

def simplify_bill_xml(xml_text: str) -> str:
    """Strip XRefExternal markup from bill XML, keeping the enclosed text.

    Each ``<XRefExternal ...>text</XRefExternal>`` element is replaced by its
    inner text; everything else in ``xml_text`` is returned unchanged.

    This is a regex substitution, not an XML parse: nested or self-closing
    XRefExternal elements are not handled specially.
    """
    return xref_external_regex.sub(r"\1", xml_text)

def get_bill_summarisation_prompt(bill: BillId) -> Optional[str]:
    """Build the summarisation prompt for a single bill.

    Looks up the path of the bill's latest text with
    ``get_latest_bill_text_path``, reads that file, strips XRefExternal markup
    via ``simplify_bill_xml``, and substitutes the result for the
    ``{{BILL_XML}}`` placeholder in ``prompt_template``.

    Returns:
        The filled-in prompt, or ``None`` when no text path is available for
        the bill.
    """
    xml_path = get_latest_bill_text_path(bill)
    if xml_path is None:
        return None
    # Decode explicitly rather than relying on the platform's default
    # encoding, which differs between operating systems.
    # NOTE(review): assumes the bill XML files are UTF-8 — confirm.
    with open(xml_path, "r", encoding="utf-8") as f:
        xml_text = f.read()
    xml_text = simplify_bill_xml(xml_text)
    return prompt_template.replace("{{BILL_XML}}", xml_text)

async def summarise_bill(bill) -> Optional[str]:
    """Generate a summary for one bill.

    Builds the prompt with ``get_bill_summarisation_prompt`` and passes it to
    ``generate_text``. Returns ``None`` without calling the model when no
    prompt could be built for the bill; otherwise returns whatever
    ``generate_text`` yields.
    """
    bill_prompt = get_bill_summarisation_prompt(bill)
    return None if bill_prompt is None else await generate_text(bill_prompt)

async def estimate_summarise_bill_cost(bill) -> float:
    """Estimate the cost of summarising one bill.

    Builds the same prompt that ``summarise_bill`` would send and hands it to
    ``estimate_cost_usd_input_only``. A bill for which no prompt can be built
    costs ``0.0``.
    """
    bill_prompt = get_bill_summarisation_prompt(bill)
    if bill_prompt is not None:
        return await estimate_cost_usd_input_only(bill_prompt)
    return 0.0

In [3]:
# Optional dry run, left disabled: calls estimate_summarise_bill_cost for every
# bill and sums the results. Uncomment to check the cost before running the
# real generation cell.
# costs = await asyncio.gather(
#     *[estimate_summarise_bill_cost(bill) for bill in get_every_bill_voted_on_by_a_member()]
# )

# sum(cost for cost in costs if cost is not None)

In [4]:
bills = get_every_bill_voted_on_by_a_member()

summaries = await asyncio.gather(
    *[summarise_bill(bill) for bill in bills]
)

Generating text with gemini-2.0-flash (9892643 chars)


In [None]:

def cleanup_and_validate_json(json_text: str) -> str:
    """Repair known defects in model-generated JSON and validate it.

    Applies three textual fixes, then validates the result against
    ``BillSummary`` and returns the model re-serialised with
    ``model_dump_json``.

    Args:
        json_text: JSON text produced by the model, already stripped of any
            Markdown code fence.

    Returns:
        The JSON serialisation of the validated ``BillSummary``.

    Raises:
        Exception: whatever ``BillSummary.model_validate_json`` raises when the
            repaired text still does not validate. The offending text is
            printed before the exception propagates.
    """
    # NOTE(review): trailing_comma_regex and missing_comma_regex are not
    # defined or imported anywhere in the visible notebook, so this function
    # raises NameError on a fresh kernel. Their definitions need to be
    # restored (or imported) before this cell can run.
    # Remove trailing commas before closing braces
    json_text = trailing_comma_regex.sub("}", json_text)
    # Add any missing commas
    json_text = missing_comma_regex.sub('", "', json_text)
    # "\$" is not a valid JSON escape, but the model emits it. The replacement
    # runs twice so a "$" preceded by two backslashes is also reduced to "$".
    json_text = json_text.replace("\\$", "$").replace("\\$", "$")
    try:
        obj = BillSummary.model_validate_json(json_text)
    except Exception:
        # Show the text that failed so it can be inspected, then re-raise the
        # original exception unchanged.
        print(json_text)
        raise
    return obj.model_dump_json()

# Pair every bill with its generated summary and collect the rows to write to
# the database. Bills whose summary is None are reported and skipped.
bill_summaries = []

for bill, summary in zip(bills, summaries):
    if summary is not None:
        # Strip the Markdown code fence around the JSON, if present, then
        # repair and validate the JSON itself.
        summary = cleanup_and_validate_json(
            summary.removeprefix("```json\n").removesuffix("\n```")
        )
        bill_summaries.append(dict(bill_id=str(bill), summary=summary))
    else:
        print(f"Bill {bill} failed to summarise, too thicc")

Bill 41-1-C-13 failed to summarise, too thicc


In [6]:
# Write each validated summary onto its existing row, matched by bill ID.
# This is an UPDATE, so bills without a row in the table are not added.
with db_connect() as db:
    update_summary_sql = (
        f"UPDATE {BILLS_TABLE} SET Summary = :summary WHERE [Bill ID] = :bill_id"
    )
    db.executemany(update_summary_sql, bill_summaries)
