In [1]:
from repsheet_backend.genai import generate_text, estimate_cost_usd_input_only
from repsheet_backend.bills import get_latest_bill_text_path, get_every_bill_voted_on_by_a_member, BillId
from repsheet_backend.common import db_connect, BILLS_TABLE
from typing import Optional
import asyncio
import re
import json

In [3]:
# Load the bill-summarisation prompt template once; the bill XML is substituted into it
# for every bill further down.
# The encoding is pinned so the template decodes the same way on every platform instead
# of depending on the locale default (assumes the file is saved as UTF-8 — TODO confirm).
with open("prompts/summarize-bill/001.txt", "r", encoding="utf-8") as f:
    prompt_template = f.read()

def get_bill_summarisation_prompt(bill: BillId) -> Optional[str]:
    """Build the summarisation prompt for a single bill.

    Looks up the path of the bill's text via ``get_latest_bill_text_path``, reads
    that file and substitutes its contents for the ``{{BILL_XML}}`` placeholder in
    ``prompt_template``.

    Args:
        bill: Identifier of the bill to build the prompt for.

    Returns:
        The filled-in prompt, or ``None`` when no text path is available for the bill.
    """
    xml_path = get_latest_bill_text_path(bill)
    if xml_path is None:
        return None
    # Pin the encoding rather than relying on the locale default, which differs between
    # platforms. Assumes the bill XML files are UTF-8 encoded — TODO confirm.
    with open(xml_path, "r", encoding="utf-8") as f:
        xml_text = f.read()
    return prompt_template.replace("{{BILL_XML}}", xml_text)

# Upper bound on how many coroutines may hold the semaphore (and therefore be inside
# an API call guarded by it) at the same time.
MAX_CONCURRENT_GOOGLE_AI_CALLS = 16
google_ai_api_semaphore = asyncio.Semaphore(MAX_CONCURRENT_GOOGLE_AI_CALLS)

async def summarise_bill(bill) -> Optional[str]:
    """Generate a summary for one bill, limited by ``google_ai_api_semaphore``.

    Args:
        bill: Identifier of the bill, passed through to
            ``get_bill_summarisation_prompt``.

    Returns:
        Whatever ``generate_text`` returns for the bill's prompt, or ``None`` when
        no prompt could be built for the bill.
    """
    async with google_ai_api_semaphore:
        # Build the prompt only after a slot has been acquired. When many of these
        # coroutines are started together (e.g. via asyncio.gather), building it up
        # front would keep every bill's full text in memory while waiting for a slot.
        prompt = get_bill_summarisation_prompt(bill)
        if prompt is None:
            return None
        return await generate_text(prompt)

async def estimate_summarise_bill_cost(bill) -> float:
    """Estimate the input-side cost (USD) of summarising one bill.

    Args:
        bill: Identifier of the bill, passed through to
            ``get_bill_summarisation_prompt``.

    Returns:
        The value returned by ``estimate_cost_usd_input_only`` for the bill's prompt,
        or ``0.0`` when no prompt could be built for the bill.
    """
    async with google_ai_api_semaphore:
        # Build the prompt only after a slot has been acquired, so that starting many
        # of these coroutines at once does not hold every bill's text in memory.
        prompt = get_bill_summarisation_prompt(bill)
        if prompt is None:
            return 0.0
        return await estimate_cost_usd_input_only(prompt)

In [3]:
# costs = await asyncio.gather(
#     *[estimate_summarise_bill_cost(bill) for bill in get_every_bill_voted_on_by_a_member()]
# )

# sum(cost for cost in costs if cost is not None)

In [5]:
bills = get_every_bill_voted_on_by_a_member()

summaries = await asyncio.gather(
    *[summarise_bill(bill) for bill in bills]
)

Generating text with gemini-2.0-flash (1857641 chars)
Generating text with gemini-2.0-flash (195942 chars)
Generating text with gemini-2.0-flash (164567 chars)
Generating text with gemini-2.0-flash (1148594 chars)
Received response from gemini-2.0-flash (3754 chars)
Received response from gemini-2.0-flash (3940 chars)
Generating text with gemini-2.0-flash (10541 chars)
Generating text with gemini-2.0-flash (9870 chars)
Generating text with gemini-2.0-flash (144420 chars)
Generating text with gemini-2.0-flash (18307 chars)
Generating text with gemini-2.0-flash (13470 chars)
Generating text with gemini-2.0-flash (43567 chars)
Generating text with gemini-2.0-flash (9053 chars)
Generating text with gemini-2.0-flash (17146 chars)
Generating text with gemini-2.0-flash (8640 chars)
Generating text with gemini-2.0-flash (6971 chars)
Received response from gemini-2.0-flash (878 chars)
Received response from gemini-2.0-flash (1430 chars)
Received response from gemini-2.0-flash (1146 chars)
Recei

In [13]:
# A comma followed only by whitespace and then a closing brace or bracket.
# The closer is captured so the substitution can put it back.
trailing_comma_regex = re.compile(r",\s*([}\]])")

def cleanup_and_validate_json(json_text: str) -> str:
    """Validate JSON text and return it re-serialised with ``json.dumps``.

    The text is first parsed as-is. Only when that fails are textual repairs
    applied (they are plain text substitutions and would otherwise also rewrite
    matching characters inside valid string values):

    * trailing commas before a closing ``}`` or ``]`` are removed;
    * the sequence backslash-dollar is replaced by a plain ``$``.

    Args:
        json_text: Text expected to contain a single JSON document.

    Returns:
        The document re-serialised by ``json.dumps`` with default settings.

    Raises:
        json.JSONDecodeError: If the text cannot be parsed even after the repairs.
            The error and the repaired text are printed before re-raising.
    """
    try:
        # Already-valid JSON must pass through without being touched by the repairs.
        return json.dumps(json.loads(json_text))
    except json.JSONDecodeError:
        # Deliberately ignored: fall through and retry with the repairs applied.
        pass

    # Remove trailing commas before closing braces / brackets
    json_text = trailing_comma_regex.sub(r"\1", json_text)
    # Never seen this as an escape character before, but the AI seems to think it is
    json_text = json_text.replace("\\$", "$")
    try:
        json_obj = json.loads(json_text)
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON: {e}")
        print(json_text)
        raise
    return json.dumps(json_obj)

# One {"bill_id", "summary"} record per bill that produced a summary.
bill_summaries = []

for bill, summary in zip(bills, summaries):
    if summary is None:
        print(f"Bill {bill} failed to summarise, too thicc")
        continue
    # Strip an optional Markdown code fence around the JSON. Surrounding whitespace is
    # removed first so a trailing newline after the closing fence (or a bare ``` fence
    # without the "json" tag) does not leave the fence in place.
    summary = summary.strip()
    summary = summary.removeprefix("```json").removeprefix("```")
    summary = summary.removesuffix("```").strip()
    summary = cleanup_and_validate_json(summary)
    bill_summaries.append({
        "bill_id": str(bill),
        "summary": summary
    })

Bill 41-1-C-13 failed to summarise, too thicc


In [None]:
# Write each summary onto the row whose [Bill ID] matches; this is an UPDATE of
# existing rows, not an insert. The :summary / :bill_id placeholders are filled from
# the keys of each dict in bill_summaries.
update_summary_sql = f"UPDATE {BILLS_TABLE} SET Summary = :summary WHERE [Bill ID] = :bill_id"

# NOTE(review): whether leaving the `with` block commits the changes depends on what
# db_connect() returns — confirm.
with db_connect() as db:
    db.executemany(update_summary_sql, bill_summaries)


IntegrityError: NOT NULL constraint failed: bills.Parliament