In [None]:
from repsheet_backend.genai import generate_text, estimate_cost_usd_input_only
from repsheet_backend.bills import get_latest_bill_text_path, get_every_bill_voted_on_by_a_member, BillId
from repsheet_backend.common import db_connect, BILLS_TABLE
from typing import Optional
import asyncio
import re
import json

In [2]:
# Load the fixed instruction text that is prepended to every bill's text.
# The encoding is given explicitly so the template is decoded the same way on
# every platform instead of depending on the locale's default encoding.
# NOTE(review): assumes the prompt file is stored as UTF-8 — confirm.
with open("prompts/summarize-bill/001.txt", "r", encoding="utf-8") as f:
    prompt_template = f.read()

def get_bill_summarisation_prompt(bill: BillId) -> Optional[str]:
    """Build the summarisation prompt for ``bill``.

    The path of the bill's text file is looked up with
    ``get_latest_bill_text_path`` and the file's raw contents are appended to
    the module-level ``prompt_template``.

    Args:
        bill: Identifier of the bill to build a prompt for.

    Returns:
        The template followed by the file contents, or ``None`` when
        ``get_latest_bill_text_path`` returns ``None`` for this bill.
    """
    xml_path = get_latest_bill_text_path(bill)
    if xml_path is None:
        return None
    # Decode explicitly rather than relying on the platform's default
    # encoding, which differs between operating systems.
    # NOTE(review): assumes the bill text files are UTF-8 — confirm.
    with open(xml_path, "r", encoding="utf-8") as f:
        xml_text = f.read()
    return prompt_template + xml_text

# Shared by the coroutines below: at most 16 of them may be inside their
# `async with google_ai_api_semaphore` section at the same time.
google_ai_api_semaphore = asyncio.Semaphore(value=16)

async def summarise_bill(bill) -> Optional[str]:
    """Generate a summary for ``bill`` via ``generate_text``.

    Returns ``None`` without calling the API when no prompt can be built for
    the bill; otherwise returns whatever ``generate_text`` produces.
    """
    prompt = get_bill_summarisation_prompt(bill)
    if prompt is not None:
        # Hold a semaphore slot only for the duration of the API call.
        async with google_ai_api_semaphore:
            summary = await generate_text(prompt)
        return summary
    return None

async def estimate_summarise_bill_cost(bill) -> float:
    """Estimate the cost of summarising ``bill``.

    Delegates to ``estimate_cost_usd_input_only`` with the bill's prompt.
    Returns ``0.0`` when no prompt can be built for the bill.
    """
    prompt = get_bill_summarisation_prompt(bill)
    if prompt is not None:
        # Hold a semaphore slot only for the duration of the estimate call.
        async with google_ai_api_semaphore:
            cost = await estimate_cost_usd_input_only(prompt)
        return cost
    return 0.0

In [3]:
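# Optional dry run, left commented out: calls estimate_summarise_bill_cost for
# every bill and totals the results. Uncomment to run it before summarising.
# costs = await asyncio.gather(
#     *[estimate_summarise_bill_cost(bill) for bill in get_every_bill_voted_on_by_a_member()]
# )

# sum(cost for cost in costs if cost is not None)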

In [None]:
bills = get_every_bill_voted_on_by_a_member()

summaries = await asyncio.gather(
    *[summarise_bill(bill) for bill in bills]
)

In [8]:
# A comma followed only by whitespace and then a closing brace or bracket,
# i.e. a trailing comma, which strict JSON does not allow. The closing
# character is captured so it can be put back by the substitution.
trailing_comma_regex = re.compile(r",\s*([}\]])")

def cleanup_and_validate_json(json_text: str) -> str:
    """Validate ``json_text`` and return it re-serialised by ``json.dumps``.

    The text is first parsed as-is. Only if that fails are trailing commas
    before ``}`` or ``]`` removed and the parse retried, so already-valid JSON
    is never modified (for example a string value containing ``", }"``).

    Args:
        json_text: Text expected to hold a JSON document.

    Returns:
        The parsed document serialised again with ``json.dumps`` defaults.

    Raises:
        json.JSONDecodeError: If the text is still invalid after the repair.
    """
    try:
        json_obj = json.loads(json_text)
    except json.JSONDecodeError:
        # The repair is purely textual and cannot tell string contents from
        # structure, so it is applied only as a fallback for invalid input.
        repaired_text = trailing_comma_regex.sub(r"\1", json_text)
        json_obj = json.loads(repaired_text)
    return json.dumps(json_obj)

# (bill id as text, validated summary JSON) pairs, rebuilt from scratch each
# time this cell runs.
bill_summaries: list[tuple[str, str]] = []

for bill, summary in zip(bills, summaries):
    # summarise_bill returns None when no prompt could be built for the bill.
    if summary is None:
        continue
    # Strip surrounding whitespace first so a Markdown code fence is still
    # recognised when the response has leading or trailing whitespace. The
    # newlines next to the fence markers can stay: json.loads ignores
    # whitespace around the document.
    unfenced = summary.strip().removeprefix("```json").removesuffix("```")
    bill_summaries.append((str(bill), cleanup_and_validate_json(unfenced)))

In [9]:
# Replace the table's contents with the freshly generated summaries.
clear_sql = f"DELETE FROM {BILLS_TABLE}"
insert_sql = f"INSERT INTO {BILLS_TABLE} ([Bill ID], Summary) VALUES (?, ?)"

with db_connect() as db:
    # Remove every existing row first...
    db.execute(clear_sql)
    # ...then insert one row per (bill id, summary) pair.
    db.executemany(insert_sql, bill_summaries)
