In [1]:
from repsheet_backend.common import DATA_DIR, db_connect, MEMBER_VOTES_TABLE, VOTES_HELD_TABLE, GCP_BILLING_PROJECT, download_all_bill_texts, BillId
import httpx
import os
from os import path
from multiprocessing.pool import ThreadPool
from anthropic import Anthropic
from google import genai
from typing import Optional

## Enumerate every bill voted on by a current member

In [2]:
# Pull the distinct (parliament, session, bill number) triples referenced by
# member-vote rows, then call download_all_bill_texts on each triple across
# 8 worker threads. `results` is parallel to `bills`; the next cell reads each
# entry as a truthy "found" flag.
with db_connect() as db:
    bill_query = (
        "SELECT DISTINCT v.[Parliament], v.[Session], v.[Bill Number] "
        f"FROM {MEMBER_VOTES_TABLE} mv "
        f"LEFT JOIN {VOTES_HELD_TABLE} v ON v.[Vote ID] = mv.[Vote ID] "
        "WHERE [Bill Number] IS NOT NULL "
        "ORDER BY v.[Parliament] DESC "
    )
    # Plain tuples so every row can be star-unpacked as positional arguments.
    bills = list(map(tuple, db.execute(bill_query).fetchall()))

    with ThreadPool(8) as pool:
        results = pool.starmap(download_all_bill_texts, bills)

In [3]:
# Tally download outcomes, restricted to Parliament 44 / Session 1:
#   i -> bills whose text was not found (each one is printed)
#   j -> bills whose text was found
i = j = 0
for row, found in zip(bills, results):
    parliament, session, bill_number = row
    if (parliament, session) != (44, 1):
        continue
    if found:
        j += 1
        continue
    print(f"Bill text not found for {row}")
    i += 1
i, j

(0, 144)

In [4]:
def get_latest_bill_text_path(bill: BillId) -> Optional[str]:
    """Return the path of the latest non-empty XML text stored for a bill.

    Scans ``DATA_DIR/bill_text/<parliament>/<session>/<bill_number>/<bill_number>_<reading>``
    for readings 1 through 4 and keeps the greatest path among the files whose
    size is non-zero.

    Args:
        bill: A ``(parliament, session, bill_number)`` triple.

    Returns:
        The selected file path, or ``None`` when no reading directory contains
        a non-empty file (including when a reading directory does not exist).

    Raises:
        AssertionError: If a reading directory contains a file that does not
            end in ``.xml``.
    """
    parliament, session, bill_number = bill
    latest_reading_path = ""
    for reading in (1, 2, 3, 4):
        texts_path = path.join(DATA_DIR, f"bill_text/{parliament}/{session}/{bill_number}/{bill_number}_{reading}")
        try:
            files = os.listdir(texts_path)
        except FileNotFoundError:
            # Nothing was saved for this reading: treat it like an empty
            # directory rather than aborting the whole lookup, so the caller
            # gets the documented ``None`` result when no text exists.
            continue
        for file in files:
            assert file.endswith(".xml"), f"unexpected non-XML file in {texts_path}: {file}"
            filepath = path.join(texts_path, file)
            # Zero-byte files are ignored (presumably placeholders for texts
            # that could not be downloaded -- TODO confirm).
            if path.getsize(filepath) > 0:
                # Every candidate shares the same prefix up to the single-digit
                # reading number, so plain string comparison prefers the
                # highest reading, then the greatest file name within it.
                latest_reading_path = max(latest_reading_path, filepath)
    return latest_reading_path or None
    

In [32]:
# Shared google-genai client, configured to go through Vertex AI with the
# project and region given below. Used by the summarisation cell further down.
google_ai = genai.Client(
    vertexai=True,
    project='repsheet-app-prod',
    location='us-central1',
)

In [None]:
# Load the summarisation prompt once; summarise_bill() prepends it to each
# bill's XML. The encoding is pinned so the prompt is decoded the same way on
# every platform instead of following the locale default
# (assumes the prompt file is saved as UTF-8 -- TODO confirm).
with open("prompts/summarize-bill/001.txt", "r", encoding="utf-8") as f:
    prompt_template = f.read()

def summarise_bill(bill, model: str = "gemini-2.0-flash") -> Optional[str]:
    """Ask a Gemini model to summarise the latest stored text of a bill.

    The module-level ``prompt_template`` is prepended to the raw XML of the
    file chosen by ``get_latest_bill_text_path`` and the combined string is
    sent as a single prompt.

    Args:
        bill: Bill identifier accepted by ``get_latest_bill_text_path``.
        model: Model name passed to ``google_ai.models.generate_content``.

    Returns:
        ``response.text`` from the model call, or ``None`` when no text file
        was found for the bill.
    """
    xml_path = get_latest_bill_text_path(bill)
    if xml_path is None:
        return None
    # Decode explicitly instead of relying on the locale default, which would
    # garble or reject non-ASCII characters on platforms whose default is not
    # UTF-8 (assumes the stored bill XML is UTF-8 -- TODO confirm).
    with open(xml_path, "r", encoding="utf-8") as f:
        xml_text = f.read()
    prompt = prompt_template + xml_text
    response = google_ai.models.generate_content(
        model=model,
        contents=prompt,
    )
    return response.text


In [None]:
# token_counts_google = {}

# for bill, filepath in list(latest_bill_file.items())[:1]:
#     with open(filepath, "r") as f:
#         text = f.read()
#     response = google_ai.models.count_tokens(
#         model="gemini-2.0-flash",
#         contents=text
#     )
    
#     token_counts_google[bill] = response.total_tokens
#     print(bill, response.total_tokens)

(44, 1, 'C-79') 53801
