In [4]:
from typing import Optional, Literal
from repsheet_backend.common import MEMBER_VOTES_TABLE, BILLS_TABLE, VOTES_HELD_TABLE, MEMBERS_TABLE, db_connect, JUSTIN, PIERRE
from repsheet_backend.bills import BillId, BillIssues, BillSummary
from repsheet_backend.genai import generate_text
from pydantic import BaseModel
import json
import asyncio
import re

In [None]:
# Load the prompt template once; get_member_summarisation_prompt substitutes
# the member's voting record for its {{RAW_INPUT_DATA}} placeholder.
# The encoding is explicit so the prompt text is decoded the same way on every
# platform instead of depending on the locale's default encoding.
with open("prompts/summarize-member/001.txt", "r", encoding="utf-8") as f:
    prompt_template = f.read()

class BillVotingRecord(BaseModel):
    """One bill a member voted on, as serialised into the summarisation prompt.

    Instances are built by get_member_voting_record from a joined
    member-vote / bill row and dumped to JSON by
    get_member_summarisation_prompt.
    """
    # Summary text taken from the bill's stored BillSummary.
    summary: str
    # Value of the bill's [Bill ID] column.
    billID: str
    # Value of the bill's [Bill Number] column.
    billNumber: str
    # Lower-cased [Member Voted] value; an empty/NULL vote is recorded as "abstain".
    voted: Literal["yea", "nay", "abstain"]
    # Issues object taken from the bill's stored BillSummary.
    issues: BillIssues


class MemberSummary(BaseModel):
    """Schema the model's JSON reply is validated against in generate_member_summary."""
    # Free-text summary produced by the model for the member.
    summary: str
    # Same BillIssues type used for bills; presumably the member's position per
    # issue -- TODO confirm against the prompt template.
    issues: BillIssues


# Selects, for one member (bound as :member_id), the bill id, bill number,
# stored bill summary JSON and the member's vote, by joining the member's votes
# to the votes held and then to the bills those votes were about.
# NOTE(review): the result has one row per member vote, so a bill with several
# votes held would appear several times -- confirm whether that is intended.
MEMBER_BILL_VOTING_QUERY = f"""
SELECT 
    b.[Bill ID] AS bill_id,
    b.[Bill Number] AS bill_number,
    b.[Summary] AS full_summary,
    mv.[Member Voted] AS voted
FROM {MEMBER_VOTES_TABLE} AS mv
JOIN {VOTES_HELD_TABLE} v
    ON mv.[Vote ID] = v.[Vote ID]
JOIN {BILLS_TABLE} AS b
    ON v.[Bill ID] = b.[Bill ID]
WHERE
    mv.[Member ID] = :member_id
"""

def get_member_voting_record(member_id: str) -> list[BillVotingRecord]:
    """Return one BillVotingRecord per row of the member's bill-vote query.

    Args:
        member_id: Bound to the query's ``:member_id`` parameter.

    Returns:
        The records in the order the database returned the rows.
    """
    query_params = {"member_id": member_id}
    with db_connect() as db:
        vote_rows = db.execute(MEMBER_BILL_VOTING_QUERY, query_params).fetchall()

    def to_record(vote_row) -> BillVotingRecord:
        # The bill's Summary column holds a JSON-serialised BillSummary.
        bill_summary = BillSummary.model_validate_json(vote_row["full_summary"])
        raw_vote = vote_row["voted"]
        # A missing or empty vote value is treated as an abstention.
        if raw_vote:
            vote = raw_vote.lower()
        else:
            vote = "abstain"
        return BillVotingRecord(
            summary=bill_summary.summary,
            billID=vote_row["bill_id"],
            billNumber=vote_row["bill_number"],
            voted=vote,
            issues=bill_summary.issues,
        )

    return [to_record(vote_row) for vote_row in vote_rows]


def get_member_summarisation_prompt(member_id: str) -> str:
    """Build the summarisation prompt for one member.

    The member's voting record is dumped to indented JSON and substituted for
    the ``{{RAW_INPUT_DATA}}`` placeholder in the loaded prompt template.

    Args:
        member_id: Member whose voting record is embedded in the prompt.

    Returns:
        The prompt text with the placeholder replaced.
    """
    records = get_member_voting_record(member_id)
    serialisable_records = [record.model_dump(mode="json") for record in records]
    records_json = json.dumps(serialisable_records, indent=2)
    return prompt_template.replace("{{RAW_INPUT_DATA}}", records_json)

# Matches a closing quote, a comma, then a lone quoted string directly before
# a closing brace, e.g. `", "stray text" }` -- a string with no key of its own.
value_meant_to_be_part_of_previous_key_regex = re.compile(r'",\s*"([^"]+)"\s*}')


def fix_crappy_json(json_str: str) -> str:
    """Repair two known malformations in model-generated JSON text.

    1. A stray quoted string sitting right before ``}`` is folded into the
       preceding string value, separated from it by a single space.
    2. A backslash followed by a real line break is rewritten as the two
       characters ``\\n``.

    Args:
        json_str: Raw JSON text to repair.

    Returns:
        The repaired text; input containing neither pattern is returned
        unchanged.
    """
    merged_values = value_meant_to_be_part_of_previous_key_regex.sub(
        r' \1"}', json_str
    )
    return merged_values.replace("\\\n", "\\n")


async def generate_member_summary(member_id: str) -> MemberSummary:
    """Generate and validate the model-written summary for one member.

    Builds the member's prompt, sends it to ``generate_text``, removes any
    Markdown code fence around the reply, repairs known JSON glitches with
    ``fix_crappy_json`` and validates the result as a ``MemberSummary``.

    Args:
        member_id: Member whose voting record is summarised.

    Returns:
        The validated ``MemberSummary``.

    Raises:
        AssertionError: If ``generate_text`` returns ``None``.
        Exception: Whatever ``MemberSummary.model_validate_json`` raises for an
            unparseable reply. Before re-raising, the offending text is written
            to ``{member_id}.json`` in the current working directory.
    """
    prompt = get_member_summarisation_prompt(member_id)
    raw_reply = await generate_text(prompt)
    assert raw_reply is not None, f"generate_text returned no text for member {member_id}"

    # The reply is not guaranteed to be exactly "```json\n...\n```": tolerate
    # surrounding whitespace and a bare "```" fence as well as the "```json" one.
    cleaned = raw_reply.strip()
    cleaned = cleaned.removeprefix("```json").removeprefix("```").removesuffix("```").strip()
    cleaned = fix_crappy_json(cleaned)

    try:
        return MemberSummary.model_validate_json(cleaned)
    except Exception:
        # Keep the unparseable reply for debugging. The encoding is explicit so
        # non-ASCII text in the reply cannot trigger an encode error that would
        # replace the validation error being reported.
        with open(f"{member_id}.json", "w", encoding="utf-8") as f:
            f.write(cleaned)
        # Bare raise re-raises the active exception with its traceback intact.
        raise

In [12]:
with db_connect() as db:
    all_member_ids = [row[0] for row in db.execute(f"SELECT DISTINCT [Member ID] FROM {MEMBERS_TABLE}").fetchall()]

summaries = await asyncio.gather(*[
    generate_member_summary(member_id)
    for member_id in all_member_ids
])

member_summaries = [
    {"member_id": member_id, "summary": summary.model_dump_json()}
    for member_id, summary in zip(all_member_ids, summaries)
]

ValidationError: 1 validation error for MemberSummary
  Invalid JSON: expected `,` or `}` at line 2 column 965 [type=json_invalid, input_value='{\n  "summary": "This Me...he vulnerable."\n  }\n}', input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/json_invalid

In [None]:


# Write each generated summary onto its existing member row: one UPDATE per
# entry of member_summaries, bound by the :summary / :member_id keys.
update_member_summary_sql = f"UPDATE {MEMBERS_TABLE} SET Summary = :summary WHERE [Member ID] = :member_id"
with db_connect() as db:
    db.executemany(update_member_summary_sql, member_summaries)
