In [28]:
from repsheet_backend.common import DATA_DIR, db_connect, MEMBER_VOTES_TABLE, VOTES_HELD_TABLE, download_all_bill_texts
import httpx
import os
from os import path
from multiprocessing.pool import ThreadPool
from anthropic import Anthropic
from google import genai

## Enumerate every bill voted on by a current member

In [2]:
# Distinct (parliament, session, bill number) triples for every member vote that
# is linked to a vote-held record carrying a bill number, newest parliament first.
BILLS_QUERY = (
    "SELECT DISTINCT v.[Parliament], v.[Session], v.[Bill Number] "
    f"FROM {MEMBER_VOTES_TABLE} mv "
    f"LEFT JOIN {VOTES_HELD_TABLE} v ON v.[Vote ID] = mv.[Vote ID] "
    "WHERE [Bill Number] IS NOT NULL "
    "ORDER BY v.[Parliament] DESC "
)

with db_connect() as db:
    bill_rows = db.execute(BILLS_QUERY).fetchall()

    # Plain tuples so each row can be unpacked into the downloader's positional
    # arguments by starmap (and later used as dictionary keys).
    bills = list(map(tuple, bill_rows))

    # Fetch bill texts with 8 worker threads; results[k] is whatever
    # download_all_bill_texts returned for bills[k].
    with ThreadPool(8) as pool:
        results = pool.starmap(download_all_bill_texts, bills)

Downloaded C-240_1/C-240_E.xml from https://www.parl.ca/Content/Bills/441/Private/C-240/C-240_1/C-240_E.xml
Downloaded S-223_1/S-223_E.xml from https://www.parl.ca/Content/Bills/441/Private/S-223/S-223_1/S-223_E.xml
Downloaded C-262_1/C-262_E.xml from https://www.parl.ca/Content/Bills/432/Private/C-262/C-262_1/C-262_E.xml
Downloaded C-311_1/C-311_E.xml from https://www.parl.ca/Content/Bills/441/Private/C-311/C-311_1/C-311_E.xml
Downloaded C-225_1/C-225_E.xml from https://www.parl.ca/Content/Bills/432/Private/C-225/C-225_1/C-225_E.xml
Downloaded C-59_1/C-59_E.xml from https://www.parl.ca/Content/Bills/441/Government/C-59/C-59_1/C-59_E.xml
Downloaded S-223_3/S-223_E.xml from https://www.parl.ca/Content/Bills/441/Private/S-223/S-223_3/S-223_E.xml
Downloaded C-60_3/C-60_E.xml from https://www.parl.ca/Content/Bills/441/Government/C-60/C-60_3/C-60_E.xml
Downloaded C-79_3/C-79_E.xml from https://www.parl.ca/Content/Bills/441/Government/C-79/C-79_3/C-79_E.xml
Downloaded S-223_4/S-223_E.xml fro

In [27]:
# Tally the Parliament 43 / Session 1 bills: i counts bills whose download
# result is falsy (text not found), j counts bills whose result is truthy.
i = j = 0
for row, found in zip(bills, results):
    parliament, session, bill_number = row
    # Skip everything outside Parliament 43, Session 1.
    if (parliament, session) != (43, 1):
        continue
    if found:
        j += 1
        continue
    print(f"Bill text not found for {row}")
    i += 1
i, j

(0, 2)

In [4]:

# Map each Parliament 44 / Session 1 bill tuple to the path of its latest
# non-empty text file, i.e. the one found under the highest-numbered reading
# directory. Bills with no non-empty file are left out of the mapping.
latest_bill_file = {}

for bill in bills:
    parliament, session, bill_number = bill
    if parliament != 44 or session != 1:
        continue
    max_reading = ""
    for reading in (1, 2, 3, 4):
        texts_path = path.join(DATA_DIR, f"bill_text/{parliament}/{session}/{bill_number}/{bill_number}_{reading}")
        # A reading directory may not exist for this bill; skip it instead of
        # letting os.listdir raise FileNotFoundError and abort the whole loop.
        if not path.isdir(texts_path):
            continue
        for file in os.listdir(texts_path):
            assert file.endswith(".xml"), f"unexpected non-XML file in {texts_path}: {file}"
            filepath = path.join(texts_path, file)
            # Zero-byte files are ignored when picking the latest text.
            if path.getsize(filepath) > 0:
                # For one bill the candidate paths share a prefix up to the
                # single-digit reading number, so the string maximum is the
                # file from the highest reading.
                max_reading = max(max_reading, filepath)
    if max_reading:
        latest_bill_file[bill] = max_reading


In [12]:
# Read the API key from the environment so that no secret is stored in the
# notebook (or in its version history). Raises KeyError if the variable is unset.
# NOTE: the key that used to be hardcoded in this cell has been exposed and
# must be revoked and rotated.
anthropic = Anthropic(
    api_key=os.environ["ANTHROPIC_API_KEY"]
)

In [None]:
# Anthropic input-token count per bill, keyed by (parliament, session, bill number).
# Kept in its own cell so re-running the counting loop does not reset it.
token_counts: dict[tuple[int, int, str], int] = {}

In [None]:
# Ask the Anthropic token-counting endpoint how many input tokens each bill's
# latest XML text would use as a single user message, and record the result.
for bill, filepath in latest_bill_file.items():
    # Decode explicitly as UTF-8 so the text read from the XML file does not
    # depend on the platform's default encoding.
    with open(filepath, "r", encoding="utf-8") as f:
        text = f.read()
    response = anthropic.messages.count_tokens(
        model="claude-3-5-haiku-latest",
        messages=[
            {
                "role": "user",
                "content": text,
            }
        ],
    )
    token_counts[bill] = response.input_tokens
    print(bill, response.input_tokens)

(44, 1, 'C-79') 58121
(44, 1, 'C-380') 581
(44, 1, 'C-78') 4545
(44, 1, 'S-205') 7467
(44, 1, 'C-378') 1012
(44, 1, 'C-223') 1704
(44, 1, 'C-379') 1321
(44, 1, 'S-224') 756
(44, 1, 'C-323') 1190
(44, 1, 'C-65') 62999
(44, 1, 'C-69') 493300
(44, 1, 'C-40') 11744
(44, 1, 'C-75') 19501
(44, 1, 'C-74') 87315
(44, 1, 'C-70') 75211
(44, 1, 'C-332') 4814
(44, 1, 'C-277') 1667
(44, 1, 'C-20') 58678
(44, 1, 'C-293') 3513
(44, 1, 'C-317') 1969
(44, 1, 'C-377') 1298
(44, 1, 'C-353') 8867
(44, 1, 'C-64') 3687
(44, 1, 'C-356') 14459
(44, 1, 'C-368') 1510
(44, 1, 'C-49') 134963
(44, 1, 'C-59') 395404
(44, 1, 'C-58') 7380
(44, 1, 'C-381') 1384
(44, 1, 'C-270') 2813
(44, 1, 'C-375') 1443
(44, 1, 'C-351') 1586
(44, 1, 'C-29') 7051
(44, 1, 'S-209') 923
(44, 1, 'C-50') 9680
(44, 1, 'C-347') 966
(44, 1, 'C-68') 39823
(44, 1, 'C-67') 39379
(44, 1, 'C-365') 1415
(44, 1, 'C-35') 5375
(44, 1, 'C-320') 1830
(44, 1, 'C-321') 1044
(44, 1, 'C-354') 758
(44, 1, 'C-62') 1674
(44, 1, 'C-273') 675
(44, 1, 'S-202') 14

In [17]:
# Total input tokens across every bill counted so far.
total_input_tokens = sum(token_counts.values())
total_input_tokens

3837016

In [16]:
# Rank the (bill, token count) pairs from largest to smallest token count.
bills_by_token_count = sorted(
    token_counts.items(),
    key=lambda bill_and_count: bill_and_count[1],
    reverse=True,
)
bills_by_token_count

[((44, 1, 'C-69'), 493300),
 ((44, 1, 'C-59'), 395404),
 ((44, 1, 'C-19'), 344079),
 ((44, 1, 'C-47'), 318498),
 ((44, 1, 'C-49'), 134963),
 ((44, 1, 'C-32'), 131247),
 ((44, 1, 'C-27'), 102031),
 ((44, 1, 'C-74'), 87315),
 ((44, 1, 'C-54'), 86864),
 ((44, 1, 'C-24'), 86712),
 ((44, 1, 'C-8'), 84716),
 ((44, 1, 'C-70'), 75211),
 ((44, 1, 'C-33'), 69721),
 ((44, 1, 'C-26'), 66269),
 ((44, 1, 'C-65'), 62999),
 ((44, 1, 'C-36'), 60453),
 ((44, 1, 'C-20'), 58678),
 ((44, 1, 'C-79'), 58121),
 ((44, 1, 'C-60'), 58068),
 ((44, 1, 'S-5'), 57948),
 ((44, 1, 'C-13'), 56142),
 ((44, 1, 'C-21'), 52609),
 ((44, 1, 'C-15'), 49868),
 ((44, 1, 'C-6'), 42789),
 ((44, 1, 'C-43'), 42328),
 ((44, 1, 'S-12'), 40465),
 ((44, 1, 'C-2'), 40169),
 ((44, 1, 'C-68'), 39823),
 ((44, 1, 'C-67'), 39379),
 ((44, 1, 'C-44'), 39272),
 ((44, 1, 'C-16'), 37013),
 ((44, 1, 'C-11'), 36840),
 ((44, 1, 'C-18'), 30845),
 ((44, 1, 'C-31'), 22176),
 ((44, 1, 'C-55'), 21849),
 ((44, 1, 'C-75'), 19501),
 ((44, 1, 'C-25'), 18115)

In [32]:
# Google Gen AI client configured to go through Vertex AI for the given
# project and region.
google_ai = genai.Client(
    vertexai=True,
    project='repsheet-app-prod',
    location='us-central1',
)

In [None]:
# Gemini token count per bill, keyed by the same bill tuples as latest_bill_file.
token_counts_google = {}

# Only the first bill is sampled here ([:1]).
for bill, filepath in list(latest_bill_file.items())[:1]:
    # Decode explicitly as UTF-8 so the text read from the XML file does not
    # depend on the platform's default encoding.
    with open(filepath, "r", encoding="utf-8") as f:
        text = f.read()
    response = google_ai.models.count_tokens(
        model="gemini-2.0-flash",
        contents=text
    )

    token_counts_google[bill] = response.total_tokens
    print(bill, response.total_tokens)

(44, 1, 'C-79') 53801


In [35]:
# Anthropic's count for the same bill (C-79) that was sent to Gemini, for a
# side-by-side comparison.
token_counts[44, 1, 'C-79']

58121

In [None]:


"""
curl "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key=GEMINI_API_KEY" \
-H 'Content-Type: application/json' \
-X POST \
-d '{
  "contents": [{
    "parts":[{"text": "Explain how AI works"}]
    }]
   }'
"""