In [None]:
# modules
import os 
import json
import requests
import pandas as pd
from tqdm import tqdm
# congressional api functions
from congress_api_functions import *
# environment vars
# API key is read from the CONGRESS_API_KEY environment variable; indexing
# os.environ directly means a missing variable raises KeyError here rather
# than failing later inside an API call.
api_key = os.environ["CONGRESS_API_KEY"]

In [None]:
# --- run configuration -------------------------------------------------------
congress = "116"     # congressional session to pull
offset = "9000"      # starting offset handed to the bill generator (kept as a string)
save_every = 500     # flush collected metadata to disk every N bills

data_path = "../data/congress_api_data_10_21/"

# Output folders live under <data_path>/<congress>/: one for bill text,
# one for the metadata batches.
text_directory, metadata_directory = (
    os.path.join(data_path, congress, leaf) for leaf in ("text", "metadata")
)

# Create both folders up front; exist_ok makes re-running this cell harmless.
os.makedirs(text_directory, exist_ok=True)
os.makedirs(metadata_directory, exist_ok=True)

In [None]:
metadata_list = []        # housing-bill records collected for the batch currently being built
missing_policy_area = []  # bills whose metadata has no "policyArea" key
last_save = 0             # loop index (relative to `offset`) at which the current batch starts


def _write_metadata_batch(records, first_index, last_index, label="batch"):
    """Dump `records` to `_<start>_<end>.json` inside `metadata_directory`.

    `first_index` and `last_index` are loop indices (inclusive); `offset` is
    added to both so the file name reflects offset-based positions rather
    than positions within this run.
    """
    batch_start = int(offset) + first_index
    batch_end = int(offset) + last_index
    batch_file = os.path.join(metadata_directory, f"_{batch_start}_{batch_end}.json")
    with open(batch_file, "w") as f:
        json.dump(records, f, indent=2)
    print(f"Saved {label} {batch_start}_{batch_end} with {len(records)} bills")


# iterate over all bills in a given congressional session
for i, bill in enumerate(tqdm(generate_all_bills(congress=congress,limit="250",offset=offset,api_key=api_key,max_bills=None))):

    # grab bill metadata and create bill id
    data = get_bill_metadata(bill, api_key)
    bill_id = f"{bill['congress']}-{bill['type']}{bill['number']}"

    # NOTE: no `continue` anywhere in this body. Every iteration must reach the
    # periodic-save check at the bottom; otherwise a bill that is skipped on a
    # `save_every` boundary would silently suppress that batch's save.
    if "policyArea" not in data:
        # no policy area: remember the bill so it can be inspected later
        missing_policy_area.append({
            "bill_id": bill_id,
            "data": bill,
            "metadata": data
        })

    # otherwise, look for housing and community development in the policy areas
    elif "Housing and Community Development" in data["policyArea"].values():
        try:
            # save the bill's text
            text_url = get_bill_text_url(data, api_key)

            file_name = text_url.split("/")[-1]
            file_path = os.path.join(text_directory, file_name)
            save_bill_text(text_url, file_path, api_key)

        except Exception as e:
            # best effort: report the failure and leave this bill out of the metadata
            print(f"Error downloading bill text: {bill_id}: {e}")

        else:
            # text was saved, so record the bill alongside its text location
            metadata_list.append({
                "bill_id":bill_id,
                "data":bill,
                "metadata":data,
                "text_url":text_url,
                "text_file":file_name
            })

    # save the metadata periodically (according to the save_every variable)
    if (i + 1) % save_every == 0:
        # only save if housing bills were collected
        if metadata_list:
            _write_metadata_batch(metadata_list, last_save, i)
            metadata_list = []  # reset for next batch
        else:
            print(f"Skipped saving batch {int(offset) + last_save}_{int(offset) + i} (no housing bills)")
        # the next batch starts at the following bill, whether or not a file was written
        last_save = i + 1

# save remaining metadata; the range runs from the end of the last full batch to
# the last bill processed, not just across the housing bills that were found.
# (`i` is guaranteed to exist here: a non-empty list implies the loop ran.)
if metadata_list:
    _write_metadata_batch(metadata_list, last_save, i, label="final batch")