# GovInfo House Bills Metadata Export (117th Congress)

## Setup

### Iteration 0

In [11]:
import requests
import pandas as pd
import re

import os  # stdlib; used here only to read the API key from the environment

# API Configuration
# The key is read from the GOVINFO_API_KEY environment variable so that no
# credential is stored in the notebook. Any key that was previously saved in
# this file should be treated as exposed and rotated.
# "DEMO_KEY" is the shared api.data.gov demo key: enough for a quick smoke
# test, but rate-limited, so set GOVINFO_API_KEY before a full download.
API_KEY = os.environ.get("GOVINFO_API_KEY", "DEMO_KEY")
SEARCH_URL = "https://api.govinfo.gov/search"
OUTPUT_FILE = "house_bills_117th_congress.csv"

# Step 1: Fetch House Bills (H.R.) from 117th Congress
def fetch_house_bills(timeout=30):
    """Download every House bill (H.R.) search result for the 117th Congress.

    Pages through the search endpoint at ``SEARCH_URL`` using the
    ``offsetMark`` cursor, requesting 100 results per page and sending
    ``API_KEY`` as the ``api_key`` query-string parameter.

    Args:
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        list: The ``results`` entries of every page, concatenated in the
        order received. An empty list is returned if any request fails
        (transport error or non-200 status); results fetched before the
        failure are discarded.
    """
    headers = {"Content-Type": "application/json"}
    bills = []
    offsetMark = "*"  # The first request must use "*" as the cursor

    while True:
        # NOTE(review): "sort" and "format" are sent exactly as before;
        # confirm the search endpoint actually honours these field names.
        query_payload = {
            "query": "collection:BILLS congress:117 billtype:HR",
            "offsetMark": offsetMark,
            "pageSize": 100,  # Number of results per request
            "sort": "dateDesc",
            "format": "json"
        }

        try:
            response = requests.post(
                SEARCH_URL,
                json=query_payload,
                headers=headers,
                params={"api_key": API_KEY},
                timeout=timeout,  # without this a stalled connection hangs forever
            )
        except requests.RequestException as exc:
            # Report transport failures the same way as HTTP error statuses.
            print(f"Failed to fetch data: {exc}")
            return []

        if response.status_code != 200:
            print(f"Failed to fetch data: {response.status_code}")
            print(response.text)  # Print API error details
            return []

        data = response.json()
        # "results" may be missing or null; treat both as an empty page.
        page_results = data.get("results") or []
        bills.extend(page_results)

        next_mark = data.get("offsetMark")

        # Stop on an empty page, a missing cursor, or a cursor that did not
        # advance; any of these would otherwise repeat the same request forever.
        if not page_results or not next_mark or next_mark == offsetMark:
            break

        offsetMark = next_mark

    return bills

# Step 2: Extract metadata from the API response
def extract_metadata(bills, congress=117):
    """Flatten search results into one CSV-ready record per bill.

    Args:
        bills: Iterable of result dicts taken from the search response's
            ``results`` list.
        congress: Congress number. It is written to the "Congress Number"
            column and used to locate the bill number inside ``packageId``.

    Returns:
        list[dict]: One dict per input bill with the keys "Congress Number",
        "Bill Number", "Short Title", "Last Action Date", "Bill Summary Link",
        "PDF Link", "TXT Link" and "XML Link". Missing fields become ``""``
        and an unparseable ``packageId`` yields the bill number "Unknown".
        Note that "Short Title" is filled from ``title`` and
        "Last Action Date" from ``lastModified``.
    """
    congress_number = int(congress)
    # e.g. "BILLS-117hr99ih" -> "99" when congress is 117
    bill_number_pattern = re.compile(rf"{congress_number}hr(\d+)")

    extracted_data = []

    for bill in bills:
        # The API emits explicit nulls for some fields, and .get(key, default)
        # only covers absent keys, so fall back with "or" for the two values
        # that are dereferenced below.
        package_id = bill.get("packageId") or ""
        download_links = bill.get("download") or {}

        match = bill_number_pattern.search(package_id)
        bill_number = f"HR {match.group(1)}" if match else "Unknown"

        extracted_data.append({
            "Congress Number": congress_number,
            "Bill Number": bill_number,
            "Short Title": bill.get("title", ""),
            "Last Action Date": bill.get("lastModified", ""),
            "Bill Summary Link": bill.get("resultLink", ""),
            "PDF Link": download_links.get("pdfLink", ""),
            "TXT Link": download_links.get("txtLink", ""),
            "XML Link": download_links.get("xmlLink", "")
        })

    return extracted_data

# Main Execution
# Download the search results, then export them only if something came back.
bills = fetch_house_bills()
if bills:
    # Flatten the raw results and write them to CSV without the index column.
    metadata = extract_metadata(bills)
    df = pd.DataFrame(metadata)
    df.to_csv(OUTPUT_FILE, index=False)
    print(f"Saved metadata to {OUTPUT_FILE}")
else:
    # An empty list means either no matches or a failed request.
    print("No bills found or error in API call.")


Saved metadata to house_bills_117th_congress.csv


### Iteration 1

In [10]:
import requests
import json

import os  # stdlib; used here only to read the API key from the environment

# API Configuration
# The key is read from the GOVINFO_API_KEY environment variable so that no
# credential is stored in the notebook. Any key that was previously saved in
# this file should be treated as exposed and rotated.
# "DEMO_KEY" is the shared api.data.gov demo key; it is rate-limited but
# sufficient for the single debugging request made in this cell.
API_KEY = os.environ.get("GOVINFO_API_KEY", "DEMO_KEY")
SEARCH_URL = "https://api.govinfo.gov/search"

# Fetch a single bill for debugging
def fetch_single_bill(timeout=30):
    """Fetch one House bill search result so the raw response can be inspected.

    Sends a single search request to ``SEARCH_URL`` with ``pageSize`` 1,
    passing ``API_KEY`` as the ``api_key`` query-string parameter.

    Args:
        timeout: Seconds to wait on the HTTP request before giving up.

    Returns:
        The decoded JSON response body, or ``None`` if the request fails
        (transport error or non-200 status).
    """
    headers = {"Content-Type": "application/json"}
    query_payload = {
        "query": "collection:BILLS congress:117 billtype:HR",
        "offsetMark": "*",  # First request should use "*"
        "pageSize": 1,  # Fetch only one bill for inspection
        "sort": "dateDesc",
        "format": "json"
    }

    try:
        response = requests.post(
            SEARCH_URL,
            json=query_payload,
            headers=headers,
            params={"api_key": API_KEY},
            timeout=timeout,  # without this a stalled connection hangs forever
        )
    except requests.RequestException as exc:
        # Report transport failures the same way as HTTP error statuses.
        print(f"Failed to fetch data: {exc}")
        return None

    if response.status_code != 200:
        print(f"Failed to fetch data: {response.status_code}")
        print(response.text)  # Print API error details
        return None

    return response.json()

# Run the request and print the raw response
# Fetch one search result and show the whole response so its structure can be inspected.
raw_api_response = fetch_single_bill()
formatted_response = json.dumps(raw_api_response, indent=4)  # 4-space indented JSON text
print(formatted_response)


{
    "results": [
        {
            "title": "Citizen Legislature Anti-Corruption Reform of Congress Act; CLEAN Congress Act",
            "packageId": "BILLS-117hr99ih",
            "granuleId": null,
            "lastModified": "2024-06-06T19:38:38Z",
            "governmentAuthor": [
                "Congress",
                "House of Representatives"
            ],
            "dateIssued": "2021-01-04",
            "collectionCode": "BILLS",
            "resultLink": "https://api.govinfo.gov/packages/BILLS-117hr99ih/summary",
            "dateIngested": "2021-01-20",
            "download": {
                "premisLink": "https://api.govinfo.gov/packages/BILLS-117hr99ih/premis",
                "xmlLink": "https://api.govinfo.gov/packages/BILLS-117hr99ih/xml",
                "txtLink": "https://api.govinfo.gov/packages/BILLS-117hr99ih/htm",
                "zipLink": "https://api.govinfo.gov/packages/BILLS-117hr99ih/zip",
                "modsLink": "https://api.govinfo.g