In [2]:
import requests
import json

# Read API key from file
with open('../api/congress_gov.txt', 'r') as f:
    api_key = f.read().strip()

# API request
url = 'https://api.congress.gov/v3/bill/119/hr/'

params = {
    'api_key': api_key,
    'format': 'json',
    'limit': 1
}

response = requests.get(url, params=params)

# Check and print raw JSON response
if response.status_code == 200:
    data = response.json()
    print(json.dumps(data, indent=2))  # Pretty-print the raw JSON
else:
    print(f"Error: {response.status_code} - {response.text}")


{
  "bills": [
    {
      "congress": 119,
      "latestAction": {
        "actionDate": "2025-06-20",
        "text": "Placed on the Union Calendar, Calendar No. 133."
      },
      "number": "2808",
      "originChamber": "House",
      "originChamberCode": "H",
      "title": "Homebuyers Privacy Protection Act",
      "type": "HR",
      "updateDate": "2025-06-21",
      "updateDateIncludingText": "2025-06-21",
      "url": "https://api.congress.gov/v3/bill/119/hr/2808?format=json"
    }
  ],
  "pagination": {
    "count": 4067,
    "next": "https://api.congress.gov/v3/bill/119/hr?offset=1&limit=1&format=json"
  },
  "request": {
    "billType": "hr",
    "congress": "119",
    "contentType": "application/json",
    "format": "json"
  }
}


In [3]:
import requests
import pandas as pd
import time

# Load API key
with open('../api/congress_gov.txt', 'r') as f:
    api_key = f.read().strip()

# Base API endpoint
url = 'https://api.congress.gov/v3/bill/119/hr/'
limit = 250
offset = 0

all_records = []

while True:
    params = {
        'api_key': api_key,
        'format': 'json',
        'limit': limit,
        'offset': offset
    }

    response = requests.get(url, params=params)
    if response.status_code != 200:
        print(f"Error at offset {offset}: {response.status_code}")
        break

    data = response.json()
    bills = data.get("bills", [])
    if not bills:
        break

    for bill in bills:
        record = {
            "congress": bill.get("congress"),
            "number": bill.get("number"),
            "type": bill.get("type"),
            "title": bill.get("title"),
            "originChamber": bill.get("originChamber"),
            "latestActionDate": bill.get("latestAction", {}).get("actionDate"),
            "latestActionText": bill.get("latestAction", {}).get("text"),
            "updateDate": bill.get("updateDate"),
            "url": bill.get("url")
        }
        all_records.append(record)

    offset += limit
    print(f"Fetched {offset} bills...")
    time.sleep(0.1)  # Optional: be nice to the server

# Create final DataFrame
df = pd.DataFrame(all_records)
print(f"\n✅ Done! Total bills fetched: {len(df)}")


KeyboardInterrupt: 

In [62]:
df['number'] = df['number'].astype(int)
df['updateDate'] = pd.to_datetime(df['updateDate'])

In [63]:
df = df.sort_values(by='updateDate')

In [64]:
df.head()  # Preview the result

Unnamed: 0,congress,number,type,title,originChamber,latestActionDate,latestActionText,updateDate,url
3419,119,23,HR,Illegitimate Court Counteraction Act,House,2025-01-28,Cloture on the motion to proceed to the measur...,2025-01-29,https://api.congress.gov/v3/bill/119/hr/23?for...
4054,119,110,HR,Small Business Prosperity Act of 2025,House,2025-01-03,Referred to the House Committee on Ways and Me...,2025-01-31,https://api.congress.gov/v3/bill/119/hr/110?fo...
3486,119,21,HR,Born-Alive Abortion Survivors Protection Act,House,2025-01-24,Received in the Senate and Read twice and refe...,2025-01-31,https://api.congress.gov/v3/bill/119/hr/21?for...
4047,119,123,HR,Improving Science in Chemical Assessments Act,House,2025-01-03,"Referred to the Committee on Science, Space, a...",2025-02-03,https://api.congress.gov/v3/bill/119/hr/123?fo...
3942,119,134,HR,Protecting our Communities from Sexual Predato...,House,2025-01-03,Referred to the House Committee on the Judiciary.,2025-02-03,https://api.congress.gov/v3/bill/119/hr/134?fo...


In [66]:
specific_bill = df[df['number'] == 1]
specific_bill

Unnamed: 0,congress,number,type,title,originChamber,latestActionDate,latestActionText,updateDate,url
603,119,1,HR,One Big Beautiful Bill Act,House,2025-05-22,Motion to reconsider laid on the table Agreed ...,2025-06-18,https://api.congress.gov/v3/bill/119/hr/1?form...


In [69]:
import requests
import json

# Read API key from file
with open('../api/congress_gov.txt', 'r') as f:
    api_key = f.read().strip()

# API request
url = 'https://api.congress.gov/v3/bill/119/hr/1/text'

params = {
    'api_key': api_key,
    'format': 'json',
    'limit': 5
}

response = requests.get(url, params=params)

# Check and print raw JSON response
if response.status_code == 200:
    data = response.json()
    print(json.dumps(data, indent=2))  # Pretty-print the raw JSON
else:
    print(f"Error: {response.status_code} - {response.text}")


{
  "pagination": {
    "count": 2
  },
  "request": {
    "billNumber": "1",
    "billType": "hr",
    "billUrl": "https://api.congress.gov/v3/bill/119/hr/1?format=json",
    "congress": "119",
    "contentType": "application/json",
    "format": "json"
  },
  "textVersions": [
    {
      "date": "2025-05-22T04:00:00Z",
      "formats": [
        {
          "type": "Formatted Text",
          "url": "https://www.congress.gov/119/bills/hr1/BILLS-119hr1eh.htm"
        },
        {
          "type": "PDF",
          "url": "https://www.congress.gov/119/bills/hr1/BILLS-119hr1eh.pdf"
        },
        {
          "type": "Formatted XML",
          "url": "https://www.congress.gov/119/bills/hr1/BILLS-119hr1eh.xml"
        }
      ],
      "type": "Engrossed in House"
    },
    {
      "date": "2025-05-20T04:00:00Z",
      "formats": [
        {
          "type": "Formatted Text",
          "url": "https://www.congress.gov/119/bills/hr1/BILLS-119hr1rh.htm"
        },
        {
          

In [1]:
import requests
from bs4 import BeautifulSoup

# Choose the XML version of the bill
xml_url = "https://www.congress.gov/119/bills/hr1/BILLS-119hr1eh.xml"

# Get the XML content
response = requests.get(xml_url)
if response.status_code != 200:
    print(f"Error downloading XML: {response.status_code}")
else:
    soup = BeautifulSoup(response.content, "xml")  # use XML parser

    # Example: find all section headers and their text
    sections = []
    current_section = None

    for elem in soup.find_all():
        if elem.name == "section" or (elem.name == "title" and "Section" in elem.get_text()):
            if current_section:
                sections.append(current_section)
            current_section = {
                "heading": elem.get_text(strip=True),
                "content": ""
            }
        elif current_section:
            current_section["content"] += elem.get_text(separator=" ", strip=True) + " "

    if current_section:
        sections.append(current_section)

    # Show an example
    for section in sections[:5]:
        print(f"\n=== {section['heading']} ===\n{section['content'][:300]}...")



=== 1.Short titleThis Act may be cited as theOne Big Beautiful Bill Act. ===
1. Short title This Act may be cited as the One Big Beautiful Bill Act . One Big Beautiful Bill Act One Big Beautiful Bill Act ...

=== 2.Table of contentsThe table of contents of this Act is as follows:Sec. 1. Short title.Sec. 2. Table of contents.Title I—Committee on AgricultureSubtitle A—NutritionSec. 10001. Thrifty food plan.Sec. 10002. Able bodied adults without dependents work requirements.Sec. 10003. Able bodied adults without dependents waivers.Sec. 10004. Availability of standard utility allowances based on receipt of energy assistance.Sec. 10005. Restrictions on internet expenses.Sec. 10006. Matching funds requirements.Sec. 10007. Administrative cost sharing.Sec. 10008. General work requirement age.Sec. 10009. National Accuracy Clearinghouse.Sec. 10010. Quality control zero tolerance.Sec. 10011. National education and obesity prevention grant program repealer.Sec. 10012. Alien SNAP eligibility.Sec. 