In [151]:
import requests
import json
import pandas as pd
import xml.etree.ElementTree as ET
import re

In [152]:
with open('secrets.json', 'r') as file:
	secrets = json.load(file)
	CONGRESS_API_KEY = secrets['CONGRESS-API-KEY']
	PROPUBLICA_API_KEY = secrets['PROPUBLICA-API-KEY']

In [153]:
url = f"https://api.congress.gov/v3/bill?api_key={CONGRESS_API_KEY}"
response = requests.get(url)

recent_bills = response.json()

In [154]:
def get_bill_data(congress, bill_type, bill_number):
	url = f"https://api.congress.gov/v3/bill/{congress}/{bill_type}/{bill_number}/text?api_key={CONGRESS_API_KEY}"
	response = requests.get(url)
	data = response.json()
	return data

In [155]:
def get_bill_text_url(bill_data):
	try:
		text_versions = bill_data['textVersions']
		formats = text_versions[0]['formats']
		urls = [x for x in formats if x['type'] == 'Formatted Text']
		url = urls[0]['url']
		return url
	except:
		return None

In [156]:
def get_bill_text(url):
	response = requests.get(url)
	return response.text

In [157]:
def get_length(text):
	text = text.replace('\n', '')
	len_opening = len('<html><body><pre>')
	len_closing = len('</pre></body></html>')

	text = text[len_opening:-len_closing]

	text = re.findall(r'[a-zA-Z ]', text)
	words = ''.join(text).split(' ')

	words = [x for x in words if x != '']

	return len(words)

In [158]:
for bill in recent_bills['bills']:
	type = bill['type'].lower()
	number = bill['number']
	congress = bill['congress']

	if type == 'hr':
		bill_data = get_bill_data(congress, type, number)
		url = get_bill_text_url(bill_data)
		if url:
			text = get_bill_text(url)
			length = get_length(text)
			print(f'{type}/{number}: {length} words  --> {url}')

hr/300: 1299 words  --> https://www.congress.gov/118/bills/hr300/BILLS-118hr300eh.htm
hr/400: 160 words  --> https://www.congress.gov/118/bills/hr400/BILLS-118hr400ih.htm
hr/346: 617 words  --> https://www.congress.gov/118/bills/hr346/BILLS-118hr346ih.htm
