## use congress.gov api to get texts of enrolled bills?

In [1]:
# manually gather list of numbers of endorsed Hour Resoultions from govinfo.gov 113th-118th congress
# use for loop to enter those HR numbers in query url for api call
# from json return gather most recent (first listed) link:
#     json_data = texts_json['textVersions'][0]['formats'][0]['url']
# scrape text from link

In [1]:
import json
import pandas as pd
import requests
import time
from bs4 import BeautifulSoup

import tqdm  # Import tqdm for the progress bar


# api key for congress.gov
from api_key import congress_api_key

## Retrieving summaries from bills in downloaded @uscongress data.world data

In [2]:
bill_num = []
summary = []

# Loop from 1 to highest numbered HR bill for 115th Congress found on govinfo.gov/bulkdata: 7401

for i in range(1, 7402):
    bill_lookup = i
    file_path = f"congress_115/bills/hr/hr{bill_lookup}/data.json"

    try:
        # Open and read the JSON file
        with open(file_path, 'r') as file:
            json_data = json.load(file)

        # Check if json_data is not None
        if json_data:
            # Check if 'summary' key exists in the JSON data
            if 'summary' in json_data and 'text' in json_data['summary']:
                # Extract text of summary
                summary_text = json_data['summary']['text']
                summary.append(summary_text)
                bill_num.append(bill_lookup)
            else:
                print(f"No summary found for bill {bill_lookup}")
        else:
            print(f"JSON data is None for bill {bill_lookup}")

    except:
        continue  # Continue to the next iteration of the loop


In [3]:
len(bill_num)

2255

In [4]:
max(bill_num)

3663

In [5]:
summary[2254]

'This bill designates the medical center of the Department of Veterans Affairs in Huntington, West Virginia, as the "Hershel `Woody\' Williams VA Medical Center."'

In [6]:
 bill_no = 3663
file_path = f"congress_115/bills/hr/hr{bill_no}/data.json"
with open(file_path, 'r') as file:
        json_data = json.load(file)
summary_text = json_data['summary']['text']

In [7]:
summary_text

'This bill designates the medical center of the Department of Veterans Affairs in Huntington, West Virginia, as the "Hershel `Woody\' Williams VA Medical Center."'

In [8]:
#Create dataframe of bill numbers and summaries
congress_115_hr_df = pd.DataFrame({'BillNumber': bill_num, 'Summary': summary})
congress_115_hr_df.head()

Unnamed: 0,BillNumber,Summary
0,5,(This measure has not been amended since it wa...
1,7,(This measure has not been amended since it wa...
2,10,Financial CHOICE Act of 2017\n\n (Sec. 2) This...
3,15,Raise the Wage Act\n\nThis bill amends the Fai...
4,19,Smithsonian Women's History Museum Act\n\nThis...


## Gather bill texts from congress.gov

In [9]:
# Loop through rows of congress_115_hr, 
# pass 'BillNumber' in query url for api call of bill text
# append text to 'Text' column of congress_115_hr
# append a null in cases of no text/failed return
# timer loop to avoid congress.gov api rate limit (1000 calls/hr)
# drop nulls
# save to csv –> add csv name to gitignore before creation

In [10]:
type(congress_115_hr_df['BillNumber'][0])

numpy.int64

In [22]:
congress_115_hr_df['Text'] = congress_115_hr_df['Text'].astype(str)


In [15]:
import pandas as pd
import requests
import time
from tqdm import tqdm  # Import tqdm for the progress bar

# Assuming you have a DataFrame named congress_115_hr_df

# Use tqdm() to create a progress bar for the loop
for i in range(0, 2255):
    bill_lookup = str(congress_115_hr_df['BillNumber'][i])
    query_url = f"https://api.congress.gov/v3/bill/115/hr/{bill_lookup}/text?api_key={congress_api_key}"
    
    try:
        response = requests.get(query_url)
        response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
        
        json_data = response.json()
        text_url = json_data['textVersions'][0]['formats'][0]['url']
        congress_115_hr_df.at[i, 'TextUrl'] = text_url
        print(f"Got URL for Bill {bill_lookup} on row {i} out of 2254")

    except (requests.exceptions.RequestException, KeyError, IndexError, ValueError) as e:
        print(f"Failed to fetch data for Bill {bill_lookup}: {e}")
        congress_115_hr_df.at[index, 'Text'] = None  # Set a placeholder value, e.g., None

    # Introduce a delay to avoid hitting the API rate limit
    time.sleep(3) 

# Now congress_115_hr_df['Text'] column has the values or None for each row


Got URL for Bill 5 on row 0 out of 2254
Got URL for Bill 7 on row 1 out of 2254
Got URL for Bill 10 on row 2 out of 2254
Got URL for Bill 15 on row 3 out of 2254
Got URL for Bill 19 on row 4 out of 2254
Got URL for Bill 20 on row 5 out of 2254
Got URL for Bill 21 on row 6 out of 2254
Got URL for Bill 22 on row 7 out of 2254
Got URL for Bill 24 on row 8 out of 2254
Got URL for Bill 25 on row 9 out of 2254
Got URL for Bill 26 on row 10 out of 2254
Got URL for Bill 27 on row 11 out of 2254
Got URL for Bill 28 on row 12 out of 2254
Got URL for Bill 29 on row 13 out of 2254
Got URL for Bill 30 on row 14 out of 2254
Got URL for Bill 31 on row 15 out of 2254
Got URL for Bill 32 on row 16 out of 2254
Got URL for Bill 33 on row 17 out of 2254
Got URL for Bill 34 on row 18 out of 2254
Got URL for Bill 35 on row 19 out of 2254
Got URL for Bill 36 on row 20 out of 2254
Got URL for Bill 37 on row 21 out of 2254
Got URL for Bill 38 on row 22 out of 2254
Got URL for Bill 39 on row 23 out of 2254
Got 

KeyboardInterrupt: 

In [16]:
for i in range(46, 2255):
    bill_lookup = str(congress_115_hr_df['BillNumber'][i])
    query_url = f"https://api.congress.gov/v3/bill/115/hr/{bill_lookup}/text?api_key={congress_api_key}"
    
    try:
        response = requests.get(query_url)
        response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
        
        json_data = response.json()
        text_url = json_data['textVersions'][0]['formats'][0]['url']
        congress_115_hr_df.at[i, 'TextUrl'] = text_url
        print(f"Got URL for Bill {bill_lookup} on row {i} out of 2254")

    except (requests.exceptions.RequestException, KeyError, IndexError, ValueError) as e:
        print(f"Failed to fetch data for Bill {bill_lookup}: {e}")
        congress_115_hr_df.at[index, 'Text'] = None  # Set a placeholder value, e.g., None

    # Introduce a delay to avoid hitting the API rate limit
    time.sleep(2) 

Got URL for Bill 62 on row 46 out of 2254
Got URL for Bill 63 on row 47 out of 2254
Got URL for Bill 64 on row 48 out of 2254
Got URL for Bill 65 on row 49 out of 2254
Got URL for Bill 66 on row 50 out of 2254
Got URL for Bill 67 on row 51 out of 2254
Got URL for Bill 68 on row 52 out of 2254
Got URL for Bill 69 on row 53 out of 2254
Got URL for Bill 70 on row 54 out of 2254
Got URL for Bill 71 on row 55 out of 2254
Got URL for Bill 72 on row 56 out of 2254
Got URL for Bill 73 on row 57 out of 2254
Got URL for Bill 74 on row 58 out of 2254
Got URL for Bill 75 on row 59 out of 2254
Got URL for Bill 76 on row 60 out of 2254
Got URL for Bill 77 on row 61 out of 2254
Got URL for Bill 78 on row 62 out of 2254
Got URL for Bill 79 on row 63 out of 2254
Got URL for Bill 80 on row 64 out of 2254
Got URL for Bill 81 on row 65 out of 2254
Got URL for Bill 82 on row 66 out of 2254
Got URL for Bill 83 on row 67 out of 2254
Got URL for Bill 84 on row 68 out of 2254
Got URL for Bill 85 on row 69 out 

NameError: name 'index' is not defined

In [17]:
for i in range(93, 2255):
    bill_lookup = str(congress_115_hr_df['BillNumber'][i])
    query_url = f"https://api.congress.gov/v3/bill/115/hr/{bill_lookup}/text?api_key={congress_api_key}"
    
    try:
        response = requests.get(query_url)
        response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
        
        json_data = response.json()
        text_url = json_data['textVersions'][0]['formats'][0]['url']
        congress_115_hr_df.at[i, 'TextUrl'] = text_url
        print(f"Got URL for Bill {bill_lookup} on row {i} out of 2254")

    except (requests.exceptions.RequestException, KeyError, IndexError, ValueError) as e:
        print(f"Failed to fetch data for Bill {bill_lookup}: {e}")
        congress_115_hr_df.at[index, 'Text'] = None  # Set a placeholder value, e.g., None

    # Introduce a delay to avoid hitting the API rate limit
    time.sleep(2)

Got URL for Bill 109 on row 93 out of 2254
Got URL for Bill 110 on row 94 out of 2254
Got URL for Bill 111 on row 95 out of 2254
Got URL for Bill 112 on row 96 out of 2254
Got URL for Bill 113 on row 97 out of 2254
Got URL for Bill 114 on row 98 out of 2254
Got URL for Bill 115 on row 99 out of 2254
Got URL for Bill 116 on row 100 out of 2254
Got URL for Bill 117 on row 101 out of 2254
Got URL for Bill 118 on row 102 out of 2254
Got URL for Bill 119 on row 103 out of 2254
Got URL for Bill 120 on row 104 out of 2254
Got URL for Bill 121 on row 105 out of 2254
Got URL for Bill 122 on row 106 out of 2254
Got URL for Bill 123 on row 107 out of 2254
Got URL for Bill 124 on row 108 out of 2254
Got URL for Bill 125 on row 109 out of 2254
Got URL for Bill 126 on row 110 out of 2254
Got URL for Bill 127 on row 111 out of 2254
Got URL for Bill 128 on row 112 out of 2254
Got URL for Bill 129 on row 113 out of 2254
Got URL for Bill 130 on row 114 out of 2254
Got URL for Bill 131 on row 115 out of 

KeyboardInterrupt: 

In [18]:
for i in range(126, 2255):
    bill_lookup = str(congress_115_hr_df['BillNumber'][i])
    query_url = f"https://api.congress.gov/v3/bill/115/hr/{bill_lookup}/text?api_key={congress_api_key}"
    
    try:
        response = requests.get(query_url)
        response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
        
        json_data = response.json()
        text_url = json_data['textVersions'][0]['formats'][0]['url']
        congress_115_hr_df.at[i, 'TextUrl'] = text_url
        print(f"Got URL for Bill {bill_lookup} on row {i} out of 2254")

    except (requests.exceptions.RequestException, KeyError, IndexError, ValueError) as e:
        print(f"Failed to fetch data for Bill {bill_lookup}: {e}")
        congress_115_hr_df.at[index, 'Text'] = None  # Set a placeholder value, e.g., None

    # Introduce a delay to avoid hitting the API rate limit
    time.sleep(2)

Got URL for Bill 142 on row 126 out of 2254
Got URL for Bill 143 on row 127 out of 2254
Failed to fetch data for Bill 144: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))


NameError: name 'index' is not defined

In [19]:
for i in range(128, 2255):
    bill_lookup = str(congress_115_hr_df['BillNumber'][i])
    query_url = f"https://api.congress.gov/v3/bill/115/hr/{bill_lookup}/text?api_key={congress_api_key}"
    
    try:
        response = requests.get(query_url)
        response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
        
        json_data = response.json()
        text_url = json_data['textVersions'][0]['formats'][0]['url']
        congress_115_hr_df.at[i, 'TextUrl'] = text_url
        print(f"Got URL for Bill {bill_lookup} on row {i} out of 2254")

    except (requests.exceptions.RequestException, KeyError, IndexError, ValueError) as e:
        print(f"Failed to fetch data for Bill {bill_lookup}: {e}")
        congress_115_hr_df.at[index, 'Text'] = None  # Set a placeholder value, e.g., None

    # Introduce a delay to avoid hitting the API rate limit
    time.sleep(2)

Got URL for Bill 144 on row 128 out of 2254
Got URL for Bill 145 on row 129 out of 2254
Got URL for Bill 146 on row 130 out of 2254
Got URL for Bill 147 on row 131 out of 2254
Got URL for Bill 148 on row 132 out of 2254
Got URL for Bill 149 on row 133 out of 2254
Got URL for Bill 150 on row 134 out of 2254
Got URL for Bill 151 on row 135 out of 2254
Got URL for Bill 152 on row 136 out of 2254
Got URL for Bill 153 on row 137 out of 2254
Got URL for Bill 154 on row 138 out of 2254
Got URL for Bill 155 on row 139 out of 2254
Got URL for Bill 156 on row 140 out of 2254
Got URL for Bill 157 on row 141 out of 2254
Got URL for Bill 158 on row 142 out of 2254
Got URL for Bill 159 on row 143 out of 2254
Got URL for Bill 160 on row 144 out of 2254
Got URL for Bill 161 on row 145 out of 2254
Got URL for Bill 162 on row 146 out of 2254
Got URL for Bill 163 on row 147 out of 2254
Got URL for Bill 164 on row 148 out of 2254
Got URL for Bill 165 on row 149 out of 2254
Got URL for Bill 166 on row 150 

NameError: name 'index' is not defined

In [21]:
for i in range(205, 2255):
    bill_lookup = str(congress_115_hr_df['BillNumber'][i])
    query_url = f"https://api.congress.gov/v3/bill/115/hr/{bill_lookup}/text?api_key={congress_api_key}"
    
    try:
        response = requests.get(query_url)
        response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
        
        json_data = response.json()
        text_url = json_data['textVersions'][0]['formats'][0]['url']
        congress_115_hr_df.at[i, 'TextUrl'] = text_url
        print(f"Got URL for Bill {bill_lookup} on row {i} out of 2254")

    except (requests.exceptions.RequestException, KeyError, IndexError, ValueError) as e:
        print(f"Failed to fetch data for Bill {bill_lookup}: {e}")
        congress_115_hr_df.at[index, 'Text'] = None  # Set a placeholder value, e.g., None

    # Introduce a delay to avoid hitting the API rate limit
    time.sleep(2)

Got URL for Bill 221 on row 205 out of 2254
Got URL for Bill 222 on row 206 out of 2254
Got URL for Bill 223 on row 207 out of 2254
Got URL for Bill 224 on row 208 out of 2254
Got URL for Bill 225 on row 209 out of 2254
Got URL for Bill 226 on row 210 out of 2254
Got URL for Bill 227 on row 211 out of 2254
Got URL for Bill 228 on row 212 out of 2254
Got URL for Bill 229 on row 213 out of 2254
Got URL for Bill 230 on row 214 out of 2254
Got URL for Bill 231 on row 215 out of 2254
Got URL for Bill 232 on row 216 out of 2254
Got URL for Bill 233 on row 217 out of 2254
Got URL for Bill 234 on row 218 out of 2254
Got URL for Bill 235 on row 219 out of 2254
Got URL for Bill 236 on row 220 out of 2254
Got URL for Bill 237 on row 221 out of 2254
Got URL for Bill 238 on row 222 out of 2254
Got URL for Bill 239 on row 223 out of 2254
Got URL for Bill 240 on row 224 out of 2254
Got URL for Bill 241 on row 225 out of 2254
Got URL for Bill 242 on row 226 out of 2254
Got URL for Bill 243 on row 227 

NameError: name 'index' is not defined

In [22]:
for i in range(302, 2255):
    bill_lookup = str(congress_115_hr_df['BillNumber'][i])
    query_url = f"https://api.congress.gov/v3/bill/115/hr/{bill_lookup}/text?api_key={congress_api_key}"
    
    try:
        response = requests.get(query_url)
        response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
        
        json_data = response.json()
        text_url = json_data['textVersions'][0]['formats'][0]['url']
        congress_115_hr_df.at[i, 'TextUrl'] = text_url
        print(f"Got URL for Bill {bill_lookup} on row {i} out of 2254")

    except (requests.exceptions.RequestException, KeyError, IndexError, ValueError) as e:
        print(f"Failed to fetch data for Bill {bill_lookup}: {e}")
        congress_115_hr_df.at[index, 'Text'] = None  # Set a placeholder value, e.g., None

    # Introduce a delay to avoid hitting the API rate limit
    time.sleep(2)

Got URL for Bill 318 on row 302 out of 2254
Got URL for Bill 319 on row 303 out of 2254
Got URL for Bill 320 on row 304 out of 2254
Got URL for Bill 321 on row 305 out of 2254
Got URL for Bill 322 on row 306 out of 2254
Got URL for Bill 324 on row 307 out of 2254
Got URL for Bill 325 on row 308 out of 2254
Got URL for Bill 326 on row 309 out of 2254
Got URL for Bill 327 on row 310 out of 2254
Got URL for Bill 328 on row 311 out of 2254
Got URL for Bill 329 on row 312 out of 2254
Got URL for Bill 330 on row 313 out of 2254
Got URL for Bill 331 on row 314 out of 2254
Got URL for Bill 332 on row 315 out of 2254
Got URL for Bill 333 on row 316 out of 2254
Got URL for Bill 334 on row 317 out of 2254
Got URL for Bill 335 on row 318 out of 2254
Got URL for Bill 336 on row 319 out of 2254
Got URL for Bill 337 on row 320 out of 2254
Got URL for Bill 338 on row 321 out of 2254
Got URL for Bill 339 on row 322 out of 2254
Got URL for Bill 340 on row 323 out of 2254
Got URL for Bill 341 on row 324 

NameError: name 'index' is not defined

In [23]:
for i in range(381, 2255):
    bill_lookup = str(congress_115_hr_df['BillNumber'][i])
    query_url = f"https://api.congress.gov/v3/bill/115/hr/{bill_lookup}/text?api_key={congress_api_key}"
    
    try:
        response = requests.get(query_url)
        response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
        
        json_data = response.json()
        text_url = json_data['textVersions'][0]['formats'][0]['url']
        congress_115_hr_df.at[i, 'TextUrl'] = text_url
        print(f"Got URL for Bill {bill_lookup} on row {i} out of 2254")

    except (requests.exceptions.RequestException, KeyError, IndexError, ValueError) as e:
        print(f"Failed to fetch data for Bill {bill_lookup}: {e}")
        congress_115_hr_df.at[index, 'Text'] = None  # Set a placeholder value, e.g., None

    # Introduce a delay to avoid hitting the API rate limit
    time.sleep(2)

Got URL for Bill 399 on row 381 out of 2254
Got URL for Bill 400 on row 382 out of 2254
Got URL for Bill 401 on row 383 out of 2254
Got URL for Bill 403 on row 384 out of 2254
Got URL for Bill 404 on row 385 out of 2254
Got URL for Bill 405 on row 386 out of 2254
Got URL for Bill 406 on row 387 out of 2254
Got URL for Bill 407 on row 388 out of 2254
Got URL for Bill 408 on row 389 out of 2254
Got URL for Bill 409 on row 390 out of 2254
Got URL for Bill 410 on row 391 out of 2254
Got URL for Bill 411 on row 392 out of 2254
Got URL for Bill 412 on row 393 out of 2254
Got URL for Bill 413 on row 394 out of 2254
Got URL for Bill 414 on row 395 out of 2254
Got URL for Bill 415 on row 396 out of 2254
Got URL for Bill 416 on row 397 out of 2254
Got URL for Bill 417 on row 398 out of 2254
Got URL for Bill 418 on row 399 out of 2254
Got URL for Bill 419 on row 400 out of 2254
Got URL for Bill 420 on row 401 out of 2254
Got URL for Bill 421 on row 402 out of 2254
Got URL for Bill 422 on row 403 

KeyboardInterrupt: 

In [24]:
for i in range(448, 2255):
    bill_lookup = str(congress_115_hr_df['BillNumber'][i])
    query_url = f"https://api.congress.gov/v3/bill/115/hr/{bill_lookup}/text?api_key={congress_api_key}"
    
    try:
        response = requests.get(query_url)
        response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
        
        json_data = response.json()
        text_url = json_data['textVersions'][0]['formats'][0]['url']
        congress_115_hr_df.at[i, 'TextUrl'] = text_url
        print(f"Got URL for Bill {bill_lookup} on row {i} out of 2254")

    except (requests.exceptions.RequestException, KeyError, IndexError, ValueError) as e:
        print(f"Failed to fetch data for Bill {bill_lookup}: {e}")
        congress_115_hr_df.at[index, 'Text'] = None  # Set a placeholder value, e.g., None

    # Introduce a delay to avoid hitting the API rate limit
    time.sleep(2)

Got URL for Bill 469 on row 448 out of 2254
Got URL for Bill 470 on row 449 out of 2254
Got URL for Bill 471 on row 450 out of 2254
Got URL for Bill 472 on row 451 out of 2254
Got URL for Bill 473 on row 452 out of 2254
Got URL for Bill 474 on row 453 out of 2254
Got URL for Bill 475 on row 454 out of 2254
Got URL for Bill 476 on row 455 out of 2254
Got URL for Bill 478 on row 456 out of 2254
Got URL for Bill 479 on row 457 out of 2254
Got URL for Bill 480 on row 458 out of 2254
Got URL for Bill 481 on row 459 out of 2254
Got URL for Bill 482 on row 460 out of 2254
Got URL for Bill 483 on row 461 out of 2254
Got URL for Bill 484 on row 462 out of 2254
Got URL for Bill 485 on row 463 out of 2254
Got URL for Bill 486 on row 464 out of 2254
Failed to fetch data for Bill 487: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))


NameError: name 'index' is not defined

In [28]:
for i in range(465, 2255):
    bill_lookup = str(congress_115_hr_df['BillNumber'][i])
    query_url = f"https://api.congress.gov/v3/bill/115/hr/{bill_lookup}/text?api_key={congress_api_key}"
    
    try:
        response = requests.get(query_url)
        response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
        
        json_data = response.json()
        text_url = json_data['textVersions'][0]['formats'][0]['url']
        congress_115_hr_df.at[i, 'TextUrl'] = text_url
        print(f"Got URL for Bill {bill_lookup} on row {i} out of 2254")

    except (requests.exceptions.RequestException, KeyError, IndexError, ValueError) as e:
        print(f"Failed to fetch data for Bill {bill_lookup}: {e}")
        congress_115_hr_df.at[index, 'TextUrl'] = None  # Set a placeholder value, e.g., None

    # Introduce a delay to avoid hitting the API rate limit
    time.sleep(2)

Got URL for Bill 487 on row 465 out of 2254
Got URL for Bill 488 on row 466 out of 2254
Got URL for Bill 489 on row 467 out of 2254
Got URL for Bill 490 on row 468 out of 2254
Got URL for Bill 492 on row 469 out of 2254
Got URL for Bill 494 on row 470 out of 2254
Got URL for Bill 495 on row 471 out of 2254
Got URL for Bill 496 on row 472 out of 2254
Got URL for Bill 497 on row 473 out of 2254
Got URL for Bill 498 on row 474 out of 2254
Got URL for Bill 499 on row 475 out of 2254
Got URL for Bill 500 on row 476 out of 2254
Got URL for Bill 501 on row 477 out of 2254
Got URL for Bill 502 on row 478 out of 2254
Got URL for Bill 503 on row 479 out of 2254
Got URL for Bill 504 on row 480 out of 2254
Got URL for Bill 505 on row 481 out of 2254
Got URL for Bill 506 on row 482 out of 2254
Got URL for Bill 507 on row 483 out of 2254
Got URL for Bill 508 on row 484 out of 2254
Got URL for Bill 509 on row 485 out of 2254
Got URL for Bill 510 on row 486 out of 2254
Got URL for Bill 511 on row 487 

KeyboardInterrupt: 

In [30]:
for i in range(573, 2255):
    bill_lookup = str(congress_115_hr_df['BillNumber'][i])
    query_url = f"https://api.congress.gov/v3/bill/115/hr/{bill_lookup}/text?api_key={congress_api_key}"
    
    try:
        response = requests.get(query_url)
        response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
        
        json_data = response.json()
        text_url = json_data['textVersions'][0]['formats'][0]['url']
        congress_115_hr_df.at[i, 'TextUrl'] = text_url
        print(f"Got URL for Bill {bill_lookup} on row {i} out of 2254")

    except (requests.exceptions.RequestException, KeyError, IndexError, ValueError) as e:
        print(f"Failed to fetch data for Bill {bill_lookup}: {e}")
        

    # Introduce a delay to avoid hitting the API rate limit
    time.sleep(2)

Got URL for Bill 610 on row 573 out of 2254
Got URL for Bill 612 on row 574 out of 2254
Got URL for Bill 613 on row 575 out of 2254
Got URL for Bill 614 on row 576 out of 2254
Got URL for Bill 615 on row 577 out of 2254
Got URL for Bill 616 on row 578 out of 2254
Got URL for Bill 617 on row 579 out of 2254
Got URL for Bill 618 on row 580 out of 2254
Got URL for Bill 619 on row 581 out of 2254
Got URL for Bill 620 on row 582 out of 2254
Got URL for Bill 621 on row 583 out of 2254
Got URL for Bill 622 on row 584 out of 2254
Got URL for Bill 623 on row 585 out of 2254
Got URL for Bill 624 on row 586 out of 2254
Got URL for Bill 625 on row 587 out of 2254
Got URL for Bill 627 on row 588 out of 2254
Got URL for Bill 628 on row 589 out of 2254
Got URL for Bill 629 on row 590 out of 2254
Got URL for Bill 630 on row 591 out of 2254
Got URL for Bill 631 on row 592 out of 2254
Got URL for Bill 632 on row 593 out of 2254
Got URL for Bill 635 on row 594 out of 2254
Got URL for Bill 636 on row 595 

KeyboardInterrupt: 

In [39]:
for i in range(634, 2255):
    bill_lookup = str(congress_115_hr_df['BillNumber'][i])
    query_url = f"https://api.congress.gov/v3/bill/115/hr/{bill_lookup}/text?api_key={congress_api_key}"
    
    try:
        response = requests.get(query_url)
        response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
        
        json_data = response.json()
        text_url = json_data['textVersions'][0]['formats'][0]['url']
        congress_115_hr_df.at[i, 'TextUrl'] = text_url
        print(f"Got URL for Bill {bill_lookup} on row {i} out of 2254")

    except (requests.exceptions.RequestException, KeyError, IndexError, ValueError) as e:
        print(f"Failed to fetch data for Bill {bill_lookup}: {e}")
        

    # Introduce a delay to avoid hitting the API rate limit
    time.sleep(2)

Got URL for Bill 677 on row 634 out of 2254
Got URL for Bill 678 on row 635 out of 2254
Got URL for Bill 679 on row 636 out of 2254
Got URL for Bill 680 on row 637 out of 2254
Got URL for Bill 681 on row 638 out of 2254
Got URL for Bill 682 on row 639 out of 2254
Got URL for Bill 683 on row 640 out of 2254
Got URL for Bill 684 on row 641 out of 2254
Got URL for Bill 685 on row 642 out of 2254
Got URL for Bill 686 on row 643 out of 2254
Got URL for Bill 687 on row 644 out of 2254
Got URL for Bill 688 on row 645 out of 2254
Got URL for Bill 689 on row 646 out of 2254
Got URL for Bill 690 on row 647 out of 2254
Got URL for Bill 691 on row 648 out of 2254
Got URL for Bill 694 on row 649 out of 2254
Got URL for Bill 697 on row 650 out of 2254
Got URL for Bill 698 on row 651 out of 2254
Got URL for Bill 699 on row 652 out of 2254
Got URL for Bill 700 on row 653 out of 2254
Got URL for Bill 701 on row 654 out of 2254
Got URL for Bill 702 on row 655 out of 2254
Got URL for Bill 703 on row 656 

KeyboardInterrupt: 

In [40]:
for i in range(784, 2255):
    bill_lookup = str(congress_115_hr_df['BillNumber'][i])
    query_url = f"https://api.congress.gov/v3/bill/115/hr/{bill_lookup}/text?api_key={congress_api_key}"
    
    try:
        response = requests.get(query_url)
        response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
        
        json_data = response.json()
        text_url = json_data['textVersions'][0]['formats'][0]['url']
        congress_115_hr_df.at[i, 'TextUrl'] = text_url
        print(f"Got URL for Bill {bill_lookup} on row {i} out of 2254")

    except (requests.exceptions.RequestException, KeyError, IndexError, ValueError) as e:
        print(f"Failed to fetch data for Bill {bill_lookup}: {e}")
        

    # Introduce a delay to avoid hitting the API rate limit
    time.sleep(2)

Got URL for Bill 847 on row 784 out of 2254
Got URL for Bill 848 on row 785 out of 2254
Got URL for Bill 849 on row 786 out of 2254
Got URL for Bill 850 on row 787 out of 2254
Got URL for Bill 852 on row 788 out of 2254
Got URL for Bill 853 on row 789 out of 2254
Got URL for Bill 854 on row 790 out of 2254
Got URL for Bill 855 on row 791 out of 2254
Got URL for Bill 856 on row 792 out of 2254
Got URL for Bill 857 on row 793 out of 2254
Got URL for Bill 858 on row 794 out of 2254
Got URL for Bill 860 on row 795 out of 2254
Got URL for Bill 861 on row 796 out of 2254
Got URL for Bill 863 on row 797 out of 2254
Got URL for Bill 865 on row 798 out of 2254
Got URL for Bill 866 on row 799 out of 2254
Got URL for Bill 868 on row 800 out of 2254
Got URL for Bill 869 on row 801 out of 2254
Got URL for Bill 870 on row 802 out of 2254
Got URL for Bill 871 on row 803 out of 2254
Got URL for Bill 872 on row 804 out of 2254
Got URL for Bill 873 on row 805 out of 2254
Got URL for Bill 874 on row 806 

KeyboardInterrupt: 

In [41]:
for i in range(871, 2255):
    bill_lookup = str(congress_115_hr_df['BillNumber'][i])
    query_url = f"https://api.congress.gov/v3/bill/115/hr/{bill_lookup}/text?api_key={congress_api_key}"
    
    try:
        response = requests.get(query_url)
        response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
        
        json_data = response.json()
        text_url = json_data['textVersions'][0]['formats'][0]['url']
        congress_115_hr_df.at[i, 'TextUrl'] = text_url
        print(f"Got URL for Bill {bill_lookup} on row {i} out of 2254")

    except (requests.exceptions.RequestException, KeyError, IndexError, ValueError) as e:
        print(f"Failed to fetch data for Bill {bill_lookup}: {e}")
        

    # Introduce a delay to avoid hitting the API rate limit
    time.sleep(2)

Got URL for Bill 951 on row 871 out of 2254
Got URL for Bill 952 on row 872 out of 2254
Got URL for Bill 953 on row 873 out of 2254
Got URL for Bill 954 on row 874 out of 2254
Got URL for Bill 955 on row 875 out of 2254
Got URL for Bill 956 on row 876 out of 2254
Got URL for Bill 957 on row 877 out of 2254
Got URL for Bill 958 on row 878 out of 2254
Got URL for Bill 959 on row 879 out of 2254
Got URL for Bill 960 on row 880 out of 2254
Got URL for Bill 961 on row 881 out of 2254
Got URL for Bill 962 on row 882 out of 2254
Got URL for Bill 963 on row 883 out of 2254
Got URL for Bill 965 on row 884 out of 2254
Got URL for Bill 966 on row 885 out of 2254
Got URL for Bill 967 on row 886 out of 2254
Got URL for Bill 968 on row 887 out of 2254
Got URL for Bill 969 on row 888 out of 2254
Got URL for Bill 970 on row 889 out of 2254
Got URL for Bill 971 on row 890 out of 2254
Got URL for Bill 972 on row 891 out of 2254
Got URL for Bill 973 on row 892 out of 2254
Got URL for Bill 974 on row 893 

KeyboardInterrupt: 

In [42]:
for i in range(920, 2255):
    bill_lookup = str(congress_115_hr_df['BillNumber'][i])
    query_url = f"https://api.congress.gov/v3/bill/115/hr/{bill_lookup}/text?api_key={congress_api_key}"
    
    try:
        response = requests.get(query_url)
        response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
        
        json_data = response.json()
        text_url = json_data['textVersions'][0]['formats'][0]['url']
        congress_115_hr_df.at[i, 'TextUrl'] = text_url
        print(f"Got URL for Bill {bill_lookup} on row {i} out of 2254")

    except (requests.exceptions.RequestException, KeyError, IndexError, ValueError) as e:
        print(f"Failed to fetch data for Bill {bill_lookup}: {e}")
        

    # Introduce a delay to avoid hitting the API rate limit
    time.sleep(1)

Got URL for Bill 1004 on row 920 out of 2254
Got URL for Bill 1005 on row 921 out of 2254
Got URL for Bill 1006 on row 922 out of 2254
Got URL for Bill 1007 on row 923 out of 2254
Got URL for Bill 1008 on row 924 out of 2254
Got URL for Bill 1009 on row 925 out of 2254
Got URL for Bill 1010 on row 926 out of 2254
Got URL for Bill 1011 on row 927 out of 2254
Got URL for Bill 1012 on row 928 out of 2254
Got URL for Bill 1013 on row 929 out of 2254
Got URL for Bill 1014 on row 930 out of 2254
Got URL for Bill 1015 on row 931 out of 2254
Got URL for Bill 1016 on row 932 out of 2254
Got URL for Bill 1019 on row 933 out of 2254
Got URL for Bill 1020 on row 934 out of 2254


KeyboardInterrupt: 

In [43]:
for i in range(935, 2255):
    bill_lookup = str(congress_115_hr_df['BillNumber'][i])
    query_url = f"https://api.congress.gov/v3/bill/115/hr/{bill_lookup}/text?api_key={congress_api_key}"
    
    try:
        response = requests.get(query_url)
        response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
        
        json_data = response.json()
        text_url = json_data['textVersions'][0]['formats'][0]['url']
        congress_115_hr_df.at[i, 'TextUrl'] = text_url
        print(f"Got URL for Bill {bill_lookup} on row {i} out of 2254")

    except (requests.exceptions.RequestException, KeyError, IndexError, ValueError) as e:
        print(f"Failed to fetch data for Bill {bill_lookup}: {e}")
        

    # Introduce a delay to avoid hitting the API rate limit
    time.sleep(2)

Got URL for Bill 1021 on row 935 out of 2254
Got URL for Bill 1023 on row 936 out of 2254
Got URL for Bill 1024 on row 937 out of 2254
Got URL for Bill 1025 on row 938 out of 2254
Got URL for Bill 1026 on row 939 out of 2254
Got URL for Bill 1027 on row 940 out of 2254
Got URL for Bill 1028 on row 941 out of 2254
Got URL for Bill 1029 on row 942 out of 2254
Got URL for Bill 1030 on row 943 out of 2254
Got URL for Bill 1031 on row 944 out of 2254
Got URL for Bill 1032 on row 945 out of 2254
Got URL for Bill 1033 on row 946 out of 2254
Got URL for Bill 1035 on row 947 out of 2254
Got URL for Bill 1036 on row 948 out of 2254
Got URL for Bill 1037 on row 949 out of 2254
Got URL for Bill 1038 on row 950 out of 2254
Got URL for Bill 1039 on row 951 out of 2254
Got URL for Bill 1040 on row 952 out of 2254
Got URL for Bill 1041 on row 953 out of 2254
Got URL for Bill 1042 on row 954 out of 2254
Got URL for Bill 1043 on row 955 out of 2254
Got URL for Bill 1045 on row 956 out of 2254
Got URL fo

KeyboardInterrupt: 

In [44]:
for i in range(978, 2255):
    bill_lookup = str(congress_115_hr_df['BillNumber'][i])
    query_url = f"https://api.congress.gov/v3/bill/115/hr/{bill_lookup}/text?api_key={congress_api_key}"
    
    try:
        response = requests.get(query_url)
        response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
        
        json_data = response.json()
        text_url = json_data['textVersions'][0]['formats'][0]['url']
        congress_115_hr_df.at[i, 'TextUrl'] = text_url
        print(f"Got URL for Bill {bill_lookup} on row {i} out of 2254")

    except (requests.exceptions.RequestException, KeyError, IndexError, ValueError) as e:
        print(f"Failed to fetch data for Bill {bill_lookup}: {e}")
        

    # Introduce a delay to avoid hitting the API rate limit
    time.sleep(2)

Got URL for Bill 1069 on row 978 out of 2254
Got URL for Bill 1072 on row 979 out of 2254
Got URL for Bill 1073 on row 980 out of 2254
Got URL for Bill 1074 on row 981 out of 2254
Got URL for Bill 1075 on row 982 out of 2254
Got URL for Bill 1076 on row 983 out of 2254
Got URL for Bill 1077 on row 984 out of 2254
Got URL for Bill 1078 on row 985 out of 2254
Got URL for Bill 1079 on row 986 out of 2254
Got URL for Bill 1080 on row 987 out of 2254
Got URL for Bill 1081 on row 988 out of 2254
Got URL for Bill 1082 on row 989 out of 2254
Got URL for Bill 1083 on row 990 out of 2254
Got URL for Bill 1084 on row 991 out of 2254
Got URL for Bill 1085 on row 992 out of 2254
Got URL for Bill 1086 on row 993 out of 2254


KeyboardInterrupt: 

In [45]:
for i in range(994, 2255):
    bill_lookup = str(congress_115_hr_df['BillNumber'][i])
    query_url = f"https://api.congress.gov/v3/bill/115/hr/{bill_lookup}/text?api_key={congress_api_key}"
    
    try:
        response = requests.get(query_url)
        response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
        
        json_data = response.json()
        text_url = json_data['textVersions'][0]['formats'][0]['url']
        congress_115_hr_df.at[i, 'TextUrl'] = text_url
        print(f"Got URL for Bill {bill_lookup} on row {i} out of 2254")

    except (requests.exceptions.RequestException, KeyError, IndexError, ValueError) as e:
        print(f"Failed to fetch data for Bill {bill_lookup}: {e}")
        

    # Introduce a delay to avoid hitting the API rate limit
    time.sleep(2)

Got URL for Bill 1087 on row 994 out of 2254
Got URL for Bill 1088 on row 995 out of 2254
Got URL for Bill 1089 on row 996 out of 2254
Got URL for Bill 1090 on row 997 out of 2254
Got URL for Bill 1092 on row 998 out of 2254
Got URL for Bill 1093 on row 999 out of 2254
Got URL for Bill 1095 on row 1000 out of 2254
Got URL for Bill 1096 on row 1001 out of 2254
Got URL for Bill 1097 on row 1002 out of 2254
Got URL for Bill 1098 on row 1003 out of 2254
Got URL for Bill 1099 on row 1004 out of 2254
Got URL for Bill 1100 on row 1005 out of 2254
Got URL for Bill 1101 on row 1006 out of 2254
Got URL for Bill 1103 on row 1007 out of 2254
Got URL for Bill 1104 on row 1008 out of 2254
Got URL for Bill 1106 on row 1009 out of 2254
Got URL for Bill 1107 on row 1010 out of 2254
Got URL for Bill 1108 on row 1011 out of 2254
Got URL for Bill 1109 on row 1012 out of 2254
Got URL for Bill 1110 on row 1013 out of 2254
Got URL for Bill 1111 on row 1014 out of 2254
Got URL for Bill 1112 on row 1015 out of

KeyboardInterrupt: 

In [46]:
for i in range(1082, 2255):
    bill_lookup = str(congress_115_hr_df['BillNumber'][i])
    query_url = f"https://api.congress.gov/v3/bill/115/hr/{bill_lookup}/text?api_key={congress_api_key}"
    
    try:
        response = requests.get(query_url)
        response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
        
        json_data = response.json()
        text_url = json_data['textVersions'][0]['formats'][0]['url']
        congress_115_hr_df.at[i, 'TextUrl'] = text_url
        print(f"Got URL for Bill {bill_lookup} on row {i} out of 2254")

    except (requests.exceptions.RequestException, KeyError, IndexError, ValueError) as e:
        print(f"Failed to fetch data for Bill {bill_lookup}: {e}")
        

    # Introduce a delay to avoid hitting the API rate limit
    time.sleep(2)

Got URL for Bill 1191 on row 1082 out of 2254
Got URL for Bill 1192 on row 1083 out of 2254
Got URL for Bill 1193 on row 1084 out of 2254
Got URL for Bill 1195 on row 1085 out of 2254
Got URL for Bill 1196 on row 1086 out of 2254
Got URL for Bill 1197 on row 1087 out of 2254
Got URL for Bill 1198 on row 1088 out of 2254
Got URL for Bill 1199 on row 1089 out of 2254
Got URL for Bill 1200 on row 1090 out of 2254
Got URL for Bill 1201 on row 1091 out of 2254
Got URL for Bill 1204 on row 1092 out of 2254
Got URL for Bill 1206 on row 1093 out of 2254
Got URL for Bill 1207 on row 1094 out of 2254
Got URL for Bill 1208 on row 1095 out of 2254
Got URL for Bill 1209 on row 1096 out of 2254
Got URL for Bill 1210 on row 1097 out of 2254
Got URL for Bill 1211 on row 1098 out of 2254
Got URL for Bill 1212 on row 1099 out of 2254
Got URL for Bill 1213 on row 1100 out of 2254
Got URL for Bill 1214 on row 1101 out of 2254
Got URL for Bill 1215 on row 1102 out of 2254
Got URL for Bill 1216 on row 1103 

KeyboardInterrupt: 

In [47]:
for i in range(1138, 2255):
    bill_lookup = str(congress_115_hr_df['BillNumber'][i])
    query_url = f"https://api.congress.gov/v3/bill/115/hr/{bill_lookup}/text?api_key={congress_api_key}"
    
    try:
        response = requests.get(query_url)
        response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
        
        json_data = response.json()
        text_url = json_data['textVersions'][0]['formats'][0]['url']
        congress_115_hr_df.at[i, 'TextUrl'] = text_url
        print(f"Got URL for Bill {bill_lookup} on row {i} out of 2254")

    except (requests.exceptions.RequestException, KeyError, IndexError, ValueError) as e:
        print(f"Failed to fetch data for Bill {bill_lookup}: {e}")
        

    # Introduce a delay to avoid hitting the API rate limit
    time.sleep(2)

Got URL for Bill 1259 on row 1138 out of 2254
Got URL for Bill 1260 on row 1139 out of 2254
Got URL for Bill 1261 on row 1140 out of 2254
Got URL for Bill 1262 on row 1141 out of 2254
Got URL for Bill 1264 on row 1142 out of 2254
Got URL for Bill 1265 on row 1143 out of 2254
Got URL for Bill 1266 on row 1144 out of 2254
Got URL for Bill 1267 on row 1145 out of 2254
Got URL for Bill 1268 on row 1146 out of 2254
Got URL for Bill 1270 on row 1147 out of 2254
Got URL for Bill 1272 on row 1148 out of 2254
Got URL for Bill 1273 on row 1149 out of 2254
Got URL for Bill 1274 on row 1150 out of 2254
Got URL for Bill 1275 on row 1151 out of 2254
Got URL for Bill 1276 on row 1152 out of 2254
Got URL for Bill 1277 on row 1153 out of 2254
Got URL for Bill 1278 on row 1154 out of 2254
Got URL for Bill 1279 on row 1155 out of 2254
Got URL for Bill 1280 on row 1156 out of 2254
Got URL for Bill 1281 on row 1157 out of 2254
Got URL for Bill 1283 on row 1158 out of 2254
Got URL for Bill 1284 on row 1159 

KeyboardInterrupt: 

In [48]:
for i in range(1223, 2255):
    bill_lookup = str(congress_115_hr_df['BillNumber'][i])
    query_url = f"https://api.congress.gov/v3/bill/115/hr/{bill_lookup}/text?api_key={congress_api_key}"
    
    try:
        response = requests.get(query_url)
        response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
        
        json_data = response.json()
        text_url = json_data['textVersions'][0]['formats'][0]['url']
        congress_115_hr_df.at[i, 'TextUrl'] = text_url
        print(f"Got URL for Bill {bill_lookup} on row {i} out of 2254")

    except (requests.exceptions.RequestException, KeyError, IndexError, ValueError) as e:
        print(f"Failed to fetch data for Bill {bill_lookup}: {e}")
        

    # Introduce a delay to avoid hitting the API rate limit
    time.sleep(2)

Got URL for Bill 1368 on row 1223 out of 2254
Got URL for Bill 1370 on row 1224 out of 2254
Got URL for Bill 1371 on row 1225 out of 2254
Got URL for Bill 1372 on row 1226 out of 2254
Got URL for Bill 1373 on row 1227 out of 2254
Got URL for Bill 1374 on row 1228 out of 2254
Got URL for Bill 1375 on row 1229 out of 2254
Got URL for Bill 1376 on row 1230 out of 2254
Got URL for Bill 1378 on row 1231 out of 2254
Got URL for Bill 1379 on row 1232 out of 2254
Got URL for Bill 1380 on row 1233 out of 2254
Got URL for Bill 1381 on row 1234 out of 2254
Got URL for Bill 1382 on row 1235 out of 2254
Got URL for Bill 1383 on row 1236 out of 2254
Got URL for Bill 1386 on row 1237 out of 2254
Got URL for Bill 1387 on row 1238 out of 2254
Got URL for Bill 1388 on row 1239 out of 2254
Got URL for Bill 1389 on row 1240 out of 2254
Got URL for Bill 1390 on row 1241 out of 2254
Got URL for Bill 1391 on row 1242 out of 2254
Got URL for Bill 1393 on row 1243 out of 2254
Got URL for Bill 1394 on row 1244 

KeyboardInterrupt: 

In [49]:
for i in range(1258, 2255):
    bill_lookup = str(congress_115_hr_df['BillNumber'][i])
    query_url = f"https://api.congress.gov/v3/bill/115/hr/{bill_lookup}/text?api_key={congress_api_key}"
    
    try:
        response = requests.get(query_url)
        response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
        
        json_data = response.json()
        text_url = json_data['textVersions'][0]['formats'][0]['url']
        congress_115_hr_df.at[i, 'TextUrl'] = text_url
        print(f"Got URL for Bill {bill_lookup} on row {i} out of 2254")

    except (requests.exceptions.RequestException, KeyError, IndexError, ValueError) as e:
        print(f"Failed to fetch data for Bill {bill_lookup}: {e}")
        

    # Introduce a delay to avoid hitting the API rate limit
    time.sleep(2)

Got URL for Bill 1413 on row 1258 out of 2254
Got URL for Bill 1414 on row 1259 out of 2254
Got URL for Bill 1415 on row 1260 out of 2254
Got URL for Bill 1416 on row 1261 out of 2254
Got URL for Bill 1417 on row 1262 out of 2254
Got URL for Bill 1418 on row 1263 out of 2254
Got URL for Bill 1419 on row 1264 out of 2254
Got URL for Bill 1421 on row 1265 out of 2254
Got URL for Bill 1424 on row 1266 out of 2254
Got URL for Bill 1425 on row 1267 out of 2254
Got URL for Bill 1426 on row 1268 out of 2254
Got URL for Bill 1427 on row 1269 out of 2254
Got URL for Bill 1428 on row 1270 out of 2254
Got URL for Bill 1429 on row 1271 out of 2254
Got URL for Bill 1430 on row 1272 out of 2254
Got URL for Bill 1431 on row 1273 out of 2254
Got URL for Bill 1432 on row 1274 out of 2254
Got URL for Bill 1434 on row 1275 out of 2254
Got URL for Bill 1435 on row 1276 out of 2254
Got URL for Bill 1437 on row 1277 out of 2254
Got URL for Bill 1438 on row 1278 out of 2254
Got URL for Bill 1439 on row 1279 

KeyboardInterrupt: 

In [50]:
for i in range(1430, 2255):
    bill_lookup = str(congress_115_hr_df['BillNumber'][i])
    query_url = f"https://api.congress.gov/v3/bill/115/hr/{bill_lookup}/text?api_key={congress_api_key}"
    
    try:
        response = requests.get(query_url)
        response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
        
        json_data = response.json()
        text_url = json_data['textVersions'][0]['formats'][0]['url']
        congress_115_hr_df.at[i, 'TextUrl'] = text_url
        print(f"Got URL for Bill {bill_lookup} on row {i} out of 2254")

    except (requests.exceptions.RequestException, KeyError, IndexError, ValueError) as e:
        print(f"Failed to fetch data for Bill {bill_lookup}: {e}")
        

    # Introduce a delay to avoid hitting the API rate limit
    time.sleep(2)

Got URL for Bill 1632 on row 1430 out of 2254
Got URL for Bill 1633 on row 1431 out of 2254
Got URL for Bill 1634 on row 1432 out of 2254
Got URL for Bill 1635 on row 1433 out of 2254
Got URL for Bill 1636 on row 1434 out of 2254
Got URL for Bill 1638 on row 1435 out of 2254
Got URL for Bill 1639 on row 1436 out of 2254
Got URL for Bill 1640 on row 1437 out of 2254
Got URL for Bill 1641 on row 1438 out of 2254
Got URL for Bill 1642 on row 1439 out of 2254
Got URL for Bill 1644 on row 1440 out of 2254
Got URL for Bill 1646 on row 1441 out of 2254
Got URL for Bill 1647 on row 1442 out of 2254
Got URL for Bill 1648 on row 1443 out of 2254
Got URL for Bill 1649 on row 1444 out of 2254
Got URL for Bill 1651 on row 1445 out of 2254
Got URL for Bill 1652 on row 1446 out of 2254
Got URL for Bill 1653 on row 1447 out of 2254
Got URL for Bill 1654 on row 1448 out of 2254
Got URL for Bill 1655 on row 1449 out of 2254
Got URL for Bill 1656 on row 1450 out of 2254
Got URL for Bill 1657 on row 1451 

KeyboardInterrupt: 

In [51]:
for i in range(1520, 2255):
    bill_lookup = str(congress_115_hr_df['BillNumber'][i])
    query_url = f"https://api.congress.gov/v3/bill/115/hr/{bill_lookup}/text?api_key={congress_api_key}"
    
    try:
        response = requests.get(query_url)
        response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
        
        json_data = response.json()
        text_url = json_data['textVersions'][0]['formats'][0]['url']
        congress_115_hr_df.at[i, 'TextUrl'] = text_url
        print(f"Got URL for Bill {bill_lookup} on row {i} out of 2254")

    except (requests.exceptions.RequestException, KeyError, IndexError, ValueError) as e:
        print(f"Failed to fetch data for Bill {bill_lookup}: {e}")
        

    # Introduce a delay to avoid hitting the API rate limit
    time.sleep(2)

Got URL for Bill 1746 on row 1520 out of 2254
Got URL for Bill 1747 on row 1521 out of 2254
Got URL for Bill 1750 on row 1522 out of 2254
Got URL for Bill 1752 on row 1523 out of 2254
Got URL for Bill 1753 on row 1524 out of 2254
Got URL for Bill 1754 on row 1525 out of 2254
Got URL for Bill 1755 on row 1526 out of 2254
Got URL for Bill 1759 on row 1527 out of 2254
Got URL for Bill 1760 on row 1528 out of 2254
Got URL for Bill 1761 on row 1529 out of 2254
Got URL for Bill 1762 on row 1530 out of 2254
Got URL for Bill 1763 on row 1531 out of 2254
Failed to fetch data for Bill 1764: 429 Client Error: Too Many Requests for url: https://api.congress.gov/v3/bill/115/hr/1764/text?api_key=RXmLBlUBIrHFOxxGoz4kDfY8TiBDI3RH5gaPn0KC
Got URL for Bill 1765 on row 1533 out of 2254
Failed to fetch data for Bill 1766: 429 Client Error: Too Many Requests for url: https://api.congress.gov/v3/bill/115/hr/1766/text?api_key=RXmLBlUBIrHFOxxGoz4kDfY8TiBDI3RH5gaPn0KC


KeyboardInterrupt: 

In [68]:
for i in range(1532, 2255):
    bill_lookup = str(congress_115_hr_df['BillNumber'][i])
    query_url = f"https://api.congress.gov/v3/bill/115/hr/{bill_lookup}/text?api_key={congress_api_key}"
    
    try:
        response = requests.get(query_url)
        response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
        
        json_data = response.json()
        text_url = json_data['textVersions'][0]['formats'][0]['url']
        congress_115_hr_df.at[i, 'TextUrl'] = text_url
        print(f"Got URL for Bill {bill_lookup} on row {i} out of 2254")

    except (requests.exceptions.RequestException, KeyError, IndexError, ValueError) as e:
        print(f"Failed to fetch data for Bill {bill_lookup}: {e}")
        

    # Introduce a delay to avoid hitting the API rate limit
    time.sleep(2)

Got URL for Bill 1764 on row 1532 out of 2254
Got URL for Bill 1765 on row 1533 out of 2254
Got URL for Bill 1766 on row 1534 out of 2254
Got URL for Bill 1767 on row 1535 out of 2254
Got URL for Bill 1768 on row 1536 out of 2254
Got URL for Bill 1769 on row 1537 out of 2254
Got URL for Bill 1770 on row 1538 out of 2254
Got URL for Bill 1772 on row 1539 out of 2254
Got URL for Bill 1774 on row 1540 out of 2254
Got URL for Bill 1775 on row 1541 out of 2254
Got URL for Bill 1780 on row 1542 out of 2254
Got URL for Bill 1782 on row 1543 out of 2254
Got URL for Bill 1784 on row 1544 out of 2254
Got URL for Bill 1786 on row 1545 out of 2254
Got URL for Bill 1787 on row 1546 out of 2254
Got URL for Bill 1788 on row 1547 out of 2254
Got URL for Bill 1789 on row 1548 out of 2254
Got URL for Bill 1790 on row 1549 out of 2254
Got URL for Bill 1791 on row 1550 out of 2254
Got URL for Bill 1792 on row 1551 out of 2254
Got URL for Bill 1793 on row 1552 out of 2254
Got URL for Bill 1794 on row 1553 

KeyboardInterrupt: 

In [69]:
for i in range(1601, 2255):
    bill_lookup = str(congress_115_hr_df['BillNumber'][i])
    query_url = f"https://api.congress.gov/v3/bill/115/hr/{bill_lookup}/text?api_key={congress_api_key}"
    
    try:
        response = requests.get(query_url)
        response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
        
        json_data = response.json()
        text_url = json_data['textVersions'][0]['formats'][0]['url']
        congress_115_hr_df.at[i, 'TextUrl'] = text_url
        print(f"Got URL for Bill {bill_lookup} on row {i} out of 2254")

    except (requests.exceptions.RequestException, KeyError, IndexError, ValueError) as e:
        print(f"Failed to fetch data for Bill {bill_lookup}: {e}")
        

    # Introduce a delay to avoid hitting the API rate limit
    time.sleep(2)

Got URL for Bill 1855 on row 1601 out of 2254
Got URL for Bill 1856 on row 1602 out of 2254
Got URL for Bill 1857 on row 1603 out of 2254
Got URL for Bill 1858 on row 1604 out of 2254
Got URL for Bill 1861 on row 1605 out of 2254
Got URL for Bill 1863 on row 1606 out of 2254
Got URL for Bill 1865 on row 1607 out of 2254
Got URL for Bill 1866 on row 1608 out of 2254
Got URL for Bill 1867 on row 1609 out of 2254
Got URL for Bill 1868 on row 1610 out of 2254
Failed to fetch data for Bill 1871: ('Connection aborted.', OSError(0, 'Error'))
Got URL for Bill 1873 on row 1612 out of 2254
Got URL for Bill 1874 on row 1613 out of 2254


KeyboardInterrupt: 

In [70]:
for i in range(1611, 2255):
    bill_lookup = str(congress_115_hr_df['BillNumber'][i])
    query_url = f"https://api.congress.gov/v3/bill/115/hr/{bill_lookup}/text?api_key={congress_api_key}"
    
    try:
        response = requests.get(query_url)
        response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
        
        json_data = response.json()
        text_url = json_data['textVersions'][0]['formats'][0]['url']
        congress_115_hr_df.at[i, 'TextUrl'] = text_url
        print(f"Got URL for Bill {bill_lookup} on row {i} out of 2254")

    except (requests.exceptions.RequestException, KeyError, IndexError, ValueError) as e:
        print(f"Failed to fetch data for Bill {bill_lookup}: {e}")
        

    # Introduce a delay to avoid hitting the API rate limit
    time.sleep(2)

Got URL for Bill 1871 on row 1611 out of 2254
Got URL for Bill 1873 on row 1612 out of 2254
Got URL for Bill 1874 on row 1613 out of 2254
Got URL for Bill 1875 on row 1614 out of 2254
Got URL for Bill 1876 on row 1615 out of 2254
Got URL for Bill 1877 on row 1616 out of 2254
Got URL for Bill 1879 on row 1617 out of 2254
Got URL for Bill 1881 on row 1618 out of 2254
Got URL for Bill 1883 on row 1619 out of 2254
Got URL for Bill 1885 on row 1620 out of 2254
Got URL for Bill 1886 on row 1621 out of 2254
Got URL for Bill 1887 on row 1622 out of 2254
Got URL for Bill 1888 on row 1623 out of 2254
Got URL for Bill 1889 on row 1624 out of 2254
Got URL for Bill 1891 on row 1625 out of 2254
Got URL for Bill 1892 on row 1626 out of 2254
Got URL for Bill 1894 on row 1627 out of 2254
Got URL for Bill 1896 on row 1628 out of 2254
Got URL for Bill 1897 on row 1629 out of 2254
Got URL for Bill 1900 on row 1630 out of 2254
Got URL for Bill 1901 on row 1631 out of 2254
Got URL for Bill 1903 on row 1632 

KeyboardInterrupt: 

In [71]:
for i in range(1799, 2255):
    bill_lookup = str(congress_115_hr_df['BillNumber'][i])
    query_url = f"https://api.congress.gov/v3/bill/115/hr/{bill_lookup}/text?api_key={congress_api_key}"
    
    try:
        response = requests.get(query_url)
        response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
        
        json_data = response.json()
        text_url = json_data['textVersions'][0]['formats'][0]['url']
        congress_115_hr_df.at[i, 'TextUrl'] = text_url
        print(f"Got URL for Bill {bill_lookup} on row {i} out of 2254")

    except (requests.exceptions.RequestException, KeyError, IndexError, ValueError) as e:
        print(f"Failed to fetch data for Bill {bill_lookup}: {e}")
        

    # Introduce a delay to avoid hitting the API rate limit
    time.sleep(2)

Got URL for Bill 2192 on row 1799 out of 2254
Got URL for Bill 2198 on row 1800 out of 2254
Got URL for Bill 2199 on row 1801 out of 2254
Got URL for Bill 2201 on row 1802 out of 2254
Got URL for Bill 2202 on row 1803 out of 2254
Got URL for Bill 2203 on row 1804 out of 2254
Got URL for Bill 2204 on row 1805 out of 2254
Got URL for Bill 2205 on row 1806 out of 2254
Got URL for Bill 2207 on row 1807 out of 2254
Got URL for Bill 2209 on row 1808 out of 2254
Got URL for Bill 2210 on row 1809 out of 2254
Got URL for Bill 2211 on row 1810 out of 2254
Got URL for Bill 2214 on row 1811 out of 2254
Got URL for Bill 2216 on row 1812 out of 2254
Got URL for Bill 2217 on row 1813 out of 2254
Got URL for Bill 2218 on row 1814 out of 2254
Got URL for Bill 2219 on row 1815 out of 2254
Got URL for Bill 2220 on row 1816 out of 2254
Got URL for Bill 2221 on row 1817 out of 2254
Got URL for Bill 2225 on row 1818 out of 2254
Got URL for Bill 2226 on row 1819 out of 2254
Got URL for Bill 2227 on row 1820 

KeyboardInterrupt: 

In [72]:
for i in range(1939, 2255):
    bill_lookup = str(congress_115_hr_df['BillNumber'][i])
    query_url = f"https://api.congress.gov/v3/bill/115/hr/{bill_lookup}/text?api_key={congress_api_key}"
    
    try:
        response = requests.get(query_url)
        response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
        
        json_data = response.json()
        text_url = json_data['textVersions'][0]['formats'][0]['url']
        congress_115_hr_df.at[i, 'TextUrl'] = text_url
        print(f"Got URL for Bill {bill_lookup} on row {i} out of 2254")

    except (requests.exceptions.RequestException, KeyError, IndexError, ValueError) as e:
        print(f"Failed to fetch data for Bill {bill_lookup}: {e}")
        

    # Introduce a delay to avoid hitting the API rate limit
    time.sleep(2)

Got URL for Bill 2462 on row 1939 out of 2254
Got URL for Bill 2464 on row 1940 out of 2254
Got URL for Bill 2465 on row 1941 out of 2254
Got URL for Bill 2466 on row 1942 out of 2254
Got URL for Bill 2467 on row 1943 out of 2254
Got URL for Bill 2471 on row 1944 out of 2254
Got URL for Bill 2472 on row 1945 out of 2254
Got URL for Bill 2476 on row 1946 out of 2254
Got URL for Bill 2483 on row 1947 out of 2254
Got URL for Bill 2484 on row 1948 out of 2254
Got URL for Bill 2486 on row 1949 out of 2254
Got URL for Bill 2495 on row 1950 out of 2254
Got URL for Bill 2498 on row 1951 out of 2254
Got URL for Bill 2501 on row 1952 out of 2254
Got URL for Bill 2502 on row 1953 out of 2254
Got URL for Bill 2505 on row 1954 out of 2254
Got URL for Bill 2506 on row 1955 out of 2254
Got URL for Bill 2508 on row 1956 out of 2254
Got URL for Bill 2514 on row 1957 out of 2254
Got URL for Bill 2516 on row 1958 out of 2254
Got URL for Bill 2517 on row 1959 out of 2254
Got URL for Bill 2519 on row 1960 

KeyboardInterrupt: 

In [73]:
for i in range(1977, 2255):
    bill_lookup = str(congress_115_hr_df['BillNumber'][i])
    query_url = f"https://api.congress.gov/v3/bill/115/hr/{bill_lookup}/text?api_key={congress_api_key}"
    
    try:
        response = requests.get(query_url)
        response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
        
        json_data = response.json()
        text_url = json_data['textVersions'][0]['formats'][0]['url']
        congress_115_hr_df.at[i, 'TextUrl'] = text_url
        print(f"Got URL for Bill {bill_lookup} on row {i} out of 2254")

    except (requests.exceptions.RequestException, KeyError, IndexError, ValueError) as e:
        print(f"Failed to fetch data for Bill {bill_lookup}: {e}")
        

    # Introduce a delay to avoid hitting the API rate limit
    time.sleep(2)

Got URL for Bill 2546 on row 1977 out of 2254
Got URL for Bill 2547 on row 1978 out of 2254
Got URL for Bill 2551 on row 1979 out of 2254
Got URL for Bill 2552 on row 1980 out of 2254
Got URL for Bill 2553 on row 1981 out of 2254
Got URL for Bill 2558 on row 1982 out of 2254
Got URL for Bill 2559 on row 1983 out of 2254
Got URL for Bill 2565 on row 1984 out of 2254
Got URL for Bill 2566 on row 1985 out of 2254
Got URL for Bill 2567 on row 1986 out of 2254
Got URL for Bill 2569 on row 1987 out of 2254
Got URL for Bill 2572 on row 1988 out of 2254


KeyboardInterrupt: 

In [74]:
for i in range(1989, 2255):
    bill_lookup = str(congress_115_hr_df['BillNumber'][i])
    query_url = f"https://api.congress.gov/v3/bill/115/hr/{bill_lookup}/text?api_key={congress_api_key}"
    
    try:
        response = requests.get(query_url)
        response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
        
        json_data = response.json()
        text_url = json_data['textVersions'][0]['formats'][0]['url']
        congress_115_hr_df.at[i, 'TextUrl'] = text_url
        print(f"Got URL for Bill {bill_lookup} on row {i} out of 2254")

    except (requests.exceptions.RequestException, KeyError, IndexError, ValueError) as e:
        print(f"Failed to fetch data for Bill {bill_lookup}: {e}")
        

    # Introduce a delay to avoid hitting the API rate limit
    time.sleep(2)

Got URL for Bill 2574 on row 1989 out of 2254
Got URL for Bill 2575 on row 1990 out of 2254
Got URL for Bill 2576 on row 1991 out of 2254
Failed to fetch data for Bill 2577: ('Connection aborted.', OSError(0, 'Error'))
Got URL for Bill 2579 on row 1993 out of 2254
Got URL for Bill 2581 on row 1994 out of 2254


KeyboardInterrupt: 

In [75]:
for i in range(1992, 2255):
    bill_lookup = str(congress_115_hr_df['BillNumber'][i])
    query_url = f"https://api.congress.gov/v3/bill/115/hr/{bill_lookup}/text?api_key={congress_api_key}"
    
    try:
        response = requests.get(query_url)
        response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
        
        json_data = response.json()
        text_url = json_data['textVersions'][0]['formats'][0]['url']
        congress_115_hr_df.at[i, 'TextUrl'] = text_url
        print(f"Got URL for Bill {bill_lookup} on row {i} out of 2254")

    except (requests.exceptions.RequestException, KeyError, IndexError, ValueError) as e:
        print(f"Failed to fetch data for Bill {bill_lookup}: {e}")
        

    # Introduce a delay to avoid hitting the API rate limit
    time.sleep(2)

Got URL for Bill 2577 on row 1992 out of 2254
Got URL for Bill 2579 on row 1993 out of 2254
Got URL for Bill 2581 on row 1994 out of 2254
Got URL for Bill 2582 on row 1995 out of 2254
Got URL for Bill 2584 on row 1996 out of 2254
Got URL for Bill 2588 on row 1997 out of 2254
Got URL for Bill 2589 on row 1998 out of 2254
Got URL for Bill 2590 on row 1999 out of 2254
Got URL for Bill 2591 on row 2000 out of 2254
Got URL for Bill 2596 on row 2001 out of 2254
Got URL for Bill 2602 on row 2002 out of 2254
Got URL for Bill 2604 on row 2003 out of 2254
Got URL for Bill 2611 on row 2004 out of 2254
Got URL for Bill 2613 on row 2005 out of 2254
Got URL for Bill 2615 on row 2006 out of 2254
Got URL for Bill 2618 on row 2007 out of 2254
Got URL for Bill 2623 on row 2008 out of 2254
Got URL for Bill 2624 on row 2009 out of 2254
Got URL for Bill 2625 on row 2010 out of 2254
Got URL for Bill 2626 on row 2011 out of 2254
Got URL for Bill 2627 on row 2012 out of 2254
Got URL for Bill 2631 on row 2013 

KeyboardInterrupt: 

In [76]:
for i in range(2204, 2255):
    bill_lookup = str(congress_115_hr_df['BillNumber'][i])
    query_url = f"https://api.congress.gov/v3/bill/115/hr/{bill_lookup}/text?api_key={congress_api_key}"
    
    try:
        response = requests.get(query_url)
        response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
        
        json_data = response.json()
        text_url = json_data['textVersions'][0]['formats'][0]['url']
        congress_115_hr_df.at[i, 'TextUrl'] = text_url
        print(f"Got URL for Bill {bill_lookup} on row {i} out of 2254")

    except (requests.exceptions.RequestException, KeyError, IndexError, ValueError) as e:
        print(f"Failed to fetch data for Bill {bill_lookup}: {e}")
        

    # Introduce a delay to avoid hitting the API rate limit
    time.sleep(2)

Got URL for Bill 3262 on row 2204 out of 2254
Got URL for Bill 3265 on row 2205 out of 2254
Got URL for Bill 3266 on row 2206 out of 2254
Got URL for Bill 3267 on row 2207 out of 2254
Got URL for Bill 3268 on row 2208 out of 2254
Got URL for Bill 3277 on row 2209 out of 2254
Got URL for Bill 3280 on row 2210 out of 2254
Got URL for Bill 3287 on row 2211 out of 2254
Got URL for Bill 3292 on row 2212 out of 2254
Got URL for Bill 3298 on row 2213 out of 2254
Got URL for Bill 3322 on row 2214 out of 2254
Got URL for Bill 3333 on row 2215 out of 2254
Got URL for Bill 3334 on row 2216 out of 2254
Got URL for Bill 3349 on row 2217 out of 2254
Got URL for Bill 3353 on row 2218 out of 2254
Got URL for Bill 3354 on row 2219 out of 2254
Got URL for Bill 3355 on row 2220 out of 2254
Got URL for Bill 3358 on row 2221 out of 2254
Got URL for Bill 3362 on row 2222 out of 2254
Got URL for Bill 3364 on row 2223 out of 2254
Got URL for Bill 3368 on row 2224 out of 2254
Got URL for Bill 3369 on row 2225 

In [77]:
bill_lookup = str(congress_115_hr_df['BillNumber'][2134])
query_url = f"https://api.congress.gov/v3/bill/115/hr/{bill_lookup}/text?api_key={congress_api_key}"
    
response = requests.get(query_url)
response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
        
json_data = response.json()
text_url = json_data['textVersions'][0]['formats'][0]['url']
congress_115_hr_df.at[2134, 'TextUrl'] = text_url

In [79]:
congress_115_hr_df

Unnamed: 0,BillNumber,Summary,TextUrl
0,5,(This measure has not been amended since it wa...,https://www.congress.gov/115/bills/hr5/BILLS-1...
1,7,(This measure has not been amended since it wa...,https://www.congress.gov/115/bills/hr7/BILLS-1...
2,10,Financial CHOICE Act of 2017\n\n (Sec. 2) This...,https://www.congress.gov/115/bills/hr10/BILLS-...
3,15,Raise the Wage Act\n\nThis bill amends the Fai...,https://www.congress.gov/115/bills/hr15/BILLS-...
4,19,Smithsonian Women's History Museum Act\n\nThis...,https://www.congress.gov/115/bills/hr19/BILLS-...
...,...,...,...
2250,3641,Free File Permanence Act of 2017\n\nThis bill ...,https://www.congress.gov/115/bills/hr3641/BILL...
2251,3652,This bill amends the Internal Revenue Code to ...,https://www.congress.gov/115/bills/hr3652/BILL...
2252,3653,Making Your Retirement Accessible Act or the M...,https://www.congress.gov/115/bills/hr3653/BILL...
2253,3655,Designates the facility of the United States P...,https://www.congress.gov/115/bills/hr3655/BILL...


In [80]:
congress_115_hr_df.to_csv('congress_115_hr_urls.csv', index=False)

In [31]:
congress_115_hr_df['TextUrl'][633]

'https://www.congress.gov/115/bills/hr676/BILLS-115hr676ih.htm'

In [38]:
congress_115_hr_df['TextUrl'][320]

'https://www.congress.gov/115/bills/hr337/BILLS-115hr337rfs.htm'