## Legiscan Summary Scraper

In [27]:
# IMPORT PACKAGES
from selenium import webdriver         
from selenium.webdriver.common.by import By   
from selenium.webdriver.support.ui import Select
from webdriver_manager.chrome import ChromeDriverManager
import time                                   
from bs4 import BeautifulSoup                 
import pandas as pd                             
import re                                       
import numpy as np
import requests
import os

In [43]:
# READ IN BILL INFO
bill_data = pd.read_csv("../modified_data/merged_bill_data.csv")
# Make a list of the bill names
names_list = bill_data.bill_name
states_list = bill_data.full_state
# Define a data frame to store the summaries
summary_df = pd.DataFrame(columns = ["full_state", "bill_name", "legiscan_summary"])

In [52]:
# Start a new Chrome web driver
driver = webdriver.Chrome(ChromeDriverManager().install())
# Go to the legiscan website
driver.get("https://legiscan.com/DC/legislation")
# Loop through the bills
for i in range(len(names_list)):
    # Skip Maine and Colorado because buggy - will fill in manually
    if (states_list[i] == "Maine") or (states_list[i] == "Colorado"):
        new_row = pd.DataFrame({"full_state": [states_list[i]], "bill_name": [names_list[i]], "legiscan_summary": ["FILL MANUALLY"]})
        summary_df = pd.concat([summary_df, new_row])
        continue
    # ===============================================================================================
    # Test 1 - selecting state from the dropdown
    # ===============================================================================================
    skip1 = 0
    driver.implicitly_wait(3)
    # Try to select a state from the dropdown
    try: 
        driver.find_element(By.ID, "edit-state-id")
    # If unable to find the dropdown element
    except: 
        # Try another ID
        try:
            driver.find_element(By.ID, "edit-state-id-1")
        except:
            # Try by class name
            try: 
                driver.find_element(By.CLASS_NAME, "form-select")
            # Ok nothing has worked...
            except:
                skip1 = 1
            # Yay the class name method worked!
            else:
                dropdown = driver.find_element(By.CLASS_NAME, "form-select")
        # Yay the second ID method worked!
        else: 
            dropdown = driver.find_element(By.ID, "edit-state-id-1")
    # Yay the first ID method worked!
    else:
        dropdown = driver.find_element(By.ID, "edit-state-id")
    
    # Do we need to move on to the next bill if nothing has worked?
    if skip1 == 1:
        # Yes - leave for  filling in manually
        new_row = pd.DataFrame({"full_state": [states_list[i]], "bill_name": [names_list[i]], "legiscan_summary": ["FILL MANUALLY"]})
        summary_df = pd.concat([summary_df, new_row])
        # Move on to the next bill
        continue
    else:
        # Try selecting from dropdown
        try: 
            select = Select(dropdown)
            select.select_by_visible_text(states_list[i])
        # It did not work, so we will fill in manually later
        except:
            new_row = pd.DataFrame({"full_state": [states_list[i]], "bill_name": [names_list[i]], "legiscan_summary": ["FILL MANUALLY"]})
            summary_df = pd.concat([summary_df, new_row])
        # Yay it worked!
        else:
            select = Select(dropdown)
            select.select_by_visible_text(states_list[i])
        
    # If everything is still working, let's keep going
    # ===============================================================================================
    # Test 2 - setting the bill number
    # ===============================================================================================
    skip2 = 0
    driver.implicitly_wait(3)
    # Try to find the box to edit the bill number
    try:
        driver.find_element(By.ID, "edit-bill-number")
    # Try another way if the first way did not work
    except:
        try: 
            driver.find_element(By.ID, "edit-bill-number-1")
        # Ok, skip bill because they both did not work
        except:
            skip2 = 1
        # Yay this worked
        else:
            box = driver.find_element(By.ID, "edit-bill-number-1")
            # ===================================
            try:
                box.clear()
            # Unable to clear the box
            except:
                skip2 = 1
            else:
                box.clear()
            # If clearing the box was successful
            if skip2 == 0: 
                # Try to send the keys
                try:
                    box.send_keys(names_list[i])
                except:
                    skip2 = 1
                # Sending keys worked!
                else:
                    box.clear()
                    box.send_keys(names_list[i])
            # ===================================
    # Go with the first method since it worked
    else: 
        box = driver.find_element(By.ID, "edit-bill-number")
        # ===================================
        try:
            box.clear()
        # Unable to clear the box
        except:
            skip2 = 1
        else:
            box.clear()
        # If clearing the box was successful
        if skip2 == 0: 
            box.send_keys(names_list[i])
        # ===================================
            
    # Move on to the next if not successful
    if skip2 == 1: 
        new_row = pd.DataFrame({"full_state": [states_list[i]], "bill_name": [names_list[i]], "legiscan_summary": ["FILL MANUALLY"]})
        summary_df = pd.concat([summary_df, new_row])
        continue
    
    # If everything is still working, let's keep going
    # ===============================================================================================
    # Test 3 - pressing the search button
    # ===============================================================================================
    skip3 = 0
    driver.implicitly_wait(3)
    try: 
        driver.find_element(By.ID, "edit-submit")
    except:
        # Try another way
        try: 
            driver.find_element(By.ID, "edit-submit-1")
        except:
            # Ok, trying one more time
            try:
                driver.find_element(By.CLASS_NAME, "form-submit")
            except:
                # Ok nothing worked
                skip3 = 1
            # Yay the class method worked!
            else:
                # Click it
                driver.find_element(By.CLASS_NAME, "form-submit").click()
        # Yay the second ID method worked
        else:
            driver.find_element(By.ID, "edit-submit-1").click()
    # The first ID method worked!
    else:
        driver.find_element(By.ID, "edit-submit").click()
    
    # Move on to the next if not successful
    if skip3 == 1: 
        new_row = pd.DataFrame({"full_state": [states_list[i]], "bill_name": [names_list[i]], "legiscan_summary": ["FILL MANUALLY"]})
        summary_df = pd.concat([summary_df, new_row])
        continue

    # If everything is still working, let's keep going
    # ===============================================================================================
    # Test 4 - finding the bill summary
    # ===============================================================================================
    skip4 = 0
    driver.implicitly_wait(3)
    # Try to find the bill's summary
    try: 
        driver.find_element(By.ID, "bill-summary")
    # Oh no it did not work - let's try another way
    except:
        try: 
            driver.find_element(By.ID, "bill-summary-1")
        # Ok, nothing worked, append blank summary
        except:
            skip4 = 1
        # Yay, the second method worked!
        else:
            summary = driver.find_element(By.ID, "bill-summary-1")
            new_row = pd.DataFrame({"full_state": [states_list[i]], "bill_name": [names_list[i]], "legiscan_summary": [summary.text]})
            summary_df = pd.concat([summary_df, new_row])
    # Yay, it worked!
    else:
        summary = driver.find_element(By.ID, "bill-summary")
        new_row = pd.DataFrame({"full_state": [states_list[i]], "bill_name": [names_list[i]], "legiscan_summary": [summary.text]})
        summary_df = pd.concat([summary_df, new_row])

    # Move on to the next if not successful
    if skip4 == 1: 
        new_row = pd.DataFrame({"full_state": [states_list[i]], "bill_name": [names_list[i]], "legiscan_summary": ["FILL MANUALLY"]})
        summary_df = pd.concat([summary_df, new_row])
        continue
# Quit out of the driver
driver.quit()        

StaleElementReferenceException: Message: stale element reference: element is not attached to the page document
  (Session info: chrome=111.0.5563.146)


In [54]:
summary_df.to_csv("../modified_data/legiscan_summaries.csv")

In [42]:
summaries = []
# Start a new Chrome web driver
driver = webdriver.Chrome(ChromeDriverManager().install())
# Go to legiscan website
driver.get("https://legiscan.com/DC/legislation")
for i in range(len(names_list)):
    print("BILL:", states_list[i], names_list[i])
    if states_list[i] == "Colorado" and names_list[i] == "HB23-1098":
        summaries.append("Colorado HB23-1098 - FILL LATER")
        continue
    # Skip Maine because buggy
    elif states_list[i] == "Maine":
        summaries.append(states_list[i] + names_list[i] + "- FILL LATER")
        continue
    # Wait
    driver.implicitly_wait(3)
    test1 = 0
    # ===============================================================================================
    # TEST: Select state from dropdown
    # ===============================================================================================
    try: 
        dropdown = driver.find_element(By.ID, "edit-state-id")
        select = Select(dropdown)
        select.select_by_visible_text(states_list[i])
    except:
        try:
            dropdown = driver.find_element(By.ID, "edit-state-id-1")
        except:
            print("ERROR Not able to edit bill state dropdown for:", states_list[i], names_list[i])
            test1 = 1
        else:
            dropdown = driver.find_element(By.ID, "edit-state-id-1")
            select = Select(dropdown)
            select.select_by_visible_text(states_list[i])
    else: 
        dropdown = driver.find_element(By.ID, "edit-state-id")
        select = Select(dropdown)
        select.select_by_visible_text(states_list[i])
    # Break out of loop if failed and append a message to fill in summary later
    if test1 == 1: 
        summaries.append(states_list[i] + names_list[i] + "- FILL LATER")
        break
    # ===============================================================================================
    # ===============================================================================================
    try:
        driver.implicitly_wait(3)
        driver.find_element(By.ID, "edit-bill-number").clear()
        driver.find_element(By.ID, "edit-bill-number").send_keys(names_list[i])
    # If the code runs with error
    except:
        # Try to find the bill number box another way
        try:
            driver.implicitly_wait(3)
            driver.find_element(By.ID, "edit-bill-number-1").clear()
            driver.find_element(By.ID, "edit-bill-number-1").send_keys(names_list[i])
        # If none of this works, print an error statement
        except: 
            print("ERROR Not able to edit the bill number for:", states_list[i], names_list[i])
            summaries.append(states_list[i] + names_list[i] + "- FILL LATER")
        # If the other way works, clear the box and send the keys
        else:
            driver.implicitly_wait(3)
            driver.find_element(By.ID, "edit-bill-number-1").clear()
            #input_box = driver.find_element(By.ID, "edit-bill-number-1")
            #input_box.send_keys(names_list[i])
            driver.find_element(By.ID, "edit-bill-number-1").send_keys(names_list[i])
            # Click search
            # driver.find_element(By.ID, "edit-submit").click()
            driver.find_element(By.CLASS_NAME, "form-submit").click()
            # Try to find the bill summary
            try: 
                driver.find_element(By.ID, "bill-summary")
            # If no bill summary found, fill with NaN and print error message
            except: 
                print("ERROR Could not find bill summary for:", states_list[i], names_list[i])
                summaries.append(states_list[i] + names_list[i] + "- FILL LATER")
            # If bill summary is found, add to list
            else:
                summary_element = driver.find_element(By.ID, "bill-summary")
                summaries.append(summary_element.text)
    # If the first way of finding the box with the bill number works, run the following
    else:
        driver.implicitly_wait(3)
        # Clear box and send bill number to box
        driver.find_element(By.ID, "edit-bill-number").clear()
        #input_box = driver.find_element(By.ID, "edit-bill-number")
        driver.find_element(By.ID, "edit-bill-number").send_keys(names_list[i])
        # Click search
        # driver.find_element(By.ID, "edit-submit").click()
        driver.find_element(By.CLASS_NAME, "form-submit").click()
        # Try to find the bill summary
        try: 
            driver.find_element(By.ID, "bill-summary")
        # If no bill summary found, fill with NaN and print error message
        except: 
            print("ERROR Could not find bill summary for:", states_list[i], names_list[i])
            summaries.append(states_list[i] + names_list[i] + "- FILL LATER")
        # If bill summary is found, add to list
        else:
            summary_element = driver.find_element(By.ID, "bill-summary")
            summaries.append(summary_element.text)
    # ===============================================================================================
    # ===============================================================================================
driver.quit()

BILL: Alaska HB27
BILL: Alaska HB105
BILL: Alaska SB96
BILL: Arizona SB1028
BILL: Arizona SB1026
BILL: Arizona SB1001
BILL: Arizona SB1030
BILL: Arizona SB1040
BILL: Arizona SB1702
BILL: Arizona SB1700
BILL: Arizona SB1698
BILL: Arizona SB1417
BILL: Arizona SB1694
BILL: Arizona HB2711
BILL: Arkansas HB1156
BILL: Arkansas SB43
BILL: Arkansas SB125
BILL: Arkansas SB199
BILL: Arkansas SB270
BILL: Arkansas HB1468
BILL: Arkansas SB294
BILL: Arkansas HB1615
BILL: California AB1314
BILL: Colorado HB23-1098
BILL: Connecticut HB6213
BILL: Connecticut SB468
BILL: Florida HB991
BILL: Florida HB1069
BILL: Florida HB1223
BILL: Florida SB1320
BILL: Florida HB999
BILL: Florida SB266
BILL: Florida SB254
BILL: Florida HB1421
BILL: Florida SB1674
BILL: Florida HB1521
BILL: Georgia SB88
BILL: Georgia SB141
ERROR Could not find bill summary for: Georgia SB141
BILL: Georgia SB140


ElementNotInteractableException: Message: element not interactable
  (Session info: chrome=111.0.5563.146)


In [35]:
legiscan_summaries_dict = {"full_state": states_list[:243], "bill_name": names_list[:243], "legiscan_summary": summaries}
legiscan_summaries = pd.DataFrame(legiscan_summaries_dict)

In [40]:
for i in range(len(legiscan_summaries.legiscan_summary)): 
    print("=========================")
    print(legiscan_summaries.full_state[i])
    print(legiscan_summaries.bill_name[i])
    print(legiscan_summaries.legiscan_summary[i])

Alaska
HB27
nan
Alaska
HB105
Civil rights: religious discrimination; Michigan religious freedom restoration act; create. Creates new act.
Alaska
SB96
Civil rights: privacy; restriction of student access to certain restrooms and changing areas based on biological sex; provide for. Creates new act.
Arizona
SB1028
Crimes: penalties; penalties for parents or guardians who procure gender transition surgeries or irreversible hormonal procedures for children under 18; provide for. Amends sec. 136b of 1931 PA 328 (MCL 750.136b).
Arizona
SB1026
Female sports team participation restricted to female sex.
Arizona
SB1001
Participation in athletic teams provision
Arizona
SB1030
Drag performances as adult entertainment classification; location restrictions on adult entertainment establishment
Arizona
SB1040
Education; participation in athletic teams provided.
Arizona
SB1702
Drag performances classified as adult entertainment, and location restrictions of an adult entertainment establishment expanded.