In [None]:
#install prereqs if necessary !pip install selenium 

# Setup

In [131]:
#import packages

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common import keys
import numpy as np
import pandas as pd
from selenium.webdriver import ActionChains
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup
import time



We then create a driver, and point it at the NCSL database on state action in coronavirus relief funds.

In [424]:
driver = webdriver.Chrome()

driver.get("https://app.powerbi.com/view?r=eyJrIjoiMTcyNGQ5ZmUtNTY3Mi00YjViLTgyNjMtZjk1NzVkYTUyZGUzIiwidCI6IjM4MmZiOGIwLTRkYzMtNDEwNy04MGJkLTM1OTViMjQzMmZhZSIsImMiOjZ9&pageName=ReportSection")


# Helper Functions

We then define a series of functions to help us get the information from the table that we need. 

In [612]:
def get_rows(driver): 
    row_holder = driver.find_elements(By.XPATH, "//div[@role='row']")
    return row_holder

In [805]:
def get_text(rows): 
    results_text = [None] * len(rows)
    for i in range(len(rows)): 
        results_text[i] = rows[i].text
    return results_text

In [615]:
def get_grid(driver): 
    grids = driver.find_elements(By.XPATH, "//div[@role='grid']")
    return grids

We can test this process to confirm that the results are roughly what we want. However, we see that for a single pass, we only obtain a slice of the data, rather than the complete database. 

In [807]:
get_text(get_rows(driver))

['Row Selection\nCategory\nState\n Description\nSource',
 'Select Row\nUnemployment/Workforce Development\nWyoming\n$25 million to replenish the unemployment insurance trust fund.\nGovernor',
 'Select Row\nOther\nWyoming\n$275,000 for COVID-19 in state advertising.\nState',
 'Select Row\nHealth\nWyoming\n$30 million for health testing and tracing.\nState',
 'Select Row\nOther\nWyoming\n$30 million to the Wyoming Energy Rebound Program to give companies money needed to complete specific projects placed on hold because of the pandemic, including plugging and abandoning wells, and drilling uncompleted wells.\nMedia',
 'Select Row\nEmergency Management/Disaster Response\nWyoming\n$5 million for OSHA compliance.\nState',
 'Select Row\nOther\nWyoming\n$6,000 for a COVID-19 app.\nState',
 'Select Row\nHigher Education\nWyoming\nApproximately $26.5 million to the University of Wyoming for technology equipment, purchasing personal protective equipment, teaching support, and testing.\nState',
 '

In [806]:
results = np.append(results, get_text(get_rows(driver)))


['Row Selection\nCategory\nState\n Description\nSource',
 'Select Row\nUnemployment/Workforce Development\nWyoming\n$25 million to replenish the unemployment insurance trust fund.\nGovernor',
 'Select Row\nOther\nWyoming\n$275,000 for COVID-19 in state advertising.\nState',
 'Select Row\nHealth\nWyoming\n$30 million for health testing and tracing.\nState',
 'Select Row\nOther\nWyoming\n$30 million to the Wyoming Energy Rebound Program to give companies money needed to complete specific projects placed on hold because of the pandemic, including plugging and abandoning wells, and drilling uncompleted wells.\nMedia',
 'Select Row\nEmergency Management/Disaster Response\nWyoming\n$5 million for OSHA compliance.\nState',
 'Select Row\nOther\nWyoming\n$6,000 for a COVID-19 app.\nState',
 'Select Row\nHigher Education\nWyoming\nApproximately $26.5 million to the University of Wyoming for technology equipment, purchasing personal protective equipment, teaching support, and testing.\nState',
 '

As such what we need to do is scroll through the entire database, and continuously take slices and update our data. We create a series of functions to help us accomplish this.  

In [803]:
def get_rowcount(element):
    row_count = element[1].get_attribute("aria-rowindex")
    return row_count

In [567]:
def last_row(element):
    last_row = element[-1].get_attribute("aria-rowindex")
    return last_row

In [698]:
def move_cursor(driver): 
    data = driver.find_elements(By.XPATH, "//div[@role='gridcell']")
    data[-2].send_keys(Keys.DOWN)

We can test these functions to make sure that as we move the cursor our data indeed changes. 

In [712]:
test_x = get_rows(driver)

In [713]:
get_rowcount(test_x)

'24'

In [714]:
last_row(test_x)

'43'

In [707]:
move_cursor(driver)

# Scrape Data

We can then run a script to scroll through the database and scrape what we want. We also store the row numbers that we get, to make sure that we have all the rows we need. We manually identify that there are 1333 entries. While running this script, it is best to have the header window selected (unless disabled earlier) Note: this is not the most efficient script possible, as we are still obtaining duplicates through this process. However, we can simply remove these duplicates later. 

In [844]:
results = np.empty(0, dtype=str)
row_store = np.empty(0, dtype=str)

while (int(last_row(get_rows(driver))) < 1335): 
    
    results_text = get_text(get_rows(driver))

    results = np.append(results, results_text)
    
    for i in get_rows(driver): 
        row_store = np.append(row_store, i.get_attribute("aria-rowindex"))
    
    move_cursor(driver)
    time.sleep(5)

In [813]:
row_store

array(['1', '2', '3', ..., '1322', '1323', '1324'], dtype='<U4')

We see that we don't have all the data that we need, so we need to perform one last pass at the bottom of the data. 

In [814]:
last_10 = get_rows(driver)

In [815]:
last_row(last_10)

'1335'

In [816]:
results = np.append(results, get_text(last_10))

In [817]:
for i in last_10: 
    row_store = np.append(row_store, i.get_attribute("aria-rowindex"))

# Clean and Export

We now need to clean some of this data by removing duplicates, and splitting the list into columns, turning it into a dataframe, and exporting that data. NOTE: We save detailed cleaning of these data for later. 

In [830]:
results_tests = results[1:100]


In [835]:
len(np.unique(results_tests))

71

In [838]:
results_unique = np.unique(results)

In [839]:
len(results_unique)

1333

In [840]:
df = pd.DataFrame([x.split('\n') for x in results_unique])

In [842]:
df.head()

Unnamed: 0,0,1,2,3,4,5,6
0,Row Selection,Category,State,Description,Source,,
1,Select Row,Community/Human Services,Alabama,$13.3 million to faith-based organizations.,State,,
2,Select Row,Community/Human Services,Alabama,$18.4 million to non-profits.,State,,
3,Select Row,Community/Human Services,Alabama,$3.6 million for the Alabama Food Banks Program.,State,,
4,Select Row,Community/Human Services,Alabama,"$757,000 for the Alabama AHEC Program.",State,,


In [843]:
df.to_csv('ncsl.csv')

# SCRATCH WORK

The rest of this document contains old, failed, testing code. 

In [752]:
results_list = list(results)

In [756]:
test = get_rows(driver)


In [757]:
test[1].text

'Select Row\nUnemployment/Workforce Development\nWyoming\n$25 million to replenish the unemployment insurance trust fund.\nGovernor'

In [763]:
test_array = np.array(test)
test_array[-1].text

'Select Row\nLocal Government\nWyoming\nGrants to local governments.\nState'

In [765]:
results[500].text

WebDriverException: Message: unknown error: unhandled inspector error: {"code":-32000,"message":"No node with given id found"}
  (Session info: chrome=113.0.5672.126)
Stacktrace:
0   chromedriver                        0x0000000100f3b8ac chromedriver + 4257964
1   chromedriver                        0x0000000100f33f40 chromedriver + 4226880
2   chromedriver                        0x0000000100b709d4 chromedriver + 281044
3   chromedriver                        0x0000000100b5b0b8 chromedriver + 192696
4   chromedriver                        0x0000000100b5934c chromedriver + 185164
5   chromedriver                        0x0000000100b59620 chromedriver + 185888
6   chromedriver                        0x0000000100b7df6c chromedriver + 335724
7   chromedriver                        0x0000000100b755c8 chromedriver + 300488
8   chromedriver                        0x0000000100b7410c chromedriver + 295180
9   chromedriver                        0x0000000100b76b34 chromedriver + 305972
10  chromedriver                        0x0000000100b76c24 chromedriver + 306212
11  chromedriver                        0x0000000100ba60ec chromedriver + 499948
12  chromedriver                        0x0000000100ba11cc chromedriver + 479692
13  chromedriver                        0x0000000100be27e4 chromedriver + 747492
14  chromedriver                        0x0000000100b9f98c chromedriver + 473484
15  chromedriver                        0x0000000100ba098c chromedriver + 477580
16  chromedriver                        0x0000000100efa900 chromedriver + 3991808
17  chromedriver                        0x0000000100efe354 chromedriver + 4006740
18  chromedriver                        0x0000000100efe940 chromedriver + 4008256
19  chromedriver                        0x0000000100f0433c chromedriver + 4031292
20  chromedriver                        0x0000000100efef34 chromedriver + 4009780
21  chromedriver                        0x0000000100ed7490 chromedriver + 3847312
22  chromedriver                        0x0000000100f1c9f4 chromedriver + 4131316
23  chromedriver                        0x0000000100f1cb4c chromedriver + 4131660
24  chromedriver                        0x0000000100f2d230 chromedriver + 4198960
25  libsystem_pthread.dylib             0x00000001a8a6606c _pthread_start + 148
26  libsystem_pthread.dylib             0x00000001a8a60e2c thread_start + 8


In [737]:
row_store

array(['1', '2', '3', ..., '1333', '1334', '1335'], dtype='<U4')

In [749]:
results[1].text

WebDriverException: Message: unknown error: unhandled inspector error: {"code":-32000,"message":"No node with given id found"}
  (Session info: chrome=113.0.5672.126)
Stacktrace:
0   chromedriver                        0x0000000100f3b8ac chromedriver + 4257964
1   chromedriver                        0x0000000100f33f40 chromedriver + 4226880
2   chromedriver                        0x0000000100b709d4 chromedriver + 281044
3   chromedriver                        0x0000000100b5b0b8 chromedriver + 192696
4   chromedriver                        0x0000000100b5934c chromedriver + 185164
5   chromedriver                        0x0000000100b59620 chromedriver + 185888
6   chromedriver                        0x0000000100b7df6c chromedriver + 335724
7   chromedriver                        0x0000000100b755c8 chromedriver + 300488
8   chromedriver                        0x0000000100b7410c chromedriver + 295180
9   chromedriver                        0x0000000100b76b34 chromedriver + 305972
10  chromedriver                        0x0000000100b76c24 chromedriver + 306212
11  chromedriver                        0x0000000100ba60ec chromedriver + 499948
12  chromedriver                        0x0000000100ba11cc chromedriver + 479692
13  chromedriver                        0x0000000100be27e4 chromedriver + 747492
14  chromedriver                        0x0000000100b9f98c chromedriver + 473484
15  chromedriver                        0x0000000100ba098c chromedriver + 477580
16  chromedriver                        0x0000000100efa900 chromedriver + 3991808
17  chromedriver                        0x0000000100efe354 chromedriver + 4006740
18  chromedriver                        0x0000000100efe940 chromedriver + 4008256
19  chromedriver                        0x0000000100f0433c chromedriver + 4031292
20  chromedriver                        0x0000000100efef34 chromedriver + 4009780
21  chromedriver                        0x0000000100ed7490 chromedriver + 3847312
22  chromedriver                        0x0000000100f1c9f4 chromedriver + 4131316
23  chromedriver                        0x0000000100f1cb4c chromedriver + 4131660
24  chromedriver                        0x0000000100f2d230 chromedriver + 4198960
25  libsystem_pthread.dylib             0x00000001a8a6606c _pthread_start + 148
26  libsystem_pthread.dylib             0x00000001a8a60e2c thread_start + 8


In [740]:
results_text = results 

for i in results: 
    results_text[i] = results[i].text

IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices

In [392]:
actions = ActionChains(driver)

In [470]:
scroll_button = driver.find_elements(By.XPATH, "//div[@class='scroll-bar-part-bar']")

In [482]:
grid_holder = driver.find_elements(By.XPATH, "//div[@role='row']")

In [471]:
len(scroll_button)

2

In [483]:
grid_holder[1].get_attribute("aria-rowindex")  ## aria-rowcount for GRID xpath, aria-rowindex for ROW xpath

'227'

In [361]:
grid_holder[1].location 

{'x': 121, 'y': 372}

In [362]:
grid_holder[2].location 

{'x': 121, 'y': 413}

In [363]:
413-372

41

In [465]:
soup = BeautifulSoup(driver.page_source, "html.parser")

In [466]:
rows = soup.find_all('div', role='row')


In [467]:
for row in rows:
    print(row.text)

Row SelectionCategory State Description Source 
Select RowOtherColorado$84.84 million to remain available for payroll and other necessary expenditures for public safety, public health, health care, human services, and similar employees whose services are substantially dedicated to mitigating or responding to the COVID-19 emergency.State
Select RowK-12 EducationConnecticut $164.5 million to facilitate the safe reopening of schools in the Fall and to support the academic success of all students. The funds will support bridging the technology gap, providing quality academic supports for all students, and implementing public health best practices to keep students and staff safe.State
Select RowHousing AssistanceConnecticut$10 million for mortgage relief to homeowners.State
Select RowHealthConnecticut$100 million to PPE is necessary for  frontline workers and first responders. PPE includes a broad range of items with a focus on surgical masks, face masks, face shields, gowns, and gloves.Sta

In [396]:
len(grid_holder)

21

In [750]:
for i in grid_holder: 
    print(i.text)

WebDriverException: Message: unknown error: unhandled inspector error: {"code":-32000,"message":"No node with given id found"}
  (Session info: chrome=113.0.5672.126)
Stacktrace:
0   chromedriver                        0x0000000100f3b8ac chromedriver + 4257964
1   chromedriver                        0x0000000100f33f40 chromedriver + 4226880
2   chromedriver                        0x0000000100b709d4 chromedriver + 281044
3   chromedriver                        0x0000000100b5b0b8 chromedriver + 192696
4   chromedriver                        0x0000000100b5934c chromedriver + 185164
5   chromedriver                        0x0000000100b59620 chromedriver + 185888
6   chromedriver                        0x0000000100b7df6c chromedriver + 335724
7   chromedriver                        0x0000000100b755c8 chromedriver + 300488
8   chromedriver                        0x0000000100b7410c chromedriver + 295180
9   chromedriver                        0x0000000100b76b34 chromedriver + 305972
10  chromedriver                        0x0000000100b76c24 chromedriver + 306212
11  chromedriver                        0x0000000100ba60ec chromedriver + 499948
12  chromedriver                        0x0000000100ba11cc chromedriver + 479692
13  chromedriver                        0x0000000100be27e4 chromedriver + 747492
14  chromedriver                        0x0000000100b9f98c chromedriver + 473484
15  chromedriver                        0x0000000100ba098c chromedriver + 477580
16  chromedriver                        0x0000000100efa900 chromedriver + 3991808
17  chromedriver                        0x0000000100efe354 chromedriver + 4006740
18  chromedriver                        0x0000000100efe940 chromedriver + 4008256
19  chromedriver                        0x0000000100f0433c chromedriver + 4031292
20  chromedriver                        0x0000000100efef34 chromedriver + 4009780
21  chromedriver                        0x0000000100ed7490 chromedriver + 3847312
22  chromedriver                        0x0000000100f1c9f4 chromedriver + 4131316
23  chromedriver                        0x0000000100f1cb4c chromedriver + 4131660
24  chromedriver                        0x0000000100f2d230 chromedriver + 4198960
25  libsystem_pthread.dylib             0x00000001a8a6606c _pthread_start + 148
26  libsystem_pthread.dylib             0x00000001a8a60e2c thread_start + 8


In [485]:
grid_holder[1].text

'K-12 Education\nConnecticut\n$164.5 million to facilitate the safe reopening of schools in the Fall and to support the academic success of all students. The funds will support bridging the technology gap, providing quality academic supports for all students, and implementing public health best practices to keep students and staff safe.\nState'

In [267]:
data_holder = grid_holder[0].text

In [268]:
df = pd.DataFrame([x.split('\n', 3) for x in data_holder.split('Select Row')])

In [252]:
df.shape

(21, 4)

In [269]:
df

Unnamed: 0,0,1,2,3
0,Row Selection,Category,State,Description\nSource\n
1,,Small Business Relief,Alabama,"$206 million for Revive Plus, a grant program ..."
2,,Health,Alabama,$1.2 million UAB Post-Acute Care Program.\nSta...
3,,Corrections/Courts/Judiciary,Alabama,$10 million to courts.\nState\n
4,,Health,Alabama,$10.3 million for healthcare costs.\nState\n
5,,Health,Alabama,$10.6 PPE Production Expansion.\nState\n
6,,Technology/Broadband,Alabama,$100 million for educational remote learning d...
7,,Community/Human Services,Alabama,$13.3 million to faith-based organizations.\nS...
8,,Higher Education,Alabama,$16 million for public universities remote ins...
9,,Health,Alabama,$16 million for the Medical Provider Program.\...


In [254]:
scroll_bar = driver.find_element(By.XPATH, "//div[@class='scroll-bar-div']")


In [270]:
driver.execute_script("arguments[0].scrollTop = arguments[0].scrollHeight;", scroll_bar)

In [261]:
scroll_bar.send_keys(Keys.END)

ElementNotInteractableException: Message: element not interactable
  (Session info: chrome=113.0.5672.126)
Stacktrace:
0   chromedriver                        0x00000001010838ac chromedriver + 4257964
1   chromedriver                        0x000000010107bf40 chromedriver + 4226880
2   chromedriver                        0x0000000100cb8854 chromedriver + 280660
3   chromedriver                        0x0000000100cedd78 chromedriver + 499064
4   chromedriver                        0x0000000100ced498 chromedriver + 496792
5   chromedriver                        0x0000000100ce91cc chromedriver + 479692
6   chromedriver                        0x0000000100d2a7e4 chromedriver + 747492
7   chromedriver                        0x0000000100ce798c chromedriver + 473484
8   chromedriver                        0x0000000100ce898c chromedriver + 477580
9   chromedriver                        0x0000000101042900 chromedriver + 3991808
10  chromedriver                        0x0000000101046354 chromedriver + 4006740
11  chromedriver                        0x0000000101046940 chromedriver + 4008256
12  chromedriver                        0x000000010104c33c chromedriver + 4031292
13  chromedriver                        0x0000000101046f34 chromedriver + 4009780
14  chromedriver                        0x000000010101f490 chromedriver + 3847312
15  chromedriver                        0x00000001010649f4 chromedriver + 4131316
16  chromedriver                        0x0000000101064b4c chromedriver + 4131660
17  chromedriver                        0x0000000101075230 chromedriver + 4198960
18  libsystem_pthread.dylib             0x00000001a8a6606c _pthread_start + 148
19  libsystem_pthread.dylib             0x00000001a8a60e2c thread_start + 8


In [239]:
actions.move_to_element(scroll_button[0]).click().perform()

In [240]:
grid_holder_1 = driver.find_elements(By.XPATH, "//div[@role='grid']")

In [243]:
df_2 = pd.DataFrame([x.split('\n', 3) for x in grid_holder_1[0].text.split('Select Row')])

In [244]:
df_2

Unnamed: 0,0,1,2,3
0,Row Selection,Category,State,Description\nSource\n
1,,Small Business Relief,Alabama,"$206 million for Revive Plus, a grant program ..."
2,,Health,Alabama,$1.2 million UAB Post-Acute Care Program.\nSta...
3,,Corrections/Courts/Judiciary,Alabama,$10 million to courts.\nState\n
4,,Health,Alabama,$10.3 million for healthcare costs.\nState\n
5,,Health,Alabama,$10.6 PPE Production Expansion.\nState\n
6,,Technology/Broadband,Alabama,$100 million for educational remote learning d...
7,,Community/Human Services,Alabama,$13.3 million to faith-based organizations.\nS...
8,,Higher Education,Alabama,$16 million for public universities remote ins...
9,,Health,Alabama,$16 million for the Medical Provider Program.\...


In [133]:
l = driver.find_elements(By.TAG_NAME, "visual-modern")
# to get the row count len method
print (len(l))

0


In [366]:
41*21

861

In [382]:
driver.execute_script("window.scrollBy(0, 100)")

In [394]:
grid_holder[1]

<selenium.webdriver.remote.webelement.WebElement (session="a65acb62688b7aa1799c10bcc999d251", element="558DBE7AD3994E5AAB4EC28CAA8F318E_element_39")>

In [398]:
actions.move_to_element(grid_holder[1]).click().scroll_by_amount(0, 410).perform()

In [344]:
actions.scroll_to_element(bleh2[20]).perform

<bound method ActionChains.perform of <selenium.webdriver.common.action_chains.ActionChains object at 0x7f8cd0bbd750>>

In [348]:
bleh2[20].location

{'x': 121, 'y': 701}

In [341]:
bleh2[20].get_attribute("aria-rowindex")

'21'

In [339]:
blah

'2'

In [351]:
bleh1 = driver.find_elements(By.XPATH, "//div[@class='mid-viewport']")
bleh2 = driver.find_elements(By.XPATH, "//div[@role='row']")

In [343]:
driver.find_elements(By.XPATH, "//div[@role='row']")[]

21

In [352]:
bleh1[0].text

'Select Row\nSmall Business Relief\nAlabama\n$206 million for Revive Plus, a grant program for small businesses, non-profits, and faith-based organizations. Revive Plus is the second wave of funding for these organizations with 50 or fewer employees and will award grants of up to $20,000 for expenses they have incurred due to operational interruptions caused by the pandemic and related business closures.\nState\nSelect Row\nHealth\nAlabama\n$1.2 million UAB Post-Acute Care Program.\nState\nSelect Row\nCorrections/Courts/Judiciary\nAlabama\n$10 million to courts.\nState\nSelect Row\nHealth\nAlabama\n$10.3 million for healthcare costs.\nState\nSelect Row\nHealth\nAlabama\n$10.6 PPE Production Expansion.\nState\nSelect Row\nTechnology/Broadband\nAlabama\n$100 million for educational remote learning devices.\nState\nSelect Row\nCommunity/Human Services\nAlabama\n$13.3 million to faith-based organizations.\nState\nSelect Row\nHigher Education\nAlabama\n$16 million for public universities re

In [289]:
grid_holder[1].text

'Select Row\nSmall Business Relief\nAlabama\n$206 million for Revive Plus, a grant program for small businesses, non-profits, and faith-based organizations. Revive Plus is the second wave of funding for these organizations with 50 or fewer employees and will award grants of up to $20,000 for expenses they have incurred due to operational interruptions caused by the pandemic and related business closures.\nState'

In [219]:
for i in b: 
    print(i.text)

Scroll down


In [207]:
b[0].text

'Category\nState\n Description\nSource\nHealth\nAlabama\n$50 million to hospitals.\nState\nTechnology/Broadband\nAlabama\n$50.3 million to a broadband voucher program.\nState\nHealth\nAlabama\n$6 million for a mental health provider program.\nState\nHealth\nAlabama\n$62.5 million for a nursing home program.\nState\nOther\nAlabama\n$7.3 million for a tourism marketing recovery program.\nState\nK-12 Education\nAlabama\n$70 million for education health and wellness.\nState\nCommunity/Human Services\nAlabama\n$757,000 for the Alabama AHEC Program.\nState\nCorrections/Courts/Judiciary\nAlabama\n$79 million for corrections.\nState\nK-12 Education\nAlabama\n$901,800 for the Pre-K Virtual Learning Program\nState\nSmall Business Relief\nAlabama\n$96.6 million to small business relief.\nState\nOther\nAlabama\nApproximately $10 million for state equipment.\nState\nHealth\nAlabama\nApproximately $18.3 million for nursing home testing\nState\nOther\nAlabama\nApproximately $212 million to state agen

In [200]:
b[0].get_attribute("aria-rowcount")

'501'

In [201]:
test = b[0].text

In [205]:
test

'Category\nState\n Description\nSource\nOther\nAlabama\n$7.3 million for a tourism marketing recovery program.\nState\nK-12 Education\nAlabama\n$70 million for education health and wellness.\nState\nCommunity/Human Services\nAlabama\n$757,000 for the Alabama AHEC Program.\nState\nCorrections/Courts/Judiciary\nAlabama\n$79 million for corrections.\nState\nK-12 Education\nAlabama\n$901,800 for the Pre-K Virtual Learning Program\nState\nSmall Business Relief\nAlabama\n$96.6 million to small business relief.\nState\nOther\nAlabama\nApproximately $10 million for state equipment.\nState\nHealth\nAlabama\nApproximately $18.3 million for nursing home testing\nState\nOther\nAlabama\nApproximately $212 million to state agencies.\nState\nTechnology/Broadband\nAlaska\n$1 million Department Laptops for Telework.\nState\nK-12 Education\nAlaska\n$1 million in Grant Funding for Private Schools.\nState\nHealth\nAlaska\n$1.3 million Blood Bank of America ‐ COVID.\nState\nOther\nAlaska\n$1.3 million LAW 

In [202]:
df = pd.DataFrame([x.split('\n', 3) for x in test.split('Select Row')])

In [203]:
df.shape

(1, 4)

In [204]:
df

Unnamed: 0,0,1,2,3
0,Category,State,Description,Source\nOther\nAlabama\n$7.3 million for a tou...


In [188]:
# Split the string into a list of rows.
rows = test.split('Select Row')

In [196]:
rows[8]

'\nHigher Education\nAlabama\n$16 million for public universities remote instruction and learning program.\nState\n'

In [124]:
# Split the string into a list of rows.

# Convert each row into a list of columns.
columns = [row.split(',', 2) for row in rows]



In [125]:
columns[7]

['Community/Human Services',
 ' Colorado',
 ' $22 million to the Colorado Department of Human services (CDHS) for expenditures incurred to respond to second-order effects of the COVID-19 emergency, specifically for increased caseload due to the COVID-19-related recession., State']

In [129]:
df

Unnamed: 0,0,1,2
0,Category,State,"Description, Source"
1,Small Business Relief,Michigan,$1 million for agriculture and rural developm...
2,Health,South Carolina,"$125 million to the hospital relief fund., Le..."
3,Housing Assistance,Wisconsin,$15 million to be invested in Wisconsin’s Low...
4,K-12 Education,Connecticut,$164.5 million to facilitate the safe reopeni...
5,Housing Assistance,Mississippi,$20 million for the Rental Assistance Grant P...
6,Small Business Relief,Alabama,"$206 million for Revive Plus, a grant program..."
7,Community/Human Services,Colorado,$22 million to the Colorado Department of Hum...
8,Other,Indiana,$4 million to the Indiana Department of Agric...
9,Unemployment/Workforce Development,Indiana,$400 million in remaining federal pandemic ai...


In [130]:
test

'Category, State, Description, Source\n  Small Business Relief, Michigan, $1 million for agriculture and rural development., Legislation\nHealth, South Carolina, $125 million to the hospital relief fund., Legislation\nHousing Assistance, Wisconsin, $15 million to be invested in Wisconsin’s Low Income Home Energy Assistance Program (LIHEAP), a program that is currently federally-funded and helps Wisconsinites with their heating costs., Governor\nK-12 Education, Connecticut, $164.5 million to facilitate the safe reopening of schools in the Fall and to support the academic success of all students. The funds will support bridging the technology gap, providing quality academic supports for all students, and implementing public health best practices to keep students and staff safe., State\nHousing Assistance, Mississippi, $20 million for the Rental Assistance Grant Program. Provides grants up to $30,000 to eligible rental businesses that lost rental income from March, 1 2020, through Decembe

In [59]:
b = driver.find_element(By.CSS_SELECTOR, '[aria-label=“Focus mode”]') 
# to get the row count len method
print (len(b))



InvalidSelectorException: Message: invalid selector: An invalid or illegal selector was specified
  (Session info: chrome=113.0.5672.126)
Stacktrace:
0   chromedriver                        0x0000000102c9b8ac chromedriver + 4257964
1   chromedriver                        0x0000000102c93f40 chromedriver + 4226880
2   chromedriver                        0x00000001028d09d4 chromedriver + 281044
3   chromedriver                        0x00000001028d5200 chromedriver + 299520
4   chromedriver                        0x00000001028d6b34 chromedriver + 305972
5   chromedriver                        0x00000001028d6c24 chromedriver + 306212
6   chromedriver                        0x000000010290b718 chromedriver + 522008
7   chromedriver                        0x00000001029427e4 chromedriver + 747492
8   chromedriver                        0x00000001028ff98c chromedriver + 473484
9   chromedriver                        0x000000010290098c chromedriver + 477580
10  chromedriver                        0x0000000102c5a900 chromedriver + 3991808
11  chromedriver                        0x0000000102c5e354 chromedriver + 4006740
12  chromedriver                        0x0000000102c5e940 chromedriver + 4008256
13  chromedriver                        0x0000000102c6433c chromedriver + 4031292
14  chromedriver                        0x0000000102c5ef34 chromedriver + 4009780
15  chromedriver                        0x0000000102c37490 chromedriver + 3847312
16  chromedriver                        0x0000000102c7c9f4 chromedriver + 4131316
17  chromedriver                        0x0000000102c7cb4c chromedriver + 4131660
18  chromedriver                        0x0000000102c8d230 chromedriver + 4198960
19  libsystem_pthread.dylib             0x00000001a8a6606c _pthread_start + 148
20  libsystem_pthread.dylib             0x00000001a8a60e2c thread_start + 8


In [37]:
l[5]

<selenium.webdriver.remote.webelement.WebElement (session="884e6dd0a253664bc66805bf8cc0acb2", element="8E31AC977D729652D0441ED2D11B8653_element_370")>

In [49]:
b[8].text

''

In [5]:
driver.find_element(By.ID, 'focus-mode-button').click()

NoSuchElementException: Message: no such element: Unable to locate element: {"method":"css selector","selector":"[id="focus-mode-button"]"}
  (Session info: chrome=113.0.5672.126)
Stacktrace:
0   chromedriver                        0x0000000104a3b8ac chromedriver + 4257964
1   chromedriver                        0x0000000104a33f40 chromedriver + 4226880
2   chromedriver                        0x00000001046709d4 chromedriver + 281044
3   chromedriver                        0x00000001046aba34 chromedriver + 522804
4   chromedriver                        0x00000001046e27e4 chromedriver + 747492
5   chromedriver                        0x000000010469f98c chromedriver + 473484
6   chromedriver                        0x00000001046a098c chromedriver + 477580
7   chromedriver                        0x00000001049fa900 chromedriver + 3991808
8   chromedriver                        0x00000001049fe354 chromedriver + 4006740
9   chromedriver                        0x00000001049fe940 chromedriver + 4008256
10  chromedriver                        0x0000000104a0433c chromedriver + 4031292
11  chromedriver                        0x00000001049fef34 chromedriver + 4009780
12  chromedriver                        0x00000001049d7490 chromedriver + 3847312
13  chromedriver                        0x0000000104a1c9f4 chromedriver + 4131316
14  chromedriver                        0x0000000104a1cb4c chromedriver + 4131660
15  chromedriver                        0x0000000104a2d230 chromedriver + 4198960
16  libsystem_pthread.dylib             0x00000001a8a6606c _pthread_start + 148
17  libsystem_pthread.dylib             0x00000001a8a60e2c thread_start + 8


In [412]:
driver.close()

In [None]:
total_rows = 1332

In [411]:
driver.switch_to.frame("//div[@class='mid-viewport']")

NoSuchFrameException: Message: //div[@class='mid-viewport']


In [460]:
g = driver.find_elements(By.CSS_SELECTOR, "[aria-label='Focus mode']")

In [461]:
len(g)

0

In [476]:
for i in scroll_button:
    print(i.size)

{'height': 9, 'width': 1178}
{'height': 10, 'width': 9}


In [456]:
g[5].get_attribute("aria-label")

'Zoom_In'

In [475]:
scroll_button[1]

<selenium.webdriver.remote.webelement.WebElement (session="19f9568cc2a589cb6e66ff6ff27f4294", element="5F256A6FD2CCB7F010AA4CEAECDAD5F4_element_344")>

In [477]:
from selenium import webdriver
from selenium.common.exceptions import WebDriverException

In [478]:
def wheel_element(element, deltaY = 120, offsetX = 0, offsetY = 0):
  error = element._parent.execute_script("""
    var element = arguments[0];
    var deltaY = arguments[1];
    var box = element.getBoundingClientRect();
    var clientX = box.left + (arguments[2] || box.width / 2);
    var clientY = box.top + (arguments[3] || box.height / 2);
    var target = element.ownerDocument.elementFromPoint(clientX, clientY);

    for (var e = target; e; e = e.parentElement) {
      if (e === element) {
        target.dispatchEvent(new MouseEvent('mouseover', {view: window, bubbles: true, cancelable: true, clientX: clientX, clientY: clientY}));
        target.dispatchEvent(new MouseEvent('mousemove', {view: window, bubbles: true, cancelable: true, clientX: clientX, clientY: clientY}));
        target.dispatchEvent(new WheelEvent('wheel',     {view: window, bubbles: true, cancelable: true, clientX: clientX, clientY: clientY, deltaY: deltaY}));
        return;
      }
    }    
    return "Element is not interactable";
    """, element, deltaY, offsetX, offsetY)
  if error:
    raise WebDriverException(error)

In [490]:
grid_holder[1].text

'K-12 Education\nConnecticut\n$164.5 million to facilitate the safe reopening of schools in the Fall and to support the academic success of all students. The funds will support bridging the technology gap, providing quality academic supports for all students, and implementing public health best practices to keep students and staff safe.\nState'

In [491]:
wheel_element(grid_holder[1], 82)

In [506]:
grid_holder = driver.find_elements(By.XPATH, "//div[@role='row']")

In [507]:
len(grid_holder)

21

In [505]:
grid_holder_2[2].text

'Technology/Broadband\nAlabama\n$50.3 million to a broadband voucher program.\nState'

In [494]:
type(grid_holder)

list

In [496]:
grid_holder[1]

<selenium.webdriver.remote.webelement.WebElement (session="19f9568cc2a589cb6e66ff6ff27f4294", element="5F256A6FD2CCB7F010AA4CEAECDAD5F4_element_382")>

In [497]:
len(grid_holder)

21

In [498]:
len(grid_holder_2)

21

In [499]:
grid_holder[20].text

'Other\nDelaware\n$1.6 million for pandemic consulting.\nState'

In [500]:
grid_holder_2[20].text

'Other\nDelaware\n$1.6 million for pandemic consulting.\nState'

In [None]:
send_keys(Keys.DOWN)

In [508]:
test = driver.find_elements(By.XPATH, "//div[@role='gridcell']")

In [509]:
len(test)

100

In [514]:
test[98].text

'$50 million for colleges and universities.'

In [515]:
ActionChains(driver).move_to_element(test[98]).click().send_keys(Keys.DOWN).perform

<bound method ActionChains.perform of <selenium.webdriver.common.action_chains.ActionChains object at 0x7f8ce0ce58d0>>

In [522]:
driver.find_elements(By.XPATH, "//div[@role='row']")[1].text

'Small Business Relief\nAlabama\n$17.5 million to an Agribusiness Stabilization Grant Program\nState'

In [520]:
test[98].send_keys(Keys.DOWN)

In [523]:
results = []

In [524]:
type(results)

list

In [543]:
test_1 = [1,2]
test_2 = [3,4]

In [544]:
test_3 = test_1 + test_2

In [545]:
test_3

[1, 2, 3, 4]

In [546]:
test[-2].text

'$50 million for colleges and universities.'

In [603]:
results_text = np.empty(0, dtype=str)

In [607]:
results_text = np.append(results_text, test_x)

In [609]:
results_text[1] = results_text[1].text

In [610]:
results_text[1]

'Unemployment/Workforce Development\nAlabama\n$385 million to the unemployment trust fund transfer program.\nState'

In [None]:
def get_grid(driver): 
    grids = driver.find_elements(By.XPATH, "//div[@role='grid']")
    return grids