# Get PA Nursing Home Data

This notebook scrapes data from the Pennsylvania Department of Health, Nursing Care Facility Information database.

The goal is to create a database of nursing homes in Montgomery County, PA that accept Medicaid payments.

## Import dependencies

In [59]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

## Get the data

In [62]:
url = 'https://sais.health.pa.gov/commonpoc/content/publicweb/nhinformation2.asp?COUNTY=Montgomery'
html_page = requests.get(url)
soup = BeautifulSoup(html_page.content, 'html.parser')

## Extract the HTML table, with the target data
The target data are in the last table (`tables[-1]`) on the page.

In [63]:
tables = soup.find_all('table')

In [74]:
table_header = tables[-1].find_all('th')

In [78]:
table_header

[<th bgcolor="DarkSeaGreen">Select</th>,
 <th bgcolor="DarkSeaGreen">Name/Address/Phone</th>,
 <th bgcolor="DarkSeaGreen">Type of Ownership</th>,
 <th bgcolor="DarkSeaGreen">Licensure Status</th>,
 <th bgcolor="DarkSeaGreen">Last Inspection</th>,
 <th bgcolor="DarkSeaGreen">Size of Facility</th>,
 <th bgcolor="DarkSeaGreen">Number of Beds</th>,
 <th bgcolor="DarkSeaGreen">Payment Options</th>,
 <th bgcolor="DarkSeaGreen">Nursing Hours Per Resident Per Day</th>]

In [66]:
table_rows = tables[-1].find_all('tr')

In [70]:
list_of_rows = []

for tr in table_rows:
    td = tr.find_all('td')
    row = [tr.text for tr in td]
    list_of_rows.append(row)

df = pd.DataFrame(list_of_rows)

In [72]:
print(df[1])

0     ABRAMSON RESIDENCE1425 HORSHAM ROADNORTH WALES...
1     ABRAMSON RESIDENCE1425 HORSHAM ROADNORTH WALES...
2     ABRAMSON RESIDENCE1425 HORSHAM ROADNORTH WALES...
3     ABRAMSON SENIOR CARE AT LANKENAU MEDICAL CENTE...
4     (BrightenaataAmbler)AMBLER EXTENDED CARE CENTE...
                            ...                        
57    WILLOWBROOKE COURT AT SPRING HOUSE ESTATES728 ...
58    WILLOWBROOKE COURT SKILLED CARE CENTER AT BRIT...
59    WILLOWBROOKE COURT SKILLEDCARECTR AT FORT WASH...
60    WILLOWBROOKE COURTSKILLEDCARECENTER AT NORMAND...
61    WYNDMOOR HILLS REHABILITATION AND NURSING CENT...
Name: 1, Length: 62, dtype: object


In [73]:
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,530,531,532,533,534,535,536,537,538,539
0,,ABRAMSON RESIDENCE1425 HORSHAM ROADNORTH WALES...,NON-PROFIT,REGULAR,1/27/2020,Large,324,Private PaymentMedicareMedicaid,3.5,,...,3.65,,WYNDMOOR HILLS REHABILITATION AND NURSING CENT...,PROFIT,REGULAR,2/18/2020,Small,77,Private PaymentMedicareMedicaid,3.8
1,,ABRAMSON RESIDENCE1425 HORSHAM ROADNORTH WALES...,NON-PROFIT,REGULAR,1/27/2020,Large,324,Private PaymentMedicareMedicaid,3.5,,...,3.65,,WYNDMOOR HILLS REHABILITATION AND NURSING CENT...,PROFIT,REGULAR,2/18/2020,Small,77,Private PaymentMedicareMedicaid,3.8
2,,ABRAMSON RESIDENCE1425 HORSHAM ROADNORTH WALES...,NON-PROFIT,REGULAR,1/27/2020,Large,324,Private PaymentMedicareMedicaid,3.5,,...,,,,,,,,,,
3,,ABRAMSON SENIOR CARE AT LANKENAU MEDICAL CENTE...,NON-PROFIT,REGULAR,6/9/2020,Small,35,Private PaymentMedicare,6.45,,...,,,,,,,,,,
4,,(BrightenaataAmbler)AMBLER EXTENDED CARE CENTE...,PROFIT,REGULAR,5/7/2020,Small,100,Private PaymentMedicareMedicaid,3.01,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57,,WILLOWBROOKE COURT AT SPRING HOUSE ESTATES728 ...,NON-PROFIT,REGULAR,12/19/2019,Small,96,Private PaymentMedicare,3.64,,...,,,,,,,,,,
58,,WILLOWBROOKE COURT SKILLED CARE CENTER AT BRIT...,NON-PROFIT,REGULAR,6/25/2019,Small,92,Private PaymentMedicareMedicaid,3.39,,...,,,,,,,,,,
59,,WILLOWBROOKE COURT SKILLEDCARECTR AT FORT WASH...,NON-PROFIT,REGULAR,9/17/2019,Small,40,Private PaymentMedicare,3.4,,...,,,,,,,,,,
60,,WILLOWBROOKE COURTSKILLEDCARECENTER AT NORMAND...,NON-PROFIT,REGULAR,4/17/2020,Small,73,Private PaymentMedicare,3.65,,...,,,,,,,,,,
