In [1]:
from bs4 import BeautifulSoup
import requests
from datetime import datetime
import re
import pandas as pd

In [2]:
# papermill parameters
output_folder = "../output/"

## Functions

In [3]:
def find_potential_divs(soup):
    divs = soup.findAll(lambda tag:tag.name == "div" and len(tag.attrs) == 2)
    found_div = []
    for div in divs:
        if 'data-app-js' in div.attrs.keys():
            found_div.append(div)
    return found_div

def parse_state_actions(div):
    return eval(div['data-app-js'].replace(
    'false', 'False').replace('true', 'True').replace(
    'null', 'None'))['gdocs_object']

def get_df_from_parsed(parsed):
    df = pd.DataFrame(parsed[0][1])
    df.loc[0, 0] = 'Location'
    df.columns = df.loc[0]
    df = df.drop([0,1, 2])
    df = df.reset_index(drop=True)
    return df

## Get request and parse response

In [4]:
r = requests.get("https://www.kff.org/health-costs/issue-brief/state-data-and-policy-actions-to-address-coronavirus/")

In [5]:
soup = BeautifulSoup(r.content, 'html5lib')

## Get tables

In [6]:
found_divs = find_potential_divs(soup)

### State Actions to Mitigate the Spread of COVID-19

In [7]:
parsed = parse_state_actions(found_divs[3])

In [8]:
df_actions = get_df_from_parsed(parsed)
df_actions["Last_Update_Date"] = datetime.utcnow()

In [10]:
df_actions.to_csv(
    f'{output_folder}/KFF_US_STATE_MITIGATIONS.csv',
    columns=[
        'Location', 'Bar\/Restaurant Limits', 'Mandatory Quarantine',
        'Non-Essential Business Closures', 'Emergency Declaration',
        'Primary Election Postponement', 'State-Mandated School Closures',
        'Large Gatherings Ban', 'State Is Easing Social Distancing Measures',
        'Stay At Home Order', 'Last_Update_Date'
    ],
    index=False)

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike
  return self._getitem_tuple(key)


### State COVID-19 Health Policy Actions

In [11]:
parsed = parse_state_actions(found_divs[4])

In [12]:
df_health = get_df_from_parsed(parsed)
df_health["Notes"] = ""
df_health["Last_Update_Date"] = datetime.utcnow()

In [13]:
df_health.to_csv(f'{output_folder}/KFF_US_POLICY_ACTIONS.csv', columns=['Location', 'Waive Cost Sharing for COVID-19 Treatment',
       'Free Cost Vaccine When Available',
       'State Requires Waiver of Prior Authorization Requirements',
       'Early Prescription Refills',
       'Marketplace Special Enrollment Period (SEP)', 'Section 1135 Waiver',
       'Paid Sick Leave', 'Premium Payment Grace Period', 'Notes', 'Last_Update_Date'], index=False)