In [None]:
import json
import re
from datetime import datetime

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [None]:
# papermill parameters
# Directory the CSV exports are written to; the export cells build their
# paths as f'{output_folder}/<TABLE_NAME>.csv'.
output_folder = "../output/"

## Functions

In [None]:
def find_potential_divs(soup):
    """Return the ``<div>`` tags that may carry an embedded data table.

    A tag qualifies when it is a ``div`` with exactly two attributes, one of
    which is ``data-app-js``.

    Parameters
    ----------
    soup : bs4.BeautifulSoup
        Parsed document to search.

    Returns
    -------
    list
        Matching tags, in document order.
    """
    # ``find_all`` is the supported spelling; ``findAll`` is a legacy alias
    # that Beautiful Soup 4 marks as deprecated.
    return list(soup.find_all(
        lambda tag: tag.name == "div"
        and len(tag.attrs) == 2
        and 'data-app-js' in tag.attrs
    ))

def parse_state_actions(div):
    """Decode a div's ``data-app-js`` attribute and return its ``gdocs_object``.

    The attribute is parsed as JSON rather than being passed to ``eval``:
    evaluating scraped markup would execute arbitrary code, and rewriting
    ``true``/``false``/``null`` textually also altered those words when they
    appeared inside string values.

    Parameters
    ----------
    div : mapping-like
        Object supporting ``div['data-app-js']`` (e.g. a BeautifulSoup tag)
        whose value is a JSON document.

    Returns
    -------
    object
        The value stored under the ``'gdocs_object'`` key of the decoded JSON.

    Raises
    ------
    json.JSONDecodeError
        If the attribute value is not valid JSON.
    KeyError
        If the attribute or the ``'gdocs_object'`` key is missing.
    """
    payload = json.loads(div['data-app-js'])
    return payload['gdocs_object']

def get_df_from_parsed(parsed):
    """Turn a decoded ``gdocs_object`` payload into a tidy DataFrame.

    The rows are read from ``parsed[0][1]``. The first row supplies the
    column labels, with its first cell replaced by ``'Location'``. Rows 0, 1
    and 2 are then discarded (the header row plus the two rows after it --
    presumably sub-header/summary rows; confirm against the live table) and
    the index is renumbered from zero.
    """
    table = pd.DataFrame(parsed[0][1])

    # Label the first column, then promote row 0 to the column header.
    table.loc[0, 0] = 'Location'
    header = table.loc[0]
    table.columns = header

    body = table.drop(index=[0, 1, 2])
    return body.reset_index(drop=True)

## Get request and parse response

In [None]:
# Fetch the KFF policy-actions page. A timeout keeps the run from hanging on
# a stalled connection, and raise_for_status() stops the notebook on an HTTP
# error instead of letting later cells parse an error page.
r = requests.get(
    "https://www.kff.org/report-section/state-covid-19-data-and-policy-actions-policy-actions/#socialdistancing",
    timeout=60,
)
r.raise_for_status()

In [None]:
# Parse the raw response body with the html5lib parser.
soup = BeautifulSoup(r.content, 'html5lib')

## Get tables

In [None]:
# Collect every candidate <div> carrying a data-app-js attribute; later cells
# pick individual tables out of this list by position.
found_divs = find_potential_divs(soup)

### State Actions to Mitigate the Spread of COVID-19

Table schema:

```sql
-- KFF_US_STATE_MITIGATIONS DDL
-- cvcb.28JUL2020: created
-- cvcb.31JUL2020: added country_region field 

CREATE OR REPLACE TABLE KFF_US_STATE_MITIGATIONS (
    COUNTRY_REGION VARCHAR(255),
    PROVINCE_STATE VARCHAR(255),
    STATUS_OF_REOPENING VARCHAR(255),
    STAY_AT_HOME_ORDER VARCHAR(255),
    MANDATORY_QUARANTINE_FOR_TRAVELERS VARCHAR(255),
    NON_ESSENTIAL_BUSINESS_CLOSURES VARCHAR(255),
    LARGE_GATHERINGS_BAN VARCHAR(255),
    RESTAURANT_LIMITS VARCHAR(255),
    BAR_CLOSURES VARCHAR(255),
    FACE_COVERING_REQUIREMENT VARCHAR(255),
    PRIMARY_ELECTION_POSTPONEMENT VARCHAR(255),
    EMERGENCY_DECLARATION VARCHAR(255),
    LAST_UPDATED_DATE TIMESTAMP_NTZ(9)
);
```

In [None]:
# NOTE(review): index 4 is assumed to be the "State Actions to Mitigate the
# Spread of COVID-19" table; the lookup is positional, so confirm it whenever
# the page layout changes.
parsed = parse_state_actions(found_divs[4])

In [None]:
# Build the mitigation table and append the constant columns used by the
# export: the extraction timestamp, the country and an empty
# "Primary Election Postponement" field.
df_actions = get_df_from_parsed(parsed).assign(**{
    "Last_Update_Date": datetime.utcnow(),
    "Country_Region": "United States",
    "Primary Election Postponement": "",
})

In [None]:
# Display the column labels so they can be checked against the names used in
# the rename and export cells.
df_actions.columns

In [None]:
# Map the scraped headers onto the labels the export cell selects; labels that
# are not present in the frame are left untouched by rename().
df_actions = df_actions.rename(columns={
    "Bar Closures*": "Bar Closures",
    "Statewide Face Mask Requirement": "Face Covering Requirement"
})

In [None]:
# Write the mitigation table without the index. The columns are listed in the
# same order as the fields of the KFF_US_STATE_MITIGATIONS DDL.
df_actions.to_csv(
    f'{output_folder}/KFF_US_STATE_MITIGATIONS.csv',
    columns=[
        'Country_Region', 'Location', 'Status of Reopening',
        'Stay at Home Order', 'Mandatory Quarantine for Travelers',
        'Non-Essential Business Closures', 'Large Gatherings Ban',
        'Restaurant Limits', 'Bar Closures', 'Face Covering Requirement',
        'Primary Election Postponement', 'Emergency Declaration',
        'Last_Update_Date'
    ],
    index=False)

### Reopening status timeline

This requires the table `KFF_US_REOPENING_TIMELINE_INCREMENT`, created with the following DDL:

```sql
--- US reopening status DDL
--- cvcb.27JUL2020: created

CREATE OR REPLACE TABLE KFF_US_REOPENING_TIMELINE_INCREMENT (
  DATE DATE,
  COUNTRY_REGION VARCHAR(255),
  PROVINCE_STATE VARCHAR(255),
  STATUS VARCHAR(255)
);
```

In [None]:
# Work on an explicit copy: adding columns to a bare slice of df_actions
# triggers pandas' SettingWithCopyWarning and is not guaranteed to behave
# consistently.
reopening_timeline = df_actions[["Location", "Status of Reopening"]].copy()
reopening_timeline["COUNTRY_REGION"] = "United States"
# Read the clock once so year, month and day always come from the same
# instant (three separate reads could straddle midnight UTC).
reopening_timeline["DATE"] = datetime.utcnow().strftime("%Y-%m-%d")
# Relabel positionally: Location -> PROVINCE_STATE, Status of Reopening -> STATUS.
reopening_timeline.columns = [
    "PROVINCE_STATE", "STATUS", "COUNTRY_REGION", "DATE"
]

In [None]:
# Write the daily reopening snapshot without the index, with the columns in
# the same order as the KFF_US_REOPENING_TIMELINE_INCREMENT DDL.
reopening_timeline.to_csv(
    f"{output_folder}/KFF_US_REOPENING_TIMELINE_INCREMENT.csv",
    columns=["DATE", "COUNTRY_REGION", "PROVINCE_STATE", "STATUS"],
    index=False)

### State COVID-19 Health Policy Actions

DDL:

```sql
CREATE OR REPLACE TABLE KFF_US_POLICY_ACTIONS (
    COUNTRY_REGION VARCHAR(255),
    PROVINCE_STATE VARCHAR(255),
    WAIVE_COST_SHARING_FOR_COVID_19_TREATMENT VARCHAR(16777216),
    FREE_COST_VACCINE_WHEN_AVAILABLE VARCHAR(16777216),
    STATE_REQUIRES_WAIVER_OF_PRIOR_AUTHORIZATION_REQUIREMENTS VARCHAR(16777216),
    EARLY_PRESCRIPTION_REFILLS VARCHAR(16777216),
    MARKETPLACE_SPECIAL_ENROLLMENT_PERIOD VARCHAR(16777216),
    SECTION_1135_WAIVER VARCHAR(16777216),
    PAID_SICK_LEAVE VARCHAR(16777216),
    PREMIUM_PAYMENT_GRACE_PERIOD VARCHAR(16777216),
    NOTES VARCHAR(16777216),
    LAST_UPDATED_DATE TIMESTAMP_NTZ(9)
);
```

In [None]:
# NOTE(review): index 5 is assumed to be the "State COVID-19 Health Policy
# Actions" table; the lookup is positional, so confirm it whenever the page
# layout changes.
parsed = parse_state_actions(found_divs[5])

In [None]:
# Build the health-policy table and append the constant columns used by the
# export: an empty notes field, the extraction timestamp and the country.
df_health = get_df_from_parsed(parsed).assign(
    Notes="",
    Last_Updated_Date=datetime.utcnow(),
    Country_Region="United States",
)

In [None]:
# Display the column labels so they can be checked against the names used in
# the export cell.
df_health.columns

In [None]:
# Write the health-policy table without the index. The columns are listed in
# the same order as the fields of the KFF_US_POLICY_ACTIONS DDL.
df_health.to_csv(
    f'{output_folder}/KFF_US_POLICY_ACTIONS.csv',
    columns=[
        "Country_Region", 'Location',
        'Waive Cost Sharing for COVID-19 Treatment',
        'Free Cost Vaccine When Available',
        'State Requires Waiver of Prior Authorization Requirements*',
        'Early Prescription Refills',
        'Marketplace Special Enrollment Period (SEP)', 'Section 1135 Waiver',
        'Paid Sick Leave', 'Premium Payment Grace Period', 'Notes',
        'Last_Updated_Date'
    ],
    index=False)